tu/a7xx: Don't set FLUSH_PER_OVERLAP_AND_OVERWRITE for feedback loops

A7XX doesn't have the same issue with UBWC flag buffer coherency
as A6XX has.

However, for VK_EXT_rasterization_order_attachment_access we still have
to set a flushing prim mode, since the extension allows reads and writes
without explicit synchronization between them. In sysmem, though, we can
use FLUSH_PER_OVERLAP instead of FLUSH_PER_OVERLAP_AND_OVERWRITE.

Passes:
 dEQP-VK.pipeline.*feedback_loop*
 dEQP-GLES31.functional.blend_equation_advanced.* (with Zink)

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28597>
This commit is contained in:
Danylo Piliaiev 2024-04-05 16:39:00 +02:00 committed by Marge Bot
parent 26417211ae
commit 2890a0615e
3 changed files with 14 additions and 2 deletions

View file

@ -175,6 +175,14 @@ struct fd_dev_info {
/* See ir3_compiler::has_scalar_alu. */
bool has_scalar_alu;
/* Whether reading an image as an input attachment or as a texture returns
* correct values while the same image is being written as a UBWC attachment.
* If this is false, we may read stale values from the flag buffer,
* thus reading incorrect values from the image.
* Happens with VK_EXT_attachment_feedback_loop_layout.
*/
bool has_coherent_ubwc_flag_caches;
struct {
uint32_t PC_POWER_CNTL;
uint32_t TPL1_DBG_ECO_CNTL;

View file

@ -793,6 +793,7 @@ a7xx_base = A6XXProps(
line_width_min = 1.0,
line_width_max = 127.5,
has_scalar_alu = True,
has_coherent_ubwc_flag_caches = True,
)
a7xx_725 = A7XXProps(

View file

@ -3634,10 +3634,13 @@ tu_pipeline_builder_parse_rasterization_order(
* both input and color attachments from one fragment to the next,
* in rasterization order, without explicit synchronization.
*/
sysmem_prim_mode = FLUSH_PER_OVERLAP_AND_OVERWRITE;
if (builder->device->physical_device->info->a6xx.has_coherent_ubwc_flag_caches)
sysmem_prim_mode = FLUSH_PER_OVERLAP;
else
sysmem_prim_mode = FLUSH_PER_OVERLAP_AND_OVERWRITE;
gmem_prim_mode = FLUSH_PER_OVERLAP;
pipeline->prim_order.sysmem_single_prim_mode = true;
} else {
} else if (!builder->device->physical_device->info->a6xx.has_coherent_ubwc_flag_caches) {
/* If there is a feedback loop, then the shader can read the previous value
* of a pixel being written out. It can also write some components and then
* read different components without a barrier in between. This is a