From c30a4d4fdb3ab43328092f09cba7207bbf04655e Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 31 Mar 2026 11:27:49 +0300 Subject: [PATCH] anv/brw/nir: fix wa_18019110168 Several things were wrong : - incorrect offset in the FS push constant data - incorrect encoding of the 32bit values with 2 fields (remap table offset & provoking vertex) Signed-off-by: Lionel Landwerlin Acked-by: Alyssa Rosenzweig Part-of: --- src/compiler/nir/nir_divergence_analysis.c | 1 - src/compiler/nir/nir_intrinsics.py | 3 -- src/intel/compiler/brw/brw_compile_mesh.cpp | 9 ++-- src/intel/compiler/brw/brw_compiler.h | 14 ++++-- src/intel/compiler/brw/brw_from_nir.cpp | 5 -- .../compiler/brw/brw_nir_wa_18019110168.c | 16 +++++-- .../vulkan/anv_nir_compute_push_layout.c | 10 ++-- src/intel/vulkan/anv_private.h | 16 ++++--- src/intel/vulkan/anv_shader_compile.c | 47 ++++++++++++++----- src/intel/vulkan/genX_cmd_draw.c | 7 --- src/intel/vulkan/genX_gfx_state.c | 44 ++++++++++------- 11 files changed, 104 insertions(+), 68 deletions(-) diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 2e9d7595c49..93e6ebfb1ab 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -356,7 +356,6 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_fs_z_c0_intel: case nir_intrinsic_load_ray_base_mem_addr_intel: case nir_intrinsic_load_ray_hw_stack_size_intel: - case nir_intrinsic_load_per_primitive_remap_intel: case nir_intrinsic_load_core_count_arm: case nir_intrinsic_load_core_max_id_arm: case nir_intrinsic_load_warp_max_id_arm: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 148c59804b4..f79008d76c4 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2800,9 +2800,6 @@ system_value("tess_config_intel", 1) # Dynamic fragment shader parameters (see intel_fs_config) . system_value("fs_config_intel", 1) -# Per primitive remapping table offset. -system_value("per_primitive_remap_intel", 1) - # The (linear) local invocation index provided in the payload of mesh/task shaders. system_value("local_invocation_index_intel", 1) diff --git a/src/intel/compiler/brw/brw_compile_mesh.cpp b/src/intel/compiler/brw/brw_compile_mesh.cpp index d5aff0e0c33..42ce3b5caf2 100644 --- a/src/intel/compiler/brw/brw_compile_mesh.cpp +++ b/src/intel/compiler/brw/brw_compile_mesh.cpp @@ -1158,14 +1158,15 @@ brw_compile_mesh(const struct brw_compiler *compiler, if (wa_18019110168_mapping[i] != -1) remap_table[i] = prog_data->map.vue_map.varying_to_slot[wa_18019110168_mapping[i]]; } + uint32_t constant_data_aligned_size = align(nir->constant_data_size, 32); uint8_t *const_data = (uint8_t *) rzalloc_size(params->base.mem_ctx, - nir->constant_data_size + sizeof(remap_table)); + constant_data_aligned_size + sizeof(remap_table)); memcpy(const_data, nir->constant_data, nir->constant_data_size); - memcpy(const_data + nir->constant_data_size, remap_table, sizeof(remap_table)); - g.add_const_data(const_data, nir->constant_data_size + sizeof(remap_table)); + memcpy(const_data + constant_data_aligned_size, remap_table, sizeof(remap_table)); + g.add_const_data(const_data, constant_data_aligned_size + sizeof(remap_table)); prog_data->wa_18019110168_mapping_offset = - prog_data->base.base.const_data_offset + nir->constant_data_size; + prog_data->base.base.const_data_offset + constant_data_aligned_size; } else { g.add_const_data(nir->constant_data, nir->constant_data_size); } diff --git a/src/intel/compiler/brw/brw_compiler.h b/src/intel/compiler/brw/brw_compiler.h index 98b4097d5a0..2afe5ad99ed 100644 --- a/src/intel/compiler/brw/brw_compiler.h +++ b/src/intel/compiler/brw/brw_compiler.h @@ -1410,12 +1410,12 @@ struct brw_compile_mesh_params { struct brw_mesh_prog_data *prog_data; const struct brw_tue_map *tue_map; - /** Load provoking vertex + /** Load provoking vertex for wa_18019110168 * * The callback returns a 32bit integer representing the provoking vertex. */ - void *load_provoking_vertex_data; - nir_def *(*load_provoking_vertex)(nir_builder *b, void *data); + void *wa_18019110168_data; + nir_def *(*wa_18019110168_load_provoking_vertex)(nir_builder *b, void *data); }; const unsigned * @@ -1439,6 +1439,14 @@ struct brw_compile_fs_params { bool allow_spilling; bool use_rep_send; uint8_t max_polygons; + + /** Load per primitive remapping offset for wa_18019110168 + * + * The callback returns a 32bit integer representing the offset of the + * table in the instruction heap. + */ + void *wa_18019110168_data; + nir_def *(*wa_18019110168_load_per_primitive_remap_table_offset)(nir_builder *b, void *data); }; /** diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index f1aa0d5ac1a..c84ba0cbff9 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -4157,11 +4157,6 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, bld.MOV(retype(dest, BRW_TYPE_UD), brw_imm_ud(s.max_polygons)); break; - case nir_intrinsic_load_per_primitive_remap_intel: - bld.MOV(retype(dest, BRW_TYPE_UD), - brw_dynamic_per_primitive_remap(brw_fs_prog_data(s.prog_data))); - break; - default: brw_from_nir_emit_intrinsic(ntb, bld, instr); break; diff --git a/src/intel/compiler/brw/brw_nir_wa_18019110168.c b/src/intel/compiler/brw/brw_nir_wa_18019110168.c index a7c62ce6f03..8110059e6fc 100644 --- a/src/intel/compiler/brw/brw_nir_wa_18019110168.c +++ b/src/intel/compiler/brw/brw_nir_wa_18019110168.c @@ -231,7 +231,8 @@ mesh_convert_attrs_prim_to_vert(struct nir_shader *nir, nir_def *zero = nir_imm_int(b, 0); nir_def *provoking_vertex = - params->load_provoking_vertex(b, params->load_provoking_vertex_data); + params->wa_18019110168_load_provoking_vertex( + b, params->wa_18019110168_data); nir_def *local_invocation_index = nir_load_local_invocation_index(b); nir_def *cmp = nir_ieq(b, local_invocation_index, zero); @@ -500,7 +501,8 @@ brw_nir_frag_convert_attrs_prim_to_vert(struct nir_shader *nir, nir_function_impl *impl = nir_shader_get_entrypoint(nir); nir_builder _b = nir_builder_at(nir_before_impl(impl)), *b = &_b; - uint64_t remapped_inputs = 0; + uint64_t old_per_primitive_inputs = 0; + uint64_t new_per_vertex_inputs = 0; nir_foreach_shader_in_variable_safe(var, nir) { gl_varying_slot location = var->data.location; if (location == VARYING_SLOT_PRIMITIVE_COUNT || @@ -524,10 +526,13 @@ brw_nir_frag_convert_attrs_prim_to_vert(struct nir_shader *nir, new_var->data.interpolation = INTERP_MODE_FLAT; new_derefs[location] = nir_build_deref_var(b, new_var); + + old_per_primitive_inputs |= BITFIELD64_BIT(location); + new_per_vertex_inputs |= BITFIELD64_BIT(new_location); } - nir->info.inputs_read |= remapped_inputs; - nir->info.per_primitive_inputs &= ~remapped_inputs; + nir->info.inputs_read |= new_per_vertex_inputs; + nir->info.per_primitive_inputs &= ~old_per_primitive_inputs; NIR_PASS(_, nir, frag_update_derefs, new_derefs); @@ -571,7 +576,8 @@ brw_nir_frag_convert_attrs_prim_to_vert_indirect(struct nir_shader *nir, nir_def *remap_table_addr = nir_pack_64_2x32_split( b, - nir_load_per_primitive_remap_intel(b), + params->wa_18019110168_load_per_primitive_remap_table_offset( + b, params->wa_18019110168_data), nir_load_reloc_const_intel( b, BRW_SHADER_RELOC_INSTRUCTION_BASE_ADDR_HIGH)); u_foreach_bit64(location, per_primitive_inputs) { diff --git a/src/intel/vulkan/anv_nir_compute_push_layout.c b/src/intel/vulkan/anv_nir_compute_push_layout.c index 64557379373..51004548ce8 100644 --- a/src/intel/vulkan/anv_nir_compute_push_layout.c +++ b/src/intel/vulkan/anv_nir_compute_push_layout.c @@ -67,8 +67,8 @@ adjust_driver_push_values(nir_shader *nir, if (data->needs_wa_18019110168) { const uint32_t fs_per_prim_remap_start = - anv_drv_const_offset(gfx.fs_per_prim_remap_offset); - assert(anv_drv_const_size(gfx.fs_per_prim_remap_offset) <= 4); + anv_drv_const_offset(gfx.wa_18019110168); + assert(anv_drv_const_size(gfx.wa_18019110168) <= 4); BITSET_SET(data->push_dwords, fs_per_prim_remap_start / 4); } } @@ -76,8 +76,8 @@ adjust_driver_push_values(nir_shader *nir, if (nir->info.stage == MESA_SHADER_MESH && brw_nir_mesh_shader_needs_wa_18019110168(devinfo, nir)) { const uint32_t mesh_provoking_vertex_start = - anv_drv_const_offset(gfx.mesh_provoking_vertex); - assert(anv_drv_const_size(gfx.mesh_provoking_vertex) <= 4); + anv_drv_const_offset(gfx.wa_18019110168); + assert(anv_drv_const_size(gfx.wa_18019110168) <= 4); BITSET_SET(data->push_dwords, mesh_provoking_vertex_start / 4); } @@ -811,7 +811,7 @@ anv_nir_compute_push_layout(nir_shader *nir, } if (data.needs_wa_18019110168) { const uint32_t fs_per_prim_remap_offset = - anv_drv_const_offset(gfx.fs_per_prim_remap_offset); + anv_drv_const_offset(gfx.wa_18019110168); assert(fs_per_prim_remap_offset >= push_start); fs_prog_data->per_primitive_remap_param = fs_per_prim_remap_offset - push_start; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index d474b65d458..14fc3c55b3c 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1167,7 +1167,6 @@ enum anv_pipeline_behavior { #define ANV_INLINE_DWORD_PUSH_ADDRESS_LDW (UINT8_MAX - 0) #define ANV_INLINE_DWORD_PUSH_ADDRESS_UDW (UINT8_MAX - 1) -#define ANV_INLINE_DWORD_MESH_PROVOKING_VERTEX (UINT8_MAX - 2) struct anv_pipeline_bind_map { unsigned char surface_blake3[BLAKE3_KEY_LEN]; @@ -2055,6 +2054,7 @@ enum anv_gfx_state_bits { ANV_GFX_STATE_WA_14018283232, /* Fake state to implement workaround */ ANV_GFX_STATE_WA_18038825448, /* Fake state to implement workaround */ ANV_GFX_STATE_WA_14024997852, /* Fake state to implement workaround */ + ANV_GFX_STATE_WA_18019110168, /* Fake state to implement workaround */ ANV_GFX_STATE_TBIMR_TILE_PASS_INFO, ANV_GFX_STATE_FS_CONFIG, ANV_GFX_STATE_TESS_CONFIG, @@ -2462,9 +2462,9 @@ struct anv_gfx_dynamic_state { uint32_t tess_config; /** - * Provoking vertex index, sent to the mesh shader for Wa_18019110168. + * Prepared value for anv_push_constants::gfx::wa_18019110168. */ - uint32_t mesh_provoking_vertex; + uint32_t wa_18019110168; bool pma_fix; @@ -4396,9 +4396,13 @@ struct anv_push_constants { /** Robust access pushed registers. */ uint8_t push_reg_mask[MESA_SHADER_STAGES][4]; - /** Wa_18019110168 */ - uint16_t mesh_provoking_vertex; - uint16_t fs_per_prim_remap_offset; + /** Wa_18019110168 + * bits 4:0 : provoking vertex value + * bits 31:5 : per primitive table remapping offset + */ +#define ANV_WA_18019110168_PROVOKING_VERTEX_MASK ((1u << 5) - 1) +#define ANV_WA_18019110168_PER_PRIMITIVE_REMAP_TABLE_OFFSET_MASK (~ANV_WA_18019110168_PROVOKING_VERTEX_MASK) + uint32_t wa_18019110168; } gfx; struct { diff --git a/src/intel/vulkan/anv_shader_compile.c b/src/intel/vulkan/anv_shader_compile.c index 956b3b80c6d..d182eb17a66 100644 --- a/src/intel/vulkan/anv_shader_compile.c +++ b/src/intel/vulkan/anv_shader_compile.c @@ -991,22 +991,42 @@ anv_shader_compile_task(struct anv_device *device, } static nir_def * -mesh_load_provoking_vertex(nir_builder *b, void *data) +wa_18019110168_load_provoking_vertex(nir_builder *b, void *data) { const struct anv_pipeline_bind_map *bind_map = data; + nir_def *val = NULL; for (uint32_t i = 0; i < bind_map->inline_dwords_count; i++) { - if (bind_map->inline_dwords[i] == anv_drv_const_dword(gfx.mesh_provoking_vertex)) { - return nir_load_inline_data_intel( - b, 1, 16, nir_imm_int(b, 0), - .base = i * 4 + anv_drv_const_offset(gfx.mesh_provoking_vertex) % 4); + if (bind_map->inline_dwords[i] == anv_drv_const_dword(gfx.wa_18019110168)) { + val = nir_load_inline_data_intel( + b, 1, 32, nir_imm_int(b, 0), + .base = i * 4); + break; } } - return nir_load_push_data_intel(b, 1, 16, nir_imm_int(b, 0), - .base = anv_drv_const_offset(gfx.mesh_provoking_vertex) - - bind_map->push_ranges[0].start, - .range = anv_drv_const_size(gfx.mesh_provoking_vertex)); + if (val == NULL) { + val = nir_load_push_data_intel(b, 1, 32, nir_imm_int(b, 0), + .base = anv_drv_const_offset(gfx.wa_18019110168) - + bind_map->push_ranges[0].start * 32, + .range = anv_drv_const_size(gfx.wa_18019110168)); + } + + return nir_iand_imm(b, val, ANV_WA_18019110168_PROVOKING_VERTEX_MASK); +} + +static nir_def * +wa_18019110168_load_per_primitive_remap_table(nir_builder *b, void *data) +{ + const struct anv_pipeline_bind_map *bind_map = data; + nir_def *val = NULL; + + val = nir_load_push_data_intel(b, 1, 32, nir_imm_int(b, 0), + .base = anv_drv_const_offset(gfx.wa_18019110168) - + bind_map->push_ranges[0].start * 32, + .range = anv_drv_const_size(gfx.wa_18019110168)); + + return nir_iand_imm(b, val, ANV_WA_18019110168_PER_PRIMITIVE_REMAP_TABLE_OFFSET_MASK); } static void @@ -1035,8 +1055,9 @@ anv_shader_compile_mesh(struct anv_device *device, .tue_map = task_shader_data ? &task_shader_data->prog_data.task.map : NULL, - .load_provoking_vertex = mesh_load_provoking_vertex, - .load_provoking_vertex_data = (void *)&mesh_shader_data->bind_map, + .wa_18019110168_load_provoking_vertex = + wa_18019110168_load_provoking_vertex, + .wa_18019110168_data = (void *)&mesh_shader_data->bind_map, }; mesh_shader_data->code = (void *)brw_compile_mesh(compiler, ¶ms); @@ -1082,6 +1103,10 @@ anv_shader_compile_fs(struct anv_device *device, .allow_spilling = true, .max_polygons = UCHAR_MAX, + + .wa_18019110168_load_per_primitive_remap_table_offset = + wa_18019110168_load_per_primitive_remap_table, + .wa_18019110168_data = (void *)&shader_data->bind_map, }; if (intel_use_jay(devinfo, nir->info.stage)) { diff --git a/src/intel/vulkan/genX_cmd_draw.c b/src/intel/vulkan/genX_cmd_draw.c index 24de1fa6170..7d6377df206 100644 --- a/src/intel/vulkan/genX_cmd_draw.c +++ b/src/intel/vulkan/genX_cmd_draw.c @@ -628,13 +628,6 @@ fill_inline_params(uint32_t *inline_data, case ANV_INLINE_DWORD_PUSH_ADDRESS_UDW: inline_data[i] = push_addr64 >> 32; break; - case anv_drv_const_dword(gfx.mesh_provoking_vertex): { - const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx); - inline_data[i] = gfx->dyn_state.mesh_provoking_vertex | - ((gfx->shaders[MESA_SHADER_MESH]->kernel.offset + - mesh_prog_data->wa_18019110168_mapping_offset) >> 16); - break; - } default: inline_data[i] = push_data[bind_map->inline_dwords[i]]; break; diff --git a/src/intel/vulkan/genX_gfx_state.c b/src/intel/vulkan/genX_gfx_state.c index 1b52673097a..8872eff550c 100644 --- a/src/intel/vulkan/genX_gfx_state.c +++ b/src/intel/vulkan/genX_gfx_state.c @@ -876,6 +876,15 @@ update_fs_config(struct anv_gfx_dynamic_state *hw_state, }); SET(FS_CONFIG, fs_config, fs_config); + +#if INTEL_WA_18019110168_GFX_VER + if (mesh_prog_data && mesh_prog_data->map.wa_18019110168_active) { + SET(WA_18019110168, wa_18019110168, + (GET(wa_18019110168) & ~ANV_WA_18019110168_PER_PRIMITIVE_REMAP_TABLE_OFFSET_MASK) | + ((gfx->shaders[MESA_SHADER_MESH]->kernel.offset + + mesh_prog_data->wa_18019110168_mapping_offset))); + } +#endif } static bool @@ -2313,6 +2322,9 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state, update_sbe(hw_state, gfx, device); if ((gfx->dirty & ANV_CMD_DIRTY_PS) || +#if INTEL_WA_18019110168_GFX_VER + (gfx->dirty & ANV_CMD_DIRTY_MESH) || +#endif BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX) || @@ -2587,9 +2599,10 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state, ((gfx->dirty & ANV_CMD_DIRTY_MESH) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX)); if (mesh_provoking_vertex_update) { - SET(MESH_PROVOKING_VERTEX, mesh_provoking_vertex, - compute_mesh_provoking_vertex( - mesh_prog_data, dyn)); + SET(WA_18019110168, wa_18019110168, + (GET(wa_18019110168) & ~ANV_WA_18019110168_PROVOKING_VERTEX_MASK) | + compute_mesh_provoking_vertex( + mesh_prog_data, dyn)); } #endif } @@ -3663,27 +3676,22 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) } #endif -#if INTEL_WA_18019110168_GFX_VER - if (IS_DIRTY(MESH_PROVOKING_VERTEX)) - cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_MESH_BIT_EXT; -#endif - if (IS_DIRTY(FS_CONFIG)) { push_consts->gfx.fs_config = hw_state->fs_config; - -#if INTEL_WA_18019110168_GFX_VER - const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx); - if (mesh_prog_data) { - push_consts->gfx.fs_per_prim_remap_offset = - gfx->shaders[MESA_SHADER_MESH]->kernel.offset + - mesh_prog_data->wa_18019110168_mapping_offset; - } -#endif - cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; gfx->base.push_constants_data_dirty = true; } +#if INTEL_WA_18019110168_GFX_VER + if (IS_DIRTY(WA_18019110168)) { + push_consts->gfx.wa_18019110168 = hw_state->wa_18019110168; + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_MESH_BIT_EXT | + VK_SHADER_STAGE_FRAGMENT_BIT; + gfx->base.push_constants_data_dirty = true; + } +#endif + + #define anv_batch_emit_gfx(batch, cmd, name) ({ \ void *__dst = anv_batch_emit_dwords( \ batch, __anv_cmd_length(cmd)); \