anv/brw/nir: fix wa_18019110168

Several things were wrong:
  - incorrect offset in the FS push constant data
  - incorrect encoding of the 32-bit value that packs 2 fields (remap table offset & provoking vertex)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31384>
This commit is contained in:
Lionel Landwerlin 2026-03-31 11:27:49 +03:00 committed by Marge Bot
parent b8f5b58bf1
commit c30a4d4fdb
11 changed files with 104 additions and 68 deletions

View file

@ -356,7 +356,6 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_load_fs_z_c0_intel:
case nir_intrinsic_load_ray_base_mem_addr_intel:
case nir_intrinsic_load_ray_hw_stack_size_intel:
case nir_intrinsic_load_per_primitive_remap_intel:
case nir_intrinsic_load_core_count_arm:
case nir_intrinsic_load_core_max_id_arm:
case nir_intrinsic_load_warp_max_id_arm:

View file

@ -2800,9 +2800,6 @@ system_value("tess_config_intel", 1)
# Dynamic fragment shader parameters (see intel_fs_config) .
system_value("fs_config_intel", 1)
# Per primitive remapping table offset.
system_value("per_primitive_remap_intel", 1)
# The (linear) local invocation index provided in the payload of mesh/task shaders.
system_value("local_invocation_index_intel", 1)

View file

@ -1158,14 +1158,15 @@ brw_compile_mesh(const struct brw_compiler *compiler,
if (wa_18019110168_mapping[i] != -1)
remap_table[i] = prog_data->map.vue_map.varying_to_slot[wa_18019110168_mapping[i]];
}
uint32_t constant_data_aligned_size = align(nir->constant_data_size, 32);
uint8_t *const_data =
(uint8_t *) rzalloc_size(params->base.mem_ctx,
nir->constant_data_size + sizeof(remap_table));
constant_data_aligned_size + sizeof(remap_table));
memcpy(const_data, nir->constant_data, nir->constant_data_size);
memcpy(const_data + nir->constant_data_size, remap_table, sizeof(remap_table));
g.add_const_data(const_data, nir->constant_data_size + sizeof(remap_table));
memcpy(const_data + constant_data_aligned_size, remap_table, sizeof(remap_table));
g.add_const_data(const_data, constant_data_aligned_size + sizeof(remap_table));
prog_data->wa_18019110168_mapping_offset =
prog_data->base.base.const_data_offset + nir->constant_data_size;
prog_data->base.base.const_data_offset + constant_data_aligned_size;
} else {
g.add_const_data(nir->constant_data, nir->constant_data_size);
}

View file

@ -1410,12 +1410,12 @@ struct brw_compile_mesh_params {
struct brw_mesh_prog_data *prog_data;
const struct brw_tue_map *tue_map;
/** Load provoking vertex
/** Load provoking vertex for wa_18019110168
*
* The callback returns a 32bit integer representing the provoking vertex.
*/
void *load_provoking_vertex_data;
nir_def *(*load_provoking_vertex)(nir_builder *b, void *data);
void *wa_18019110168_data;
nir_def *(*wa_18019110168_load_provoking_vertex)(nir_builder *b, void *data);
};
const unsigned *
@ -1439,6 +1439,14 @@ struct brw_compile_fs_params {
bool allow_spilling;
bool use_rep_send;
uint8_t max_polygons;
/** Load per primitive remapping offset for wa_18019110168
*
* The callback returns a 32bit integer representing the offset of the
* table in the instruction heap.
*/
void *wa_18019110168_data;
nir_def *(*wa_18019110168_load_per_primitive_remap_table_offset)(nir_builder *b, void *data);
};
/**

View file

@ -4157,11 +4157,6 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
bld.MOV(retype(dest, BRW_TYPE_UD), brw_imm_ud(s.max_polygons));
break;
case nir_intrinsic_load_per_primitive_remap_intel:
bld.MOV(retype(dest, BRW_TYPE_UD),
brw_dynamic_per_primitive_remap(brw_fs_prog_data(s.prog_data)));
break;
default:
brw_from_nir_emit_intrinsic(ntb, bld, instr);
break;

View file

@ -231,7 +231,8 @@ mesh_convert_attrs_prim_to_vert(struct nir_shader *nir,
nir_def *zero = nir_imm_int(b, 0);
nir_def *provoking_vertex =
params->load_provoking_vertex(b, params->load_provoking_vertex_data);
params->wa_18019110168_load_provoking_vertex(
b, params->wa_18019110168_data);
nir_def *local_invocation_index = nir_load_local_invocation_index(b);
nir_def *cmp = nir_ieq(b, local_invocation_index, zero);
@ -500,7 +501,8 @@ brw_nir_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
nir_builder _b = nir_builder_at(nir_before_impl(impl)), *b = &_b;
uint64_t remapped_inputs = 0;
uint64_t old_per_primitive_inputs = 0;
uint64_t new_per_vertex_inputs = 0;
nir_foreach_shader_in_variable_safe(var, nir) {
gl_varying_slot location = var->data.location;
if (location == VARYING_SLOT_PRIMITIVE_COUNT ||
@ -524,10 +526,13 @@ brw_nir_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
new_var->data.interpolation = INTERP_MODE_FLAT;
new_derefs[location] = nir_build_deref_var(b, new_var);
old_per_primitive_inputs |= BITFIELD64_BIT(location);
new_per_vertex_inputs |= BITFIELD64_BIT(new_location);
}
nir->info.inputs_read |= remapped_inputs;
nir->info.per_primitive_inputs &= ~remapped_inputs;
nir->info.inputs_read |= new_per_vertex_inputs;
nir->info.per_primitive_inputs &= ~old_per_primitive_inputs;
NIR_PASS(_, nir, frag_update_derefs, new_derefs);
@ -571,7 +576,8 @@ brw_nir_frag_convert_attrs_prim_to_vert_indirect(struct nir_shader *nir,
nir_def *remap_table_addr =
nir_pack_64_2x32_split(
b,
nir_load_per_primitive_remap_intel(b),
params->wa_18019110168_load_per_primitive_remap_table_offset(
b, params->wa_18019110168_data),
nir_load_reloc_const_intel(
b, BRW_SHADER_RELOC_INSTRUCTION_BASE_ADDR_HIGH));
u_foreach_bit64(location, per_primitive_inputs) {

View file

@ -67,8 +67,8 @@ adjust_driver_push_values(nir_shader *nir,
if (data->needs_wa_18019110168) {
const uint32_t fs_per_prim_remap_start =
anv_drv_const_offset(gfx.fs_per_prim_remap_offset);
assert(anv_drv_const_size(gfx.fs_per_prim_remap_offset) <= 4);
anv_drv_const_offset(gfx.wa_18019110168);
assert(anv_drv_const_size(gfx.wa_18019110168) <= 4);
BITSET_SET(data->push_dwords, fs_per_prim_remap_start / 4);
}
}
@ -76,8 +76,8 @@ adjust_driver_push_values(nir_shader *nir,
if (nir->info.stage == MESA_SHADER_MESH &&
brw_nir_mesh_shader_needs_wa_18019110168(devinfo, nir)) {
const uint32_t mesh_provoking_vertex_start =
anv_drv_const_offset(gfx.mesh_provoking_vertex);
assert(anv_drv_const_size(gfx.mesh_provoking_vertex) <= 4);
anv_drv_const_offset(gfx.wa_18019110168);
assert(anv_drv_const_size(gfx.wa_18019110168) <= 4);
BITSET_SET(data->push_dwords, mesh_provoking_vertex_start / 4);
}
@ -811,7 +811,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
}
if (data.needs_wa_18019110168) {
const uint32_t fs_per_prim_remap_offset =
anv_drv_const_offset(gfx.fs_per_prim_remap_offset);
anv_drv_const_offset(gfx.wa_18019110168);
assert(fs_per_prim_remap_offset >= push_start);
fs_prog_data->per_primitive_remap_param =
fs_per_prim_remap_offset - push_start;

View file

@ -1167,7 +1167,6 @@ enum anv_pipeline_behavior {
#define ANV_INLINE_DWORD_PUSH_ADDRESS_LDW (UINT8_MAX - 0)
#define ANV_INLINE_DWORD_PUSH_ADDRESS_UDW (UINT8_MAX - 1)
#define ANV_INLINE_DWORD_MESH_PROVOKING_VERTEX (UINT8_MAX - 2)
struct anv_pipeline_bind_map {
unsigned char surface_blake3[BLAKE3_KEY_LEN];
@ -2055,6 +2054,7 @@ enum anv_gfx_state_bits {
ANV_GFX_STATE_WA_14018283232, /* Fake state to implement workaround */
ANV_GFX_STATE_WA_18038825448, /* Fake state to implement workaround */
ANV_GFX_STATE_WA_14024997852, /* Fake state to implement workaround */
ANV_GFX_STATE_WA_18019110168, /* Fake state to implement workaround */
ANV_GFX_STATE_TBIMR_TILE_PASS_INFO,
ANV_GFX_STATE_FS_CONFIG,
ANV_GFX_STATE_TESS_CONFIG,
@ -2462,9 +2462,9 @@ struct anv_gfx_dynamic_state {
uint32_t tess_config;
/**
* Provoking vertex index, sent to the mesh shader for Wa_18019110168.
* Prepared value for anv_push_constants::gfx::wa_18019110168.
*/
uint32_t mesh_provoking_vertex;
uint32_t wa_18019110168;
bool pma_fix;
@ -4396,9 +4396,13 @@ struct anv_push_constants {
/** Robust access pushed registers. */
uint8_t push_reg_mask[MESA_SHADER_STAGES][4];
/** Wa_18019110168 */
uint16_t mesh_provoking_vertex;
uint16_t fs_per_prim_remap_offset;
/** Wa_18019110168
* bits 4:0 : provoking vertex value
* bits 31:5 : per primitive table remapping offset
*/
#define ANV_WA_18019110168_PROVOKING_VERTEX_MASK ((1u << 5) - 1)
#define ANV_WA_18019110168_PER_PRIMITIVE_REMAP_TABLE_OFFSET_MASK (~ANV_WA_18019110168_PROVOKING_VERTEX_MASK)
uint32_t wa_18019110168;
} gfx;
struct {

View file

@ -991,22 +991,42 @@ anv_shader_compile_task(struct anv_device *device,
}
static nir_def *
mesh_load_provoking_vertex(nir_builder *b, void *data)
wa_18019110168_load_provoking_vertex(nir_builder *b, void *data)
{
const struct anv_pipeline_bind_map *bind_map = data;
nir_def *val = NULL;
for (uint32_t i = 0; i < bind_map->inline_dwords_count; i++) {
if (bind_map->inline_dwords[i] == anv_drv_const_dword(gfx.mesh_provoking_vertex)) {
return nir_load_inline_data_intel(
b, 1, 16, nir_imm_int(b, 0),
.base = i * 4 + anv_drv_const_offset(gfx.mesh_provoking_vertex) % 4);
if (bind_map->inline_dwords[i] == anv_drv_const_dword(gfx.wa_18019110168)) {
val = nir_load_inline_data_intel(
b, 1, 32, nir_imm_int(b, 0),
.base = i * 4);
break;
}
}
return nir_load_push_data_intel(b, 1, 16, nir_imm_int(b, 0),
.base = anv_drv_const_offset(gfx.mesh_provoking_vertex) -
bind_map->push_ranges[0].start,
.range = anv_drv_const_size(gfx.mesh_provoking_vertex));
if (val == NULL) {
val = nir_load_push_data_intel(b, 1, 32, nir_imm_int(b, 0),
.base = anv_drv_const_offset(gfx.wa_18019110168) -
bind_map->push_ranges[0].start * 32,
.range = anv_drv_const_size(gfx.wa_18019110168));
}
return nir_iand_imm(b, val, ANV_WA_18019110168_PROVOKING_VERTEX_MASK);
}
/**
 * Wa_18019110168: emit NIR that reads the packed workaround dword from the
 * push constant data and keeps only the per-primitive remap table offset
 * bits.
 *
 * The data argument is the opaque callback cookie; it is read as the
 * shader's anv_pipeline_bind_map. The returned value is the loaded 32-bit
 * dword ANDed with ANV_WA_18019110168_PER_PRIMITIVE_REMAP_TABLE_OFFSET_MASK.
 */
static nir_def *
wa_18019110168_load_per_primitive_remap_table(nir_builder *b, void *data)
{
   const struct anv_pipeline_bind_map *bind_map = data;

   /* The start of the first push range is scaled by 32 before being
    * subtracted from the driver constant offset (presumably the range start
    * is expressed in 32-byte units -- confirm against the push range
    * definition).
    */
   nir_def *packed =
      nir_load_push_data_intel(b, 1, 32, nir_imm_int(b, 0),
                               .base = anv_drv_const_offset(gfx.wa_18019110168) -
                                       bind_map->push_ranges[0].start * 32,
                               .range = anv_drv_const_size(gfx.wa_18019110168));

   return nir_iand_imm(b, packed,
                       ANV_WA_18019110168_PER_PRIMITIVE_REMAP_TABLE_OFFSET_MASK);
}
static void
@ -1035,8 +1055,9 @@ anv_shader_compile_mesh(struct anv_device *device,
.tue_map = task_shader_data ?
&task_shader_data->prog_data.task.map :
NULL,
.load_provoking_vertex = mesh_load_provoking_vertex,
.load_provoking_vertex_data = (void *)&mesh_shader_data->bind_map,
.wa_18019110168_load_provoking_vertex =
wa_18019110168_load_provoking_vertex,
.wa_18019110168_data = (void *)&mesh_shader_data->bind_map,
};
mesh_shader_data->code = (void *)brw_compile_mesh(compiler, &params);
@ -1082,6 +1103,10 @@ anv_shader_compile_fs(struct anv_device *device,
.allow_spilling = true,
.max_polygons = UCHAR_MAX,
.wa_18019110168_load_per_primitive_remap_table_offset =
wa_18019110168_load_per_primitive_remap_table,
.wa_18019110168_data = (void *)&shader_data->bind_map,
};
if (intel_use_jay(devinfo, nir->info.stage)) {

View file

@ -628,13 +628,6 @@ fill_inline_params(uint32_t *inline_data,
case ANV_INLINE_DWORD_PUSH_ADDRESS_UDW:
inline_data[i] = push_addr64 >> 32;
break;
case anv_drv_const_dword(gfx.mesh_provoking_vertex): {
const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx);
inline_data[i] = gfx->dyn_state.mesh_provoking_vertex |
((gfx->shaders[MESA_SHADER_MESH]->kernel.offset +
mesh_prog_data->wa_18019110168_mapping_offset) >> 16);
break;
}
default:
inline_data[i] = push_data[bind_map->inline_dwords[i]];
break;

View file

@ -876,6 +876,15 @@ update_fs_config(struct anv_gfx_dynamic_state *hw_state,
});
SET(FS_CONFIG, fs_config, fs_config);
#if INTEL_WA_18019110168_GFX_VER
if (mesh_prog_data && mesh_prog_data->map.wa_18019110168_active) {
SET(WA_18019110168, wa_18019110168,
(GET(wa_18019110168) & ~ANV_WA_18019110168_PER_PRIMITIVE_REMAP_TABLE_OFFSET_MASK) |
((gfx->shaders[MESA_SHADER_MESH]->kernel.offset +
mesh_prog_data->wa_18019110168_mapping_offset)));
}
#endif
}
static bool
@ -2313,6 +2322,9 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
update_sbe(hw_state, gfx, device);
if ((gfx->dirty & ANV_CMD_DIRTY_PS) ||
#if INTEL_WA_18019110168_GFX_VER
(gfx->dirty & ANV_CMD_DIRTY_MESH) ||
#endif
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX) ||
@ -2587,9 +2599,10 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
((gfx->dirty & ANV_CMD_DIRTY_MESH) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX));
if (mesh_provoking_vertex_update) {
SET(MESH_PROVOKING_VERTEX, mesh_provoking_vertex,
compute_mesh_provoking_vertex(
mesh_prog_data, dyn));
SET(WA_18019110168, wa_18019110168,
(GET(wa_18019110168) & ~ANV_WA_18019110168_PROVOKING_VERTEX_MASK) |
compute_mesh_provoking_vertex(
mesh_prog_data, dyn));
}
#endif
}
@ -3663,27 +3676,22 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
}
#endif
#if INTEL_WA_18019110168_GFX_VER
if (IS_DIRTY(MESH_PROVOKING_VERTEX))
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_MESH_BIT_EXT;
#endif
if (IS_DIRTY(FS_CONFIG)) {
push_consts->gfx.fs_config = hw_state->fs_config;
#if INTEL_WA_18019110168_GFX_VER
const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx);
if (mesh_prog_data) {
push_consts->gfx.fs_per_prim_remap_offset =
gfx->shaders[MESA_SHADER_MESH]->kernel.offset +
mesh_prog_data->wa_18019110168_mapping_offset;
}
#endif
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
gfx->base.push_constants_data_dirty = true;
}
#if INTEL_WA_18019110168_GFX_VER
if (IS_DIRTY(WA_18019110168)) {
push_consts->gfx.wa_18019110168 = hw_state->wa_18019110168;
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_MESH_BIT_EXT |
VK_SHADER_STAGE_FRAGMENT_BIT;
gfx->base.push_constants_data_dirty = true;
}
#endif
#define anv_batch_emit_gfx(batch, cmd, name) ({ \
void *__dst = anv_batch_emit_dwords( \
batch, __anv_cmd_length(cmd)); \