mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-21 09:50:36 +02:00
anv/brw: move Wa_18019110168 handling to backend
We simplify the implementation by assuming the worse case, copying entire per-vertex regions if necessary. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Ivan Briano <ivan.briano@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35103>
This commit is contained in:
parent
8e7e0ef75a
commit
5cc66e2c8d
11 changed files with 634 additions and 590 deletions
|
|
@ -693,19 +693,22 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
|
|||
|
||||
if (mue_map != NULL) {
|
||||
memcpy(&vue_map, &mue_map->vue_map, sizeof(vue_map));
|
||||
|
||||
memcpy(per_primitive_offsets,
|
||||
mue_map->per_primitive_offsets,
|
||||
sizeof(mue_map->per_primitive_offsets));
|
||||
|
||||
u_foreach_bit64(location, per_primitive_inputs) {
|
||||
assert(per_primitive_offsets[location] != -1);
|
||||
if (!mue_map->wa_18019110168_active) {
|
||||
u_foreach_bit64(location, per_primitive_inputs) {
|
||||
assert(per_primitive_offsets[location] != -1);
|
||||
|
||||
first_read_offset = MIN2(first_read_offset,
|
||||
(uint32_t)per_primitive_offsets[location]);
|
||||
per_primitive_stride =
|
||||
MAX2((uint32_t)per_primitive_offsets[location] + 16,
|
||||
per_primitive_stride);
|
||||
first_read_offset = MIN2(first_read_offset,
|
||||
(uint32_t)per_primitive_offsets[location]);
|
||||
per_primitive_stride =
|
||||
MAX2((uint32_t)per_primitive_offsets[location] + 16,
|
||||
per_primitive_stride);
|
||||
}
|
||||
} else {
|
||||
first_read_offset = per_primitive_stride = 0;
|
||||
}
|
||||
} else {
|
||||
brw_compute_vue_map(devinfo, &vue_map, inputs_read,
|
||||
|
|
@ -1465,6 +1468,12 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
|||
const unsigned max_subgroup_size = 32;
|
||||
|
||||
brw_nir_apply_key(nir, compiler, &key->base, max_subgroup_size);
|
||||
|
||||
if (params->mue_map && params->mue_map->wa_18019110168_active) {
|
||||
brw_nir_frag_convert_attrs_prim_to_vert(
|
||||
nir, params->mue_map->per_primitive_offsets);
|
||||
}
|
||||
|
||||
brw_nir_lower_fs_inputs(nir, devinfo, key);
|
||||
brw_nir_lower_fs_outputs(nir);
|
||||
|
||||
|
|
|
|||
|
|
@ -520,7 +520,8 @@ static void
|
|||
brw_compute_mue_map(const struct brw_compiler *compiler,
|
||||
nir_shader *nir, struct brw_mue_map *map,
|
||||
enum brw_mesh_index_format index_format,
|
||||
enum intel_vue_layout vue_layout)
|
||||
enum intel_vue_layout vue_layout,
|
||||
int *wa_18019110168_mapping)
|
||||
{
|
||||
memset(map, 0, sizeof(*map));
|
||||
|
||||
|
|
@ -586,6 +587,23 @@ brw_compute_mue_map(const struct brw_compiler *compiler,
|
|||
map->per_primitive_offsets[VARYING_SLOT_CULL_PRIMITIVE] = 12;
|
||||
}
|
||||
|
||||
/* If Wa_18019110168 is active, store the remapping in the
|
||||
* per_primitive_offsets array.
|
||||
*/
|
||||
if (wa_18019110168_mapping) {
|
||||
map->wa_18019110168_active = true;
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(map->per_primitive_offsets); i++) {
|
||||
if (i == VARYING_SLOT_PRIMITIVE_COUNT ||
|
||||
i == VARYING_SLOT_PRIMITIVE_INDICES ||
|
||||
i == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
|
||||
i == VARYING_SLOT_LAYER ||
|
||||
i == VARYING_SLOT_VIEWPORT ||
|
||||
i == VARYING_SLOT_CULL_PRIMITIVE)
|
||||
continue;
|
||||
map->per_primitive_offsets[i] = wa_18019110168_mapping[i];
|
||||
}
|
||||
}
|
||||
|
||||
map->per_primitive_stride = align(map->per_primitive_stride, 32);
|
||||
|
||||
map->size += map->per_primitive_stride * map->max_primitives;
|
||||
|
|
@ -1158,6 +1176,19 @@ brw_compile_mesh(const struct brw_compiler *compiler,
|
|||
nir->info.clip_distance_array_size;
|
||||
prog_data->primitive_type = nir->info.mesh.primitive_type;
|
||||
|
||||
/* Apply this workaround before trying to pack indices because this can
|
||||
* increase the number of vertices and therefore change the decision about
|
||||
* packing.
|
||||
*/
|
||||
const bool apply_wa_18019110168 =
|
||||
brw_nir_mesh_shader_needs_wa_18019110168(devinfo, nir);
|
||||
int wa_18019110168_mapping[VARYING_SLOT_MAX];
|
||||
memset(wa_18019110168_mapping, -1, sizeof(wa_18019110168_mapping));
|
||||
if (apply_wa_18019110168) {
|
||||
brw_nir_mesh_convert_attrs_prim_to_vert(nir, params,
|
||||
wa_18019110168_mapping);
|
||||
}
|
||||
|
||||
struct index_packing_state index_packing_state = {};
|
||||
if (brw_can_pack_primitive_indices(nir, &index_packing_state)) {
|
||||
if (index_packing_state.original_prim_indices)
|
||||
|
|
@ -1170,15 +1201,16 @@ brw_compile_mesh(const struct brw_compiler *compiler,
|
|||
prog_data->uses_drawid =
|
||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
|
||||
|
||||
brw_nir_lower_tue_inputs(nir, params->tue_map);
|
||||
|
||||
NIR_PASS(_, nir, brw_nir_lower_mesh_primitive_count);
|
||||
NIR_PASS(_, nir, nir_opt_dce);
|
||||
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
|
||||
|
||||
brw_nir_lower_tue_inputs(nir, params->tue_map);
|
||||
|
||||
brw_compute_mue_map(compiler, nir, &prog_data->map,
|
||||
prog_data->index_format,
|
||||
key->base.vue_layout);
|
||||
key->base.vue_layout,
|
||||
apply_wa_18019110168 ? wa_18019110168_mapping : NULL);
|
||||
brw_nir_lower_mue_outputs(nir, &prog_data->map);
|
||||
|
||||
prog_data->autostrip_enable = brw_mesh_autostrip_enable(compiler, nir, &prog_data->map);
|
||||
|
|
|
|||
|
|
@ -41,10 +41,14 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
struct ra_regs;
|
||||
struct nir_builder;
|
||||
struct nir_def;
|
||||
struct nir_shader;
|
||||
struct shader_info;
|
||||
|
||||
struct nir_shader_compiler_options;
|
||||
typedef struct nir_builder nir_builder;
|
||||
typedef struct nir_def nir_def;
|
||||
typedef struct nir_shader nir_shader;
|
||||
|
||||
#define REG_CLASS_COUNT 20
|
||||
|
|
@ -1271,6 +1275,8 @@ struct brw_mue_map {
|
|||
/* VUE map for the per vertex attributes */
|
||||
struct intel_vue_map vue_map;
|
||||
|
||||
bool wa_18019110168_active;
|
||||
|
||||
/* Offset in bytes of each per primitive relative to
|
||||
* per_primitive_offset (-1 if unused)
|
||||
*/
|
||||
|
|
@ -1520,6 +1526,13 @@ struct brw_compile_mesh_params {
|
|||
const struct brw_mesh_prog_key *key;
|
||||
struct brw_mesh_prog_data *prog_data;
|
||||
const struct brw_tue_map *tue_map;
|
||||
|
||||
/** Load provoking vertex
|
||||
*
|
||||
* The callback returns a 32bit integer representing the provoking vertex.
|
||||
*/
|
||||
void *load_provoking_vertex_data;
|
||||
nir_def *(*load_provoking_vertex)(nir_builder *b, void *data);
|
||||
};
|
||||
|
||||
const unsigned *
|
||||
|
|
|
|||
|
|
@ -322,6 +322,27 @@ nir_variable *
|
|||
brw_nir_find_complete_variable_with_location(nir_shader *shader,
|
||||
nir_variable_mode mode,
|
||||
int location);
|
||||
|
||||
static inline bool
|
||||
brw_nir_mesh_shader_needs_wa_18019110168(const struct intel_device_info *devinfo,
|
||||
nir_shader *shader)
|
||||
{
|
||||
return intel_needs_workaround(devinfo, 18019110168) &&
|
||||
(shader->info.outputs_written & (VARYING_BIT_CLIP_DIST0 |
|
||||
VARYING_BIT_CLIP_DIST1)) &&
|
||||
(shader->info.per_primitive_outputs & ~(VARYING_BIT_PRIMITIVE_INDICES |
|
||||
VARYING_BIT_PRIMITIVE_COUNT));
|
||||
}
|
||||
|
||||
void
|
||||
brw_nir_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir,
|
||||
struct brw_compile_mesh_params *params,
|
||||
int *wa_mapping);
|
||||
|
||||
bool
|
||||
brw_nir_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
|
||||
const int *wa_mapping);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
537
src/intel/compiler/brw_nir_wa_18019110168.c
Normal file
537
src/intel/compiler/brw_nir_wa_18019110168.c
Normal file
|
|
@ -0,0 +1,537 @@
|
|||
/*
|
||||
* Copyright (c) 2022-2025 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "brw_nir.h"
|
||||
|
||||
/*
|
||||
* Wa_18019110168 for gfx 12.5.
|
||||
*
|
||||
* This file implements workaround for HW bug, which leads to fragment shader
|
||||
* reading incorrect per-primitive data if mesh shader, in addition to writing
|
||||
* per-primitive data, also writes to gl_ClipDistance.
|
||||
*
|
||||
* The suggested solution to that bug is to not use per-primitive data by:
|
||||
* - creating new vertices for provoking vertices shared by multiple primitives
|
||||
* - converting per-primitive attributes read by fragment shader to flat
|
||||
* per-vertex attributes for the provoking vertex
|
||||
* - modifying fragment shader to read those per-vertex attributes
|
||||
*
|
||||
* There are at least 2 type of failures not handled very well:
|
||||
* - if the number of varying slots overflows, than only some attributes will
|
||||
* be converted, leading to corruption of those unconverted attributes
|
||||
* - if the overall MUE size is so large it doesn't fit in URB, then URB
|
||||
* allocation will fail in some way; unfortunately there's no good way to
|
||||
* say how big MUE will be at this moment and back out
|
||||
*/
|
||||
static bool
|
||||
copy_primitive_count_write(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
void *data)
|
||||
{
|
||||
if (intrin->intrinsic != nir_intrinsic_set_vertex_and_primitive_count)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_after_instr(&intrin->instr);
|
||||
|
||||
nir_variable *primitive_count = (nir_variable *)data;
|
||||
nir_store_var(b, primitive_count, intrin->src[1].ssa, 0x1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static nir_variable *
|
||||
copy_primitive_count_writes(nir_shader *nir)
|
||||
{
|
||||
nir_variable *primitive_count =
|
||||
nir_local_variable_create(nir_shader_get_entrypoint(nir),
|
||||
glsl_uint_type(),
|
||||
"Wa_18019110168_primitive_count");
|
||||
|
||||
nir_shader_intrinsics_pass(nir,
|
||||
copy_primitive_count_write,
|
||||
nir_metadata_control_flow,
|
||||
primitive_count);
|
||||
|
||||
return primitive_count;
|
||||
}
|
||||
|
||||
struct mapping {
|
||||
nir_variable *temp_var;
|
||||
nir_deref_instr *per_prim_deref;
|
||||
nir_deref_instr *per_vert_deref;
|
||||
};
|
||||
|
||||
static bool
|
||||
rewrite_derefs_to_per_prim_vars(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
void *data)
|
||||
{
|
||||
if (intrin->intrinsic != nir_intrinsic_load_deref &&
|
||||
intrin->intrinsic != nir_intrinsic_store_deref)
|
||||
return false;
|
||||
|
||||
nir_deref_instr *old_deref =
|
||||
nir_instr_as_deref(intrin->src[0].ssa->parent_instr);
|
||||
nir_variable *var = nir_deref_instr_get_variable(old_deref);
|
||||
if (var == NULL)
|
||||
return false;
|
||||
|
||||
struct mapping *mapping = data;
|
||||
if (mapping[var->data.location].temp_var == NULL)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
||||
nir_deref_instr *new_deref = nir_clone_deref_instr(
|
||||
b, mapping[var->data.location].temp_var, old_deref);
|
||||
|
||||
nir_src_rewrite(&intrin->src[0], &new_deref->def);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
mesh_convert_attrs_prim_to_vert(struct nir_shader *nir,
|
||||
struct brw_compile_mesh_params *params,
|
||||
int *wa_mapping)
|
||||
{
|
||||
const uint64_t outputs_written = nir->info.outputs_written;
|
||||
const uint64_t per_primitive_outputs =
|
||||
nir->info.per_primitive_outputs &
|
||||
~BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES);
|
||||
const uint64_t other_outputs = outputs_written & ~per_primitive_outputs;
|
||||
|
||||
uint64_t all_outputs = outputs_written;
|
||||
|
||||
const uint64_t remapped_outputs = outputs_written &
|
||||
nir->info.per_primitive_outputs &
|
||||
~(BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE) |
|
||||
BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES) |
|
||||
BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_COUNT) |
|
||||
BITFIELD64_BIT(VARYING_SLOT_LAYER) |
|
||||
BITFIELD64_BIT(VARYING_SLOT_VIEWPORT) |
|
||||
BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE));
|
||||
|
||||
/* indexed by slot of per-prim attribute */
|
||||
struct mapping mapping[VARYING_SLOT_MAX] = { {NULL, NULL, NULL}, };
|
||||
|
||||
/* Figure out the mapping between per-primitive and new per-vertex outputs. */
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
nir_foreach_shader_out_variable(var, nir) {
|
||||
int location = var->data.location;
|
||||
|
||||
if (!(BITFIELD64_BIT(location) & remapped_outputs))
|
||||
continue;
|
||||
|
||||
assert(location == VARYING_SLOT_PRIMITIVE_ID ||
|
||||
location >= VARYING_SLOT_VAR0);
|
||||
|
||||
const struct glsl_type *type = var->type;
|
||||
if (nir_is_arrayed_io(var, MESA_SHADER_MESH)) {
|
||||
assert(glsl_type_is_array(type));
|
||||
type = glsl_get_array_element(type);
|
||||
}
|
||||
|
||||
unsigned num_slots = glsl_count_attribute_slots(type, false);
|
||||
|
||||
for (gl_varying_slot slot = VARYING_SLOT_VAR0; slot <= VARYING_SLOT_VAR31; slot++) {
|
||||
uint64_t mask = BITFIELD64_MASK(num_slots) << slot;
|
||||
if ((all_outputs & mask) == 0) {
|
||||
wa_mapping[location] = slot;
|
||||
all_outputs |= mask;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (wa_mapping[location] == 0) {
|
||||
fprintf(stderr, "Not enough space for hardware per-primitive data corruption work around.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
mapping[location].temp_var =
|
||||
nir_local_variable_create(impl,
|
||||
glsl_array_type(type,
|
||||
nir->info.mesh.max_primitives_out,
|
||||
glsl_get_std140_size(type, false)),
|
||||
var->name);
|
||||
}
|
||||
|
||||
/* Rewrite all the per-primitive variable reads/writes to the temporary
|
||||
* variables.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_shader_intrinsics_pass,
|
||||
rewrite_derefs_to_per_prim_vars,
|
||||
nir_metadata_control_flow, mapping);
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
unsigned vertices_per_primitive =
|
||||
mesa_vertices_per_prim(nir->info.mesh.primitive_type);
|
||||
|
||||
nir_variable *primitive_count_var = copy_primitive_count_writes(nir);
|
||||
|
||||
nir_builder _b = nir_builder_at(nir_after_impl(impl)), *b = &_b;
|
||||
|
||||
/* wait for all subgroups to finish */
|
||||
nir_barrier(b, SCOPE_WORKGROUP);
|
||||
|
||||
/* Build a list of per-vertex variables we might need to copy */
|
||||
unsigned num_other_variables = 0;
|
||||
nir_foreach_shader_out_variable(var, nir) {
|
||||
if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0)
|
||||
continue;
|
||||
num_other_variables++;
|
||||
}
|
||||
|
||||
nir_variable *primitive_indices_var = NULL;
|
||||
nir_deref_instr **per_vertex_derefs =
|
||||
ralloc_array(mem_ctx, nir_deref_instr *, num_other_variables);
|
||||
|
||||
unsigned num_per_vertex_variables = 0;
|
||||
unsigned processed = 0;
|
||||
nir_foreach_shader_out_variable(var, nir) {
|
||||
if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0)
|
||||
continue;
|
||||
|
||||
switch (var->data.location) {
|
||||
case VARYING_SLOT_PRIMITIVE_COUNT:
|
||||
break;
|
||||
case VARYING_SLOT_PRIMITIVE_INDICES:
|
||||
primitive_indices_var = var;
|
||||
break;
|
||||
default: {
|
||||
const struct glsl_type *type = var->type;
|
||||
assert(glsl_type_is_array(type));
|
||||
const struct glsl_type *array_element_type =
|
||||
glsl_get_array_element(type);
|
||||
|
||||
/* Resize type of array output to make space for one extra vertex
|
||||
* attribute for each primitive, so we ensure that the provoking
|
||||
* vertex is not shared between primitives.
|
||||
*/
|
||||
const struct glsl_type *new_type =
|
||||
glsl_array_type(array_element_type,
|
||||
glsl_get_length(type) +
|
||||
nir->info.mesh.max_primitives_out,
|
||||
0);
|
||||
|
||||
var->type = new_type;
|
||||
|
||||
per_vertex_derefs[num_per_vertex_variables++] =
|
||||
nir_build_deref_var(b, var);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
++processed;
|
||||
}
|
||||
assert(processed == num_other_variables);
|
||||
|
||||
nir_def *zero = nir_imm_int(b, 0);
|
||||
|
||||
nir_def *provoking_vertex =
|
||||
params->load_provoking_vertex(b, params->load_provoking_vertex_data);
|
||||
nir_def *local_invocation_index = nir_load_local_invocation_index(b);
|
||||
|
||||
nir_def *cmp = nir_ieq(b, local_invocation_index, zero);
|
||||
nir_if *if_stmt = nir_push_if(b, cmp);
|
||||
{
|
||||
assert(primitive_count_var != NULL);
|
||||
assert(primitive_indices_var != NULL);
|
||||
|
||||
/* Update types of derefs to match type of variables they (de)reference. */
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_deref)
|
||||
continue;
|
||||
|
||||
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
||||
if (deref->deref_type != nir_deref_type_var)
|
||||
continue;
|
||||
|
||||
if (deref->var->type != deref->type)
|
||||
deref->type = deref->var->type;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Create new per-vertex output variables mirroring per-primitive variables
|
||||
* and create derefs for both old and new variables.
|
||||
*/
|
||||
nir_foreach_shader_out_variable(var, nir) {
|
||||
gl_varying_slot location = var->data.location;
|
||||
|
||||
if ((BITFIELD64_BIT(location) & remapped_outputs) == 0)
|
||||
continue;
|
||||
|
||||
const struct glsl_type *type = var->type;
|
||||
assert(glsl_type_is_array(type));
|
||||
const struct glsl_type *array_element_type = glsl_get_array_element(type);
|
||||
|
||||
const struct glsl_type *new_type =
|
||||
glsl_array_type(array_element_type,
|
||||
nir->info.mesh.max_vertices_out +
|
||||
nir->info.mesh.max_primitives_out,
|
||||
0);
|
||||
|
||||
nir_variable *new_var =
|
||||
nir_variable_create(nir, nir_var_shader_out, new_type, var->name);
|
||||
assert(wa_mapping[location] >= VARYING_SLOT_VAR0);
|
||||
assert(wa_mapping[location] <= VARYING_SLOT_VAR31);
|
||||
new_var->data.location = wa_mapping[location];
|
||||
new_var->data.interpolation = INTERP_MODE_FLAT;
|
||||
|
||||
mapping[location].per_vert_deref = nir_build_deref_var(b, new_var);
|
||||
mapping[location].per_prim_deref = nir_build_deref_var(b, mapping[location].temp_var);
|
||||
}
|
||||
|
||||
nir_def *trueconst = nir_imm_true(b);
|
||||
|
||||
/*
|
||||
* for each Primitive (0 : primitiveCount)
|
||||
* if VertexUsed[PrimitiveIndices[Primitive][provoking vertex]]
|
||||
* create 1 new vertex at offset "Vertex"
|
||||
* copy per vert attributes of provoking vertex to the new one
|
||||
* update PrimitiveIndices[Primitive][provoking vertex]
|
||||
* Vertex++
|
||||
* else
|
||||
* VertexUsed[PrimitiveIndices[Primitive][provoking vertex]] := true
|
||||
*
|
||||
* for each attribute : mapping
|
||||
* copy per_prim_attr(Primitive) to per_vert_attr[Primitive][provoking vertex]
|
||||
*/
|
||||
|
||||
/* primitive count */
|
||||
nir_def *primitive_count = nir_load_var(b, primitive_count_var);
|
||||
|
||||
/* primitive index */
|
||||
nir_variable *primitive_var =
|
||||
nir_local_variable_create(impl, glsl_uint_type(), "Primitive");
|
||||
nir_deref_instr *primitive_deref = nir_build_deref_var(b, primitive_var);
|
||||
nir_store_deref(b, primitive_deref, zero, 1);
|
||||
|
||||
/* vertex index */
|
||||
nir_variable *vertex_var =
|
||||
nir_local_variable_create(impl, glsl_uint_type(), "Vertex");
|
||||
nir_deref_instr *vertex_deref = nir_build_deref_var(b, vertex_var);
|
||||
nir_store_deref(b, vertex_deref, nir_imm_int(b, nir->info.mesh.max_vertices_out), 1);
|
||||
|
||||
/* used vertices bitvector */
|
||||
const struct glsl_type *used_vertex_type =
|
||||
glsl_array_type(glsl_bool_type(),
|
||||
nir->info.mesh.max_vertices_out,
|
||||
0);
|
||||
nir_variable *used_vertex_var =
|
||||
nir_local_variable_create(impl, used_vertex_type, "VertexUsed");
|
||||
nir_deref_instr *used_vertex_deref =
|
||||
nir_build_deref_var(b, used_vertex_var);
|
||||
/* Initialize it as "not used" */
|
||||
for (unsigned i = 0; i < nir->info.mesh.max_vertices_out; ++i) {
|
||||
nir_deref_instr *indexed_used_vertex_deref =
|
||||
nir_build_deref_array(b, used_vertex_deref, nir_imm_int(b, i));
|
||||
nir_store_deref(b, indexed_used_vertex_deref, nir_imm_false(b), 1);
|
||||
}
|
||||
|
||||
nir_loop *loop = nir_push_loop(b);
|
||||
{
|
||||
nir_def *primitive_id = nir_load_deref(b, primitive_deref);
|
||||
nir_def *cmp = nir_ige(b, primitive_id, primitive_count);
|
||||
|
||||
nir_push_if(b, cmp);
|
||||
{
|
||||
nir_jump(b, nir_jump_break);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_deref_instr *primitive_indices_deref =
|
||||
nir_build_deref_var(b, primitive_indices_var);
|
||||
nir_deref_instr *indexed_primitive_indices_deref;
|
||||
nir_def *src_vertex;
|
||||
nir_def *prim_indices;
|
||||
|
||||
/* array of vectors, we have to extract index out of array deref */
|
||||
indexed_primitive_indices_deref =
|
||||
nir_build_deref_array(b, primitive_indices_deref, primitive_id);
|
||||
prim_indices = nir_load_deref(b, indexed_primitive_indices_deref);
|
||||
src_vertex = nir_vector_extract(b, prim_indices, provoking_vertex);
|
||||
|
||||
nir_def *dst_vertex = nir_load_deref(b, vertex_deref);
|
||||
|
||||
nir_deref_instr *indexed_used_vertex_deref =
|
||||
nir_build_deref_array(b, used_vertex_deref, src_vertex);
|
||||
nir_def *used_vertex = nir_load_deref(b, indexed_used_vertex_deref);
|
||||
|
||||
nir_push_if(b, used_vertex);
|
||||
{
|
||||
/* If the vertex is used by another primitive, we need to make an
|
||||
* entire copy of the per-vertex variables.
|
||||
*/
|
||||
for (unsigned a = 0; a < num_per_vertex_variables; ++a) {
|
||||
nir_deref_instr *attr_arr = per_vertex_derefs[a];
|
||||
nir_deref_instr *src = nir_build_deref_array(b, attr_arr, src_vertex);
|
||||
nir_deref_instr *dst = nir_build_deref_array(b, attr_arr, dst_vertex);
|
||||
|
||||
assert(per_vertex_derefs[a]->instr.type == nir_instr_type_deref);
|
||||
|
||||
nir_copy_deref(b, dst, src);
|
||||
}
|
||||
|
||||
/* Rebuild the vertices indices for the primitive by pointing to
|
||||
* the new provoking vertex */
|
||||
nir_def *new_val =
|
||||
nir_vector_insert(b, prim_indices, dst_vertex, provoking_vertex);
|
||||
nir_store_deref(b, indexed_primitive_indices_deref, new_val,
|
||||
BITFIELD_MASK(vertices_per_primitive));
|
||||
|
||||
nir_store_deref(b, vertex_deref, nir_iadd_imm(b, dst_vertex, 1), 1);
|
||||
|
||||
/* Finally write the per-primitive values into the per-vertex
|
||||
* block at remapped locations.
|
||||
*/
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) {
|
||||
if (!mapping[i].per_vert_deref)
|
||||
continue;
|
||||
|
||||
assert(mapping[i].per_prim_deref->instr.type == nir_instr_type_deref);
|
||||
|
||||
nir_deref_instr *src =
|
||||
nir_build_deref_array(b, mapping[i].per_prim_deref, primitive_id);
|
||||
nir_deref_instr *dst =
|
||||
nir_build_deref_array(b, mapping[i].per_vert_deref, dst_vertex);
|
||||
|
||||
nir_copy_deref(b, dst, src);
|
||||
}
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
/* If the vertex is not used yet by any primitive, we just have to
|
||||
* write the per-primitive values into the per-vertex block at
|
||||
* remapped locations.
|
||||
*/
|
||||
nir_store_deref(b, indexed_used_vertex_deref, trueconst, 1);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) {
|
||||
if (!mapping[i].per_vert_deref)
|
||||
continue;
|
||||
|
||||
assert(mapping[i].per_prim_deref->instr.type == nir_instr_type_deref);
|
||||
|
||||
nir_deref_instr *src =
|
||||
nir_build_deref_array(b, mapping[i].per_prim_deref, primitive_id);
|
||||
nir_deref_instr *dst =
|
||||
nir_build_deref_array(b, mapping[i].per_vert_deref, src_vertex);
|
||||
|
||||
nir_copy_deref(b, dst, src);
|
||||
}
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_store_deref(b, primitive_deref, nir_iadd_imm(b, primitive_id, 1), 1);
|
||||
}
|
||||
nir_pop_loop(b, loop);
|
||||
}
|
||||
nir_pop_if(b, if_stmt); /* local_invocation_index == 0 */
|
||||
|
||||
nir->info.mesh.max_vertices_out += nir->info.mesh.max_primitives_out;
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
brw_nir_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir,
|
||||
struct brw_compile_mesh_params *params,
|
||||
int *wa_mapping)
|
||||
{
|
||||
NIR_PASS(_, nir, mesh_convert_attrs_prim_to_vert, params, wa_mapping);
|
||||
|
||||
/* Remove per-primitive references */
|
||||
NIR_PASS(_, nir, nir_opt_dce);
|
||||
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
|
||||
|
||||
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
||||
|
||||
/* deal with copy_derefs */
|
||||
NIR_PASS(_, nir, nir_split_var_copies);
|
||||
NIR_PASS(_, nir, nir_lower_var_copies);
|
||||
}
|
||||
|
||||
static bool
|
||||
frag_update_derefs_instr(struct nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_deref)
|
||||
return false;
|
||||
|
||||
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
||||
if (deref->deref_type != nir_deref_type_var)
|
||||
return false;
|
||||
|
||||
nir_variable *var = deref->var;
|
||||
if (!(var->data.mode & nir_var_shader_in))
|
||||
return false;
|
||||
|
||||
int location = var->data.location;
|
||||
nir_deref_instr **new_derefs = (nir_deref_instr **)data;
|
||||
if (new_derefs[location] == NULL)
|
||||
return false;
|
||||
|
||||
nir_def_replace(&deref->def, &new_derefs[location]->def);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
frag_update_derefs(nir_shader *shader, nir_deref_instr **mapping)
|
||||
{
|
||||
return nir_shader_instructions_pass(shader, frag_update_derefs_instr,
|
||||
nir_metadata_none, (void *)mapping);
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
|
||||
const int *wa_mapping)
|
||||
{
|
||||
/* indexed by slot of per-prim attribute */
|
||||
nir_deref_instr *new_derefs[VARYING_SLOT_MAX] = {NULL, };
|
||||
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
nir_builder _b = nir_builder_at(nir_before_impl(impl)), *b = &_b;
|
||||
|
||||
uint64_t remapped_inputs = 0;
|
||||
nir_foreach_shader_in_variable_safe(var, nir) {
|
||||
gl_varying_slot location = var->data.location;
|
||||
if (location == VARYING_SLOT_PRIMITIVE_COUNT ||
|
||||
location == VARYING_SLOT_PRIMITIVE_INDICES ||
|
||||
location == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
|
||||
location == VARYING_SLOT_LAYER ||
|
||||
location == VARYING_SLOT_VIEWPORT ||
|
||||
location == VARYING_SLOT_CULL_PRIMITIVE)
|
||||
continue;
|
||||
|
||||
gl_varying_slot new_location = wa_mapping[location];
|
||||
if (new_location == -1)
|
||||
continue;
|
||||
|
||||
assert(wa_mapping[new_location] == -1);
|
||||
|
||||
nir_variable *new_var =
|
||||
nir_variable_create(nir, nir_var_shader_in, var->type, var->name);
|
||||
new_var->data.location = new_location;
|
||||
new_var->data.location_frac = var->data.location_frac;
|
||||
new_var->data.interpolation = INTERP_MODE_FLAT;
|
||||
|
||||
new_derefs[location] = nir_build_deref_var(b, new_var);
|
||||
}
|
||||
|
||||
nir->info.inputs_read |= remapped_inputs;
|
||||
nir->info.per_primitive_inputs &= ~remapped_inputs;
|
||||
|
||||
NIR_PASS(_, nir, frag_update_derefs, new_derefs);
|
||||
|
||||
nir_shader_gather_info(nir, impl);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -83,6 +83,7 @@ libintel_compiler_brw_files = files(
|
|||
'brw_nir_lower_storage_image.c',
|
||||
'brw_nir_lower_texel_address.c',
|
||||
'brw_nir_lower_texture.c',
|
||||
'brw_nir_wa_18019110168.c',
|
||||
'brw_nir_opt_fsat.c',
|
||||
'brw_nir_rt.h',
|
||||
'brw_nir_rt.c',
|
||||
|
|
|
|||
|
|
@ -55,7 +55,6 @@ static const driOptionDescription anv_dri_options[] = {
|
|||
DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
|
||||
DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
|
||||
DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
|
||||
DRI_CONF_ANV_MESH_CONV_PRIM_ATTRS_TO_VERT_ATTRS(-2)
|
||||
DRI_CONF_ANV_EMULATE_READ_WITHOUT_FORMAT(false)
|
||||
DRI_CONF_FORCE_VK_VENDOR()
|
||||
DRI_CONF_FAKE_SPARSE(false)
|
||||
|
|
@ -174,8 +173,6 @@ anv_init_dri_options(struct anv_instance *instance)
|
|||
driQueryOptionb(&instance->dri_options, "no_16bit");
|
||||
instance->intel_enable_wa_14018912822 =
|
||||
driQueryOptionb(&instance->dri_options, "intel_enable_wa_14018912822");
|
||||
instance->mesh_conv_prim_attrs_to_vert_attrs =
|
||||
driQueryOptioni(&instance->dri_options, "anv_mesh_conv_prim_attrs_to_vert_attrs");
|
||||
instance->emulate_read_without_format =
|
||||
driQueryOptionb(&instance->dri_options, "anv_emulate_read_without_format");
|
||||
instance->fp64_workaround_enabled =
|
||||
|
|
|
|||
|
|
@ -1,559 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2022 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "anv_private.h"
|
||||
#include "anv_nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
/*
|
||||
* Wa_18019110168 for gfx 12.5.
|
||||
*
|
||||
* This file implements workaround for HW bug, which leads to fragment shader
|
||||
* reading incorrect per-primitive data if mesh shader, in addition to writing
|
||||
* per-primitive data, also writes to gl_ClipDistance.
|
||||
*
|
||||
* The suggested solution to that bug is to not use per-primitive data by:
|
||||
* - creating new vertices for provoking vertices shared by multiple primitives
|
||||
* - converting per-primitive attributes read by fragment shader to flat
|
||||
* per-vertex attributes for the provoking vertex
|
||||
* - modifying fragment shader to read those per-vertex attributes
|
||||
*
|
||||
* There are at least 2 type of failures not handled very well:
|
||||
* - if the number of varying slots overflows, than only some attributes will
|
||||
* be converted, leading to corruption of those unconverted attributes
|
||||
* - if the overall MUE size is so large it doesn't fit in URB, then URB
|
||||
* allocation will fail in some way; unfortunately there's no good way to
|
||||
* say how big MUE will be at this moment and back out
|
||||
*
|
||||
* This workaround needs to be applied before linking, so that unused outputs
|
||||
* created by this code are removed at link time.
|
||||
*
|
||||
* This workaround can be controlled by a driconf option to either disable it,
|
||||
* lower its scope or force enable it.
|
||||
*
|
||||
* Option "anv_mesh_conv_prim_attrs_to_vert_attrs" is evaluated like this:
|
||||
* value == 0 - disable workaround
|
||||
* value < 0 - enable ONLY if workaround is required
|
||||
* value > 0 - enable ALWAYS, even if it's not required
|
||||
* abs(value) >= 1 - attribute conversion
|
||||
* abs(value) >= 2 - attribute conversion and vertex duplication
|
||||
*
|
||||
* Default: -2 (both parts of the work around, ONLY if it's required)
|
||||
*
|
||||
*/
|
||||
static bool
|
||||
copy_primitive_count_write(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
void *data)
|
||||
{
|
||||
if (intrin->intrinsic != nir_intrinsic_set_vertex_and_primitive_count)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_after_instr(&intrin->instr);
|
||||
|
||||
nir_variable *primitive_count = (nir_variable *)data;
|
||||
nir_store_var(b, primitive_count, intrin->src[1].ssa, 0x1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static nir_variable *
|
||||
copy_primitive_count_writes(nir_shader *nir)
|
||||
{
|
||||
nir_variable *primitive_count =
|
||||
nir_local_variable_create(nir_shader_get_entrypoint(nir),
|
||||
glsl_uint_type(),
|
||||
"Wa_18019110168_primitive_count");
|
||||
|
||||
nir_shader_intrinsics_pass(nir,
|
||||
copy_primitive_count_write,
|
||||
nir_metadata_control_flow,
|
||||
primitive_count);
|
||||
|
||||
return primitive_count;
|
||||
}
|
||||
|
||||
/*
 * Mesh-shader half of the Wa_18019110168 workaround (see the comment at the
 * top of this file).
 *
 * Copies per-primitive outputs read by the fragment shader into newly
 * created flat per-vertex outputs on each primitive's provoking vertex,
 * optionally duplicating provoking vertices shared between primitives
 * (dup_vertices) so the copy cannot clobber another primitive's data.
 *
 * wa_mapping is filled with per-primitive-slot -> new per-vertex-slot
 * assignments (0 means "not remapped") and is later consumed by
 * anv_frag_convert_attrs_prim_to_vert() on the fragment shader side.
 * fs_inputs is the bitmask of varying slots the fragment shader reads.
 * mem_ctx is a ralloc context for temporary allocations.
 * force_conversion applies the conversion even when the triggering
 * condition (clip-distance writes) is absent.
 *
 * Returns true if the shader was modified.
 */
static bool
anv_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir,
                                    gl_varying_slot *wa_mapping,
                                    uint64_t fs_inputs,
                                    void *mem_ctx,
                                    const bool dup_vertices,
                                    const bool force_conversion)
{
   /* PRIMITIVE_INDICES is the connectivity itself, never remapped. */
   uint64_t per_primitive_outputs = nir->info.per_primitive_outputs;
   per_primitive_outputs &= ~BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES);

   if (per_primitive_outputs == 0)
      return false;

   uint64_t outputs_written = nir->info.outputs_written;
   uint64_t other_outputs = outputs_written & ~per_primitive_outputs;

   /* The HW bug only triggers when gl_ClipDistance is also written; without
    * it (and without the driconf override) there is nothing to do.
    */
   if ((other_outputs & (VARYING_BIT_CLIP_DIST0 | VARYING_BIT_CLIP_DIST1)) == 0)
      if (!force_conversion)
         return false;

   uint64_t all_outputs = outputs_written;
   unsigned attrs = 0;

   uint64_t remapped_outputs = outputs_written & per_primitive_outputs;
   remapped_outputs &= ~BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE);

   /* Skip locations not read by the fragment shader, because they will
    * be eliminated at linking time. Note that some fs inputs may be
    * removed only after optimizations, so it's possible that we will
    * create too many variables.
    */
   remapped_outputs &= fs_inputs;

   /* Figure out the mapping between per-primitive and new per-vertex outputs. */
   nir_foreach_shader_out_variable(var, nir) {
      int location = var->data.location;

      if (!(BITFIELD64_BIT(location) & remapped_outputs))
         continue;

      /* Although primitive shading rate, layer and viewport have predefined
       * place in MUE Primitive Header (so we can't really move them anywhere),
       * we have to copy them to per-vertex space if fragment shader reads them.
       */
      assert(location == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
             location == VARYING_SLOT_LAYER ||
             location == VARYING_SLOT_VIEWPORT ||
             location == VARYING_SLOT_PRIMITIVE_ID ||
             location >= VARYING_SLOT_VAR0);

      const struct glsl_type *type = var->type;
      if (nir_is_arrayed_io(var, MESA_SHADER_MESH)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }

      unsigned num_slots = glsl_count_attribute_slots(type, false);

      /* First-fit search for a run of num_slots free VARn slots. */
      for (gl_varying_slot slot = VARYING_SLOT_VAR0; slot <= VARYING_SLOT_VAR31; slot++) {
         uint64_t mask = BITFIELD64_MASK(num_slots) << slot;
         if ((all_outputs & mask) == 0) {
            wa_mapping[location] = slot;
            all_outputs |= mask;
            attrs++;
            break;
         }
      }

      /* Out of free slots: the remaining per-primitive attributes stay
       * unconverted (known limitation — see file-header comment).
       */
      if (wa_mapping[location] == 0) {
         fprintf(stderr, "Not enough space for hardware per-primitive data corruption work around.\n");
         break;
      }
   }

   if (attrs == 0)
      if (!force_conversion)
         return false;

   unsigned vertices_per_primitive =
      mesa_vertices_per_prim(nir->info.mesh.primitive_type);

   nir_variable *primitive_count_var = copy_primitive_count_writes(nir);

   /* All of the fix-up code below is appended at the very end of the
    * entrypoint, after every output has been written.
    */
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   nir_builder b = nir_builder_at(nir_after_impl(impl));

   /* wait for all subgroups to finish */
   nir_barrier(&b, SCOPE_WORKGROUP);

   nir_def *zero = nir_imm_int(&b, 0);

   /* Provoking vertex index (0 or last) is passed by the driver as inline data. */
   nir_def *provoking_vertex =
      nir_load_inline_data_intel(
         &b, 1, 32,
         .base = ANV_INLINE_PARAM_MESH_PROVOKING_VERTEX);
   nir_def *local_invocation_index = nir_load_local_invocation_index(&b);

   /* Only invocation 0 runs the (serial) copy loop. */
   nir_def *cmp = nir_ieq(&b, local_invocation_index, zero);
   nir_if *if_stmt = nir_push_if(&b, cmp);
   {
      nir_variable *primitive_indices_var = NULL;

      unsigned num_other_variables = 0;
      nir_foreach_shader_out_variable(var, b.shader) {
         if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0)
            continue;
         num_other_variables++;
      }

      nir_deref_instr **per_vertex_derefs =
         ralloc_array(mem_ctx, nir_deref_instr *, num_other_variables);

      unsigned num_per_vertex_variables = 0;

      unsigned processed = 0;
      nir_foreach_shader_out_variable(var, b.shader) {
         if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0)
            continue;

         switch (var->data.location) {
         case VARYING_SLOT_PRIMITIVE_COUNT:
            break;
         case VARYING_SLOT_PRIMITIVE_INDICES:
            primitive_indices_var = var;
            break;
         default: {
            const struct glsl_type *type = var->type;
            assert(glsl_type_is_array(type));
            const struct glsl_type *array_element_type =
               glsl_get_array_element(type);

            if (dup_vertices) {
               /*
                * Resize type of array output to make space for one extra
                * vertex attribute for each primitive, so we ensure that
                * the provoking vertex is not shared between primitives.
                */
               const struct glsl_type *new_type =
                  glsl_array_type(array_element_type,
                                  glsl_get_length(type) +
                                  nir->info.mesh.max_primitives_out,
                                  0);

               var->type = new_type;
            }

            per_vertex_derefs[num_per_vertex_variables++] =
               nir_build_deref_var(&b, var);
            break;
         }
         }

         ++processed;
      }
      assert(processed == num_other_variables);

      assert(primitive_count_var != NULL);
      assert(primitive_indices_var != NULL);

      /* Update types of derefs to match type of variables they (de)reference. */
      if (dup_vertices) {
         nir_foreach_function_impl(impl, b.shader) {
            nir_foreach_block(block, impl) {
               nir_foreach_instr(instr, block) {
                  if (instr->type != nir_instr_type_deref)
                     continue;

                  nir_deref_instr *deref = nir_instr_as_deref(instr);
                  if (deref->deref_type != nir_deref_type_var)
                     continue;

                  if (deref->var->type != deref->type)
                     deref->type = deref->var->type;
               }
            }
         }
      }

      /* indexed by slot of per-prim attribute */
      struct {
         nir_deref_instr *per_prim_deref;
         nir_deref_instr *per_vert_deref;
      } mapping[VARYING_SLOT_MAX] = {{NULL, NULL}, };

      /* Create new per-vertex output variables mirroring per-primitive variables
       * and create derefs for both old and new variables.
       */
      nir_foreach_shader_out_variable(var, b.shader) {
         gl_varying_slot location = var->data.location;

         if ((BITFIELD64_BIT(location) & (outputs_written & per_primitive_outputs)) == 0)
            continue;
         if (wa_mapping[location] == 0)
            continue;

         const struct glsl_type *type = var->type;
         assert(glsl_type_is_array(type));
         const struct glsl_type *array_element_type = glsl_get_array_element(type);

         const struct glsl_type *new_type =
            glsl_array_type(array_element_type,
                            nir->info.mesh.max_vertices_out +
                            (dup_vertices ? nir->info.mesh.max_primitives_out : 0),
                            0);

         nir_variable *new_var =
            nir_variable_create(b.shader, nir_var_shader_out, new_type, var->name);
         assert(wa_mapping[location] >= VARYING_SLOT_VAR0);
         assert(wa_mapping[location] <= VARYING_SLOT_VAR31);
         new_var->data.location = wa_mapping[location];
         /* Flat: per-primitive data must not be interpolated. */
         new_var->data.interpolation = INTERP_MODE_FLAT;

         mapping[location].per_vert_deref = nir_build_deref_var(&b, new_var);
         mapping[location].per_prim_deref = nir_build_deref_var(&b, var);
      }

      nir_def *trueconst = nir_imm_true(&b);

      /*
       * for each Primitive (0 : primitiveCount)
       *    if VertexUsed[PrimitiveIndices[Primitive][provoking vertex]]
       *       create 1 new vertex at offset "Vertex"
       *       copy per vert attributes of provoking vertex to the new one
       *       update PrimitiveIndices[Primitive][provoking vertex]
       *       Vertex++
       *    else
       *       VertexUsed[PrimitiveIndices[Primitive][provoking vertex]] := true
       *
       *    for each attribute : mapping
       *       copy per_prim_attr(Primitive) to per_vert_attr[Primitive][provoking vertex]
       */

      /* primitive count */
      nir_def *primitive_count = nir_load_var(&b, primitive_count_var);

      /* primitive index */
      nir_variable *primitive_var =
         nir_local_variable_create(impl, glsl_uint_type(), "Primitive");
      nir_deref_instr *primitive_deref = nir_build_deref_var(&b, primitive_var);
      nir_store_deref(&b, primitive_deref, zero, 1);

      /* vertex index; new (duplicated) vertices are appended after
       * max_vertices_out
       */
      nir_variable *vertex_var =
         nir_local_variable_create(impl, glsl_uint_type(), "Vertex");
      nir_deref_instr *vertex_deref = nir_build_deref_var(&b, vertex_var);
      nir_store_deref(&b, vertex_deref, nir_imm_int(&b, nir->info.mesh.max_vertices_out), 1);

      /* used vertices bitvector */
      const struct glsl_type *used_vertex_type =
         glsl_array_type(glsl_bool_type(),
                         nir->info.mesh.max_vertices_out,
                         0);
      nir_variable *used_vertex_var =
         nir_local_variable_create(impl, used_vertex_type, "VertexUsed");
      nir_deref_instr *used_vertex_deref =
         nir_build_deref_var(&b, used_vertex_var);
      /* Initialize it as "not used" */
      for (unsigned i = 0; i < nir->info.mesh.max_vertices_out; ++i) {
         nir_deref_instr *indexed_used_vertex_deref =
            nir_build_deref_array(&b, used_vertex_deref, nir_imm_int(&b, i));
         nir_store_deref(&b, indexed_used_vertex_deref, nir_imm_false(&b), 1);
      }

      nir_loop *loop = nir_push_loop(&b);
      {
         nir_def *primitive = nir_load_deref(&b, primitive_deref);
         nir_def *cmp = nir_ige(&b, primitive, primitive_count);

         /* Loop exit: Primitive >= primitive_count. */
         nir_if *loop_check = nir_push_if(&b, cmp);
         nir_jump(&b, nir_jump_break);
         nir_pop_if(&b, loop_check);

         nir_deref_instr *primitive_indices_deref =
            nir_build_deref_var(&b, primitive_indices_var);
         nir_deref_instr *indexed_primitive_indices_deref;
         nir_def *src_vertex;
         nir_def *prim_indices;

         /* array of vectors, we have to extract index out of array deref */
         indexed_primitive_indices_deref = nir_build_deref_array(&b, primitive_indices_deref, primitive);
         prim_indices = nir_load_deref(&b, indexed_primitive_indices_deref);
         src_vertex = nir_vector_extract(&b, prim_indices, provoking_vertex);

         nir_def *dst_vertex = nir_load_deref(&b, vertex_deref);

         nir_deref_instr *indexed_used_vertex_deref =
            nir_build_deref_array(&b, used_vertex_deref, src_vertex);
         nir_def *used_vertex = nir_load_deref(&b, indexed_used_vertex_deref);
         /* Without vertex duplication we never take the "used" branch. */
         if (!dup_vertices)
            used_vertex = nir_imm_false(&b);

         nir_if *vertex_used_check = nir_push_if(&b, used_vertex);
         {
            /* Provoking vertex already claimed by an earlier primitive:
             * clone it to a fresh slot at dst_vertex.
             */
            for (unsigned a = 0; a < num_per_vertex_variables; ++a) {
               nir_deref_instr *attr_arr = per_vertex_derefs[a];
               nir_deref_instr *src = nir_build_deref_array(&b, attr_arr, src_vertex);
               nir_deref_instr *dst = nir_build_deref_array(&b, attr_arr, dst_vertex);

               nir_copy_deref(&b, dst, src);
            }

            /* replace one component of primitive indices vector */
            nir_def *new_val =
               nir_vector_insert(&b, prim_indices, dst_vertex, provoking_vertex);

            /* and store complete vector */
            nir_store_deref(&b, indexed_primitive_indices_deref, new_val,
                            BITFIELD_MASK(vertices_per_primitive));

            nir_store_deref(&b, vertex_deref, nir_iadd_imm(&b, dst_vertex, 1), 1);

            /* Copy the per-primitive attributes onto the freshly cloned vertex. */
            for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) {
               if (!mapping[i].per_vert_deref)
                  continue;

               nir_deref_instr *src =
                  nir_build_deref_array(&b, mapping[i].per_prim_deref, primitive);
               nir_deref_instr *dst =
                  nir_build_deref_array(&b, mapping[i].per_vert_deref, dst_vertex);

               nir_copy_deref(&b, dst, src);
            }
         }
         nir_push_else(&b, vertex_used_check);
         {
            /* First use of this provoking vertex: mark it and copy the
             * per-primitive attributes onto it directly.
             */
            nir_store_deref(&b, indexed_used_vertex_deref, trueconst, 1);

            for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) {
               if (!mapping[i].per_vert_deref)
                  continue;

               nir_deref_instr *src =
                  nir_build_deref_array(&b, mapping[i].per_prim_deref, primitive);
               nir_deref_instr *dst =
                  nir_build_deref_array(&b, mapping[i].per_vert_deref, src_vertex);

               nir_copy_deref(&b, dst, src);
            }

         }
         nir_pop_if(&b, vertex_used_check);

         nir_store_deref(&b, primitive_deref, nir_iadd_imm(&b, primitive, 1), 1);
      }
      nir_pop_loop(&b, loop);
   }
   nir_pop_if(&b, if_stmt); /* local_invocation_index == 0 */

   if (dup_vertices)
      nir->info.mesh.max_vertices_out += nir->info.mesh.max_primitives_out;

   if (should_print_nir(nir)) {
      printf("%s\n", __func__);
      nir_print_shader(nir, stdout);
   }

   /* deal with copy_derefs */
   NIR_PASS(_, nir, nir_split_var_copies);
   NIR_PASS(_, nir, nir_lower_var_copies);

   nir_shader_gather_info(nir, impl);

   return true;
}
|
||||
|
||||
static bool
|
||||
anv_frag_update_derefs_instr(struct nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_deref)
|
||||
return false;
|
||||
|
||||
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
||||
if (deref->deref_type != nir_deref_type_var)
|
||||
return false;
|
||||
|
||||
nir_variable *var = deref->var;
|
||||
if (!(var->data.mode & nir_var_shader_in))
|
||||
return false;
|
||||
|
||||
int location = var->data.location;
|
||||
nir_deref_instr **new_derefs = (nir_deref_instr **)data;
|
||||
if (new_derefs[location] == NULL)
|
||||
return false;
|
||||
|
||||
nir_instr_remove(&deref->instr);
|
||||
nir_def_rewrite_uses(&deref->def, &new_derefs[location]->def);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
anv_frag_update_derefs(nir_shader *shader, nir_deref_instr **mapping)
|
||||
{
|
||||
return nir_shader_instructions_pass(shader, anv_frag_update_derefs_instr,
|
||||
nir_metadata_none, (void *)mapping);
|
||||
}
|
||||
|
||||
/* Update fragment shader inputs with new ones.
 *
 * Fragment-shader half of Wa_18019110168: for every input whose slot was
 * remapped by anv_mesh_convert_attrs_prim_to_vert() (wa_mapping[slot] != 0),
 * create a flat per-vertex input at the new slot and redirect all reads of
 * the old per-primitive input to it.
 */
static void
anv_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
                                    gl_varying_slot *wa_mapping)
{
   /* indexed by slot of per-prim attribute */
   nir_deref_instr *new_derefs[VARYING_SLOT_MAX] = {NULL, };

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   /* New derefs are built at the top of the entrypoint so they dominate
    * every use that gets rewritten below.
    */
   nir_builder b = nir_builder_at(nir_before_impl(impl));

   /* _safe: we add new input variables while iterating. */
   nir_foreach_shader_in_variable_safe(var, nir) {
      gl_varying_slot location = var->data.location;
      gl_varying_slot new_location = wa_mapping[location];
      if (new_location == 0)
         continue;

      /* The target slot must not itself be remapped. */
      assert(wa_mapping[new_location] == 0);

      nir_variable *new_var =
         nir_variable_create(b.shader, nir_var_shader_in, var->type, var->name);
      new_var->data.location = new_location;
      new_var->data.location_frac = var->data.location_frac;
      /* Flat: the value is per-(provoking-)vertex data, not interpolated. */
      new_var->data.interpolation = INTERP_MODE_FLAT;

      new_derefs[location] = nir_build_deref_var(&b, new_var);
   }

   NIR_PASS(_, nir, anv_frag_update_derefs, new_derefs);

   nir_shader_gather_info(nir, impl);
}
|
||||
|
||||
void
|
||||
anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
|
||||
struct nir_shader *fs_nir,
|
||||
struct anv_device *device)
|
||||
{
|
||||
const struct intel_device_info *devinfo = device->info;
|
||||
|
||||
int mesh_conv_prim_attrs_to_vert_attrs =
|
||||
device->physical->instance->mesh_conv_prim_attrs_to_vert_attrs;
|
||||
if (mesh_conv_prim_attrs_to_vert_attrs < 0 &&
|
||||
!intel_needs_workaround(devinfo, 18019110168))
|
||||
mesh_conv_prim_attrs_to_vert_attrs = 0;
|
||||
|
||||
if (mesh_conv_prim_attrs_to_vert_attrs != 0) {
|
||||
uint64_t fs_inputs = 0;
|
||||
nir_foreach_shader_in_variable(var, fs_nir)
|
||||
fs_inputs |= BITFIELD64_BIT(var->data.location);
|
||||
|
||||
void *stage_ctx = ralloc_context(NULL);
|
||||
|
||||
gl_varying_slot wa_mapping[VARYING_SLOT_MAX] = { 0, };
|
||||
|
||||
const bool dup_vertices = abs(mesh_conv_prim_attrs_to_vert_attrs) >= 2;
|
||||
const bool force_conversion = mesh_conv_prim_attrs_to_vert_attrs > 0;
|
||||
|
||||
if (anv_mesh_convert_attrs_prim_to_vert(ms_nir, wa_mapping,
|
||||
fs_inputs, stage_ctx,
|
||||
dup_vertices, force_conversion))
|
||||
anv_frag_convert_attrs_prim_to_vert(fs_nir, wa_mapping);
|
||||
|
||||
ralloc_free(stage_ctx);
|
||||
}
|
||||
}
|
||||
|
|
@ -1455,6 +1455,14 @@ anv_pipeline_link_mesh(const struct brw_compiler *compiler,
|
|||
}
|
||||
}
|
||||
|
||||
/* Backend callback emitting the NIR that loads the provoking-vertex index
 * the driver passes as inline data (ANV_INLINE_PARAM_MESH_PROVOKING_VERTEX);
 * `data` is unused here but required by the callback signature.
 */
static nir_def *
mesh_load_provoking_vertex(nir_builder *b, void *data)
{
   return nir_load_inline_data_intel(
      b, 1, 32,
      .base = ANV_INLINE_PARAM_MESH_PROVOKING_VERTEX);
}
|
||||
|
||||
static void
|
||||
anv_pipeline_compile_mesh(const struct brw_compiler *compiler,
|
||||
void *mem_ctx,
|
||||
|
|
@ -1475,6 +1483,7 @@ anv_pipeline_compile_mesh(const struct brw_compiler *compiler,
|
|||
},
|
||||
.key = &mesh_stage->key.mesh,
|
||||
.prog_data = &mesh_stage->prog_data.mesh,
|
||||
.load_provoking_vertex = mesh_load_provoking_vertex,
|
||||
};
|
||||
|
||||
if (prev_stage) {
|
||||
|
|
@ -2323,12 +2332,6 @@ anv_graphics_pipeline_compile(struct anv_graphics_base_pipeline *pipeline,
|
|||
anv_pipeline_nir_preprocess(&pipeline->base, &stages[s]);
|
||||
}
|
||||
|
||||
if (stages[MESA_SHADER_MESH].info && stages[MESA_SHADER_FRAGMENT].info) {
|
||||
anv_apply_per_prim_attr_wa(stages[MESA_SHADER_MESH].nir,
|
||||
stages[MESA_SHADER_FRAGMENT].nir,
|
||||
device);
|
||||
}
|
||||
|
||||
/* Walk backwards to link */
|
||||
struct anv_pipeline_stage *next_stage = NULL;
|
||||
for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
|
||||
|
|
|
|||
|
|
@ -170,7 +170,6 @@ libanv_files = files(
|
|||
'anv_kmd_backend.h',
|
||||
'anv_measure.c',
|
||||
'anv_measure.h',
|
||||
'anv_mesh_perprim_wa.c',
|
||||
'anv_nir.h',
|
||||
'anv_nir_apply_pipeline_layout.c',
|
||||
'anv_nir_compute_push_layout.c',
|
||||
|
|
|
|||
|
|
@ -824,15 +824,6 @@
|
|||
DRI_CONF_OPT_B(anv_force_filter_addr_rounding, def, \
|
||||
"Force min/mag filter address rounding to be enabled even for NEAREST sampling")
|
||||
|
||||
#define DRI_CONF_ANV_MESH_CONV_PRIM_ATTRS_TO_VERT_ATTRS(def) \
|
||||
DRI_CONF_OPT_E(anv_mesh_conv_prim_attrs_to_vert_attrs, def, -2, 2, \
|
||||
"Apply workaround for gfx12.5 per-prim attribute corruption HW bug", \
|
||||
DRI_CONF_ENUM(-2, "enable attribute conversion and vertex duplication ONLY if needed") \
|
||||
DRI_CONF_ENUM(-1, "enable attribute conversion ONLY if needed") \
|
||||
DRI_CONF_ENUM(0, "disable workaround") \
|
||||
DRI_CONF_ENUM(1, "enable attribute conversion ALWAYS") \
|
||||
DRI_CONF_ENUM(2, "enable attribute conversion and vertex duplication ALWAYS") )
|
||||
|
||||
#define DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(def) \
|
||||
DRI_CONF_OPT_B(fp64_workaround_enabled, def, \
|
||||
"Use softpf64 when the shader uses float64, but the device doesn't support that type")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue