mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 20:00:10 +01:00
brw: handle wa_18019110168 with independent shader compilation
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Ivan Briano <ivan.briano@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35103>
This commit is contained in:
parent
bc8d18aee2
commit
fcf4401824
13 changed files with 283 additions and 42 deletions
|
|
@ -361,6 +361,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
||||||
case nir_intrinsic_load_max_polygon_intel:
|
case nir_intrinsic_load_max_polygon_intel:
|
||||||
case nir_intrinsic_load_ray_base_mem_addr_intel:
|
case nir_intrinsic_load_ray_base_mem_addr_intel:
|
||||||
case nir_intrinsic_load_ray_hw_stack_size_intel:
|
case nir_intrinsic_load_ray_hw_stack_size_intel:
|
||||||
|
case nir_intrinsic_load_per_primitive_remap_intel:
|
||||||
is_divergent = false;
|
is_divergent = false;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2336,6 +2336,10 @@ intrinsic("read_attribute_payload_intel", dest_comp=1, bit_sizes=[32],
|
||||||
src_comp=[1],
|
src_comp=[1],
|
||||||
flags=[CAN_ELIMINATE, CAN_REORDER])
|
flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||||
|
|
||||||
|
# Populate the per-primitive payload at an offset
|
||||||
|
# src[] = { value, offset }
|
||||||
|
intrinsic("store_per_primitive_payload_intel", src_comp=[-1], indices=[BASE, COMPONENT])
|
||||||
|
|
||||||
# Number of data items being operated on for a SIMD program.
|
# Number of data items being operated on for a SIMD program.
|
||||||
system_value("simd_width_intel", 1)
|
system_value("simd_width_intel", 1)
|
||||||
|
|
||||||
|
|
@ -2417,6 +2421,9 @@ intrinsic("load_inline_data_intel", [], dest_comp=0,
|
||||||
# Dynamic fragment shader parameters.
|
# Dynamic fragment shader parameters.
|
||||||
system_value("fs_msaa_intel", 1)
|
system_value("fs_msaa_intel", 1)
|
||||||
|
|
||||||
|
# Per primitive remapping table offset.
|
||||||
|
system_value("per_primitive_remap_intel", 1)
|
||||||
|
|
||||||
# Intrinsics for Intel bindless thread dispatch
|
# Intrinsics for Intel bindless thread dispatch
|
||||||
# BASE=brw_topology_id
|
# BASE=brw_topology_id
|
||||||
system_value("topology_id_intel", 1, indices=[BASE])
|
system_value("topology_id_intel", 1, indices=[BASE])
|
||||||
|
|
|
||||||
|
|
@ -4692,12 +4692,13 @@ iris_compute_first_urb_slot_required(struct iris_compiled_shader *fs_shader,
|
||||||
const struct intel_vue_map *prev_stage_vue_map)
|
const struct intel_vue_map *prev_stage_vue_map)
|
||||||
{
|
{
|
||||||
#if GFX_VER >= 9
|
#if GFX_VER >= 9
|
||||||
uint32_t read_offset, read_length, num_varyings, primid_offset;
|
uint32_t read_offset, read_length, num_varyings, primid_offset, flat_inputs;
|
||||||
brw_compute_sbe_per_vertex_urb_read(prev_stage_vue_map,
|
brw_compute_sbe_per_vertex_urb_read(prev_stage_vue_map,
|
||||||
false /* mesh*/,
|
false /* mesh*/,
|
||||||
|
false /* per_primitive_remapping */,
|
||||||
brw_wm_prog_data(fs_shader->brw_prog_data),
|
brw_wm_prog_data(fs_shader->brw_prog_data),
|
||||||
&read_offset, &read_length, &num_varyings,
|
&read_offset, &read_length, &num_varyings,
|
||||||
&primid_offset);
|
&primid_offset, &flat_inputs);
|
||||||
return 2 * read_offset;
|
return 2 * read_offset;
|
||||||
#else
|
#else
|
||||||
const struct iris_fs_data *fs_data = iris_fs_data(fs_shader);
|
const struct iris_fs_data *fs_data = iris_fs_data(fs_shader);
|
||||||
|
|
|
||||||
|
|
@ -1303,6 +1303,12 @@ brw_assign_urb_setup(brw_shader &s)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (inst->dst.file == ATTR) {
|
||||||
|
inst->dst = remap_attr_reg(s, prog_data, inst->dst,
|
||||||
|
urb_start, inst->exec_size);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < inst->sources; i++) {
|
for (int i = 0; i < inst->sources; i++) {
|
||||||
if (inst->src[i].file == ATTR) {
|
if (inst->src[i].file == ATTR) {
|
||||||
inst->src[i] = remap_attr_reg(s, prog_data, inst->src[i],
|
inst->src[i] = remap_attr_reg(s, prog_data, inst->src[i],
|
||||||
|
|
@ -1468,12 +1474,22 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
||||||
|
|
||||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||||
const unsigned max_subgroup_size = 32;
|
const unsigned max_subgroup_size = 32;
|
||||||
|
unsigned max_polygons = MAX2(1, params->max_polygons);
|
||||||
|
|
||||||
brw_nir_apply_key(nir, compiler, &key->base, max_subgroup_size);
|
brw_nir_apply_key(nir, compiler, &key->base, max_subgroup_size);
|
||||||
|
|
||||||
if (params->mue_map && params->mue_map->wa_18019110168_active) {
|
if (brw_nir_fragment_shader_needs_wa_18019110168(devinfo, key->mesh_input, nir)) {
|
||||||
brw_nir_frag_convert_attrs_prim_to_vert(
|
if (params->mue_map && params->mue_map->wa_18019110168_active) {
|
||||||
nir, params->mue_map->per_primitive_offsets);
|
brw_nir_frag_convert_attrs_prim_to_vert(
|
||||||
|
nir, params->mue_map->per_primitive_offsets);
|
||||||
|
} else {
|
||||||
|
NIR_PASS(_, nir, brw_nir_frag_convert_attrs_prim_to_vert_indirect,
|
||||||
|
devinfo, params);
|
||||||
|
}
|
||||||
|
/* Remapping per-primitive inputs into unused per-vertex inputs cannot
|
||||||
|
* work with multipolygon.
|
||||||
|
*/
|
||||||
|
max_polygons = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
brw_nir_lower_fs_inputs(nir, devinfo, key);
|
brw_nir_lower_fs_inputs(nir, devinfo, key);
|
||||||
|
|
@ -1559,8 +1575,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
||||||
unsigned max_dispatch_width = reqd_dispatch_width ? reqd_dispatch_width : 32;
|
unsigned max_dispatch_width = reqd_dispatch_width ? reqd_dispatch_width : 32;
|
||||||
brw_shader *vbase = NULL;
|
brw_shader *vbase = NULL;
|
||||||
|
|
||||||
if (params->max_polygons >= 2 && !key->coarse_pixel) {
|
if (max_polygons >= 2 && !key->coarse_pixel) {
|
||||||
if (params->max_polygons >= 4 && max_dispatch_width >= 32 &&
|
if (max_polygons >= 4 && max_dispatch_width >= 32 &&
|
||||||
4 * prog_data->num_varying_inputs <= MAX_VARYING &&
|
4 * prog_data->num_varying_inputs <= MAX_VARYING &&
|
||||||
INTEL_SIMD(FS, 4X8)) {
|
INTEL_SIMD(FS, 4X8)) {
|
||||||
/* Try a quad-SIMD8 compile */
|
/* Try a quad-SIMD8 compile */
|
||||||
|
|
@ -1748,13 +1764,12 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (devinfo->ver >= 12 && !has_spilled &&
|
if (devinfo->ver >= 12 && !has_spilled &&
|
||||||
params->max_polygons >= 2 && !key->coarse_pixel &&
|
max_polygons >= 2 && !key->coarse_pixel &&
|
||||||
reqd_dispatch_width == SUBGROUP_SIZE_VARYING) {
|
reqd_dispatch_width == SUBGROUP_SIZE_VARYING) {
|
||||||
brw_shader *vbase = v8 ? v8.get() : v16 ? v16.get() : v32.get();
|
brw_shader *vbase = v8 ? v8.get() : v16 ? v16.get() : v32.get();
|
||||||
assert(vbase);
|
assert(vbase);
|
||||||
|
|
||||||
if (devinfo->ver >= 20 &&
|
if (devinfo->ver >= 20 && max_polygons >= 4 &&
|
||||||
params->max_polygons >= 4 &&
|
|
||||||
vbase->max_dispatch_width >= 32 &&
|
vbase->max_dispatch_width >= 32 &&
|
||||||
4 * prog_data->num_varying_inputs <= MAX_VARYING &&
|
4 * prog_data->num_varying_inputs <= MAX_VARYING &&
|
||||||
INTEL_SIMD(FS, 4X8)) {
|
INTEL_SIMD(FS, 4X8)) {
|
||||||
|
|
@ -1889,11 +1904,13 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
||||||
extern "C" void
|
extern "C" void
|
||||||
brw_compute_sbe_per_vertex_urb_read(const struct intel_vue_map *prev_stage_vue_map,
|
brw_compute_sbe_per_vertex_urb_read(const struct intel_vue_map *prev_stage_vue_map,
|
||||||
bool mesh,
|
bool mesh,
|
||||||
|
bool per_primitive_remapping,
|
||||||
const struct brw_wm_prog_data *wm_prog_data,
|
const struct brw_wm_prog_data *wm_prog_data,
|
||||||
uint32_t *out_read_offset,
|
uint32_t *out_read_offset,
|
||||||
uint32_t *out_read_length,
|
uint32_t *out_read_length,
|
||||||
uint32_t *out_num_varyings,
|
uint32_t *out_num_varyings,
|
||||||
uint32_t *out_primitive_id_offset)
|
uint32_t *out_primitive_id_offset,
|
||||||
|
uint32_t *out_flat_inputs)
|
||||||
{
|
{
|
||||||
int first_slot = INT32_MAX, last_slot = -1;
|
int first_slot = INT32_MAX, last_slot = -1;
|
||||||
|
|
||||||
|
|
@ -1931,6 +1948,7 @@ brw_compute_sbe_per_vertex_urb_read(const struct intel_vue_map *prev_stage_vue_m
|
||||||
(first_slot >= 0 && last_slot >= 0 && last_slot >= first_slot));
|
(first_slot >= 0 && last_slot >= 0 && last_slot >= first_slot));
|
||||||
|
|
||||||
uint32_t num_varyings = wm_prog_data->num_varying_inputs;
|
uint32_t num_varyings = wm_prog_data->num_varying_inputs;
|
||||||
|
uint32_t remapped_flat_inputs = 0;
|
||||||
|
|
||||||
/* When using INTEL_VUE_LAYOUT_SEPARATE_MESH, the location of the
|
/* When using INTEL_VUE_LAYOUT_SEPARATE_MESH, the location of the
|
||||||
* PrimitiveID is unknown at compile time, here we compute the offset
|
* PrimitiveID is unknown at compile time, here we compute the offset
|
||||||
|
|
@ -1939,7 +1957,19 @@ brw_compute_sbe_per_vertex_urb_read(const struct intel_vue_map *prev_stage_vue_m
|
||||||
*/
|
*/
|
||||||
*out_primitive_id_offset = 0;
|
*out_primitive_id_offset = 0;
|
||||||
if (prev_stage_vue_map->layout == INTEL_VUE_LAYOUT_SEPARATE_MESH) {
|
if (prev_stage_vue_map->layout == INTEL_VUE_LAYOUT_SEPARATE_MESH) {
|
||||||
if (mesh) {
|
if (per_primitive_remapping && wm_prog_data->per_primitive_inputs != 0) {
|
||||||
|
/* When the mesh shader remaps per-primitive slots to per-vertex
|
||||||
|
* ones, read the entire set of slots.
|
||||||
|
*/
|
||||||
|
assert(mesh);
|
||||||
|
remapped_flat_inputs =
|
||||||
|
((1u << prev_stage_vue_map->num_slots) - 1) &
|
||||||
|
~((1u << last_slot) - 1);
|
||||||
|
*out_flat_inputs |= remapped_flat_inputs;
|
||||||
|
last_slot = prev_stage_vue_map->num_slots - 1;
|
||||||
|
*out_primitive_id_offset = INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_MESH;
|
||||||
|
num_varyings = prev_stage_vue_map->num_slots - first_slot;
|
||||||
|
} else if (mesh) {
|
||||||
/* When using Mesh, the PrimitiveID is in the per-primitive block. */
|
/* When using Mesh, the PrimitiveID is in the per-primitive block. */
|
||||||
if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] >= 0)
|
if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] >= 0)
|
||||||
num_varyings--;
|
num_varyings--;
|
||||||
|
|
@ -1976,6 +2006,8 @@ brw_compute_sbe_per_vertex_urb_read(const struct intel_vue_map *prev_stage_vue_m
|
||||||
last_slot = MAX2(primitive_id_slot, last_slot);
|
last_slot = MAX2(primitive_id_slot, last_slot);
|
||||||
|
|
||||||
*out_primitive_id_offset = primitive_id_slot - first_slot;
|
*out_primitive_id_offset = primitive_id_slot - first_slot;
|
||||||
|
/* Make sure to have constant interpolation on PrimitiveID */
|
||||||
|
remapped_flat_inputs |= BITFIELD_BIT(*out_primitive_id_offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1990,6 +2022,8 @@ brw_compute_sbe_per_vertex_urb_read(const struct intel_vue_map *prev_stage_vue_m
|
||||||
*out_read_length = DIV_ROUND_UP(last_slot - first_slot + 1, 2);
|
*out_read_length = DIV_ROUND_UP(last_slot - first_slot + 1, 2);
|
||||||
*out_num_varyings = num_varyings;
|
*out_num_varyings = num_varyings;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
*out_flat_inputs = wm_prog_data->flat_inputs | remapped_flat_inputs;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void
|
extern "C" void
|
||||||
|
|
@ -2020,6 +2054,13 @@ brw_compute_sbe_per_primitive_urb_read(uint64_t inputs_read,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
*out_read_offset = DIV_ROUND_UP(first_read, 32);
|
/* Not loading any per-primitive data in this case, the push constants
|
||||||
*out_read_length = DIV_ROUND_UP(num_varyings, 2);
|
* should be adjusted though.
|
||||||
|
*/
|
||||||
|
if (mue_map->wa_18019110168_active) {
|
||||||
|
*out_read_offset = *out_read_length = 0;
|
||||||
|
} else {
|
||||||
|
*out_read_offset = DIV_ROUND_UP(first_read, 32);
|
||||||
|
*out_read_length = DIV_ROUND_UP(num_varyings, 2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1316,19 +1316,24 @@ brw_compile_mesh(const struct brw_compiler *compiler,
|
||||||
g.generate_code(selected->cfg, selected->dispatch_width, selected->shader_stats,
|
g.generate_code(selected->cfg, selected->dispatch_width, selected->shader_stats,
|
||||||
selected->performance_analysis.require(), params->base.stats);
|
selected->performance_analysis.require(), params->base.stats);
|
||||||
if (prog_data->map.wa_18019110168_active) {
|
if (prog_data->map.wa_18019110168_active) {
|
||||||
|
int8_t remap_table[VARYING_SLOT_TESS_MAX];
|
||||||
|
memset(remap_table, -1, sizeof(remap_table));
|
||||||
|
for (uint32_t i = 0; i < ARRAY_SIZE(wa_18019110168_mapping); i++) {
|
||||||
|
if (wa_18019110168_mapping[i] != -1)
|
||||||
|
remap_table[i] = prog_data->map.vue_map.varying_to_slot[wa_18019110168_mapping[i]];
|
||||||
|
}
|
||||||
uint8_t *const_data =
|
uint8_t *const_data =
|
||||||
(uint8_t *) rzalloc_size(params->base.mem_ctx,
|
(uint8_t *) rzalloc_size(params->base.mem_ctx,
|
||||||
nir->constant_data_size +
|
nir->constant_data_size + sizeof(remap_table));
|
||||||
sizeof(prog_data->map.per_primitive_offsets));
|
|
||||||
memcpy(const_data, nir->constant_data, nir->constant_data_size);
|
memcpy(const_data, nir->constant_data, nir->constant_data_size);
|
||||||
memcpy(const_data + nir->constant_data_size,
|
memcpy(const_data + nir->constant_data_size, remap_table, sizeof(remap_table));
|
||||||
prog_data->map.per_primitive_offsets,
|
g.add_const_data(const_data, nir->constant_data_size + sizeof(remap_table));
|
||||||
sizeof(prog_data->map.per_primitive_offsets));
|
|
||||||
g.add_const_data(const_data,
|
|
||||||
nir->constant_data_size +
|
|
||||||
sizeof(prog_data->map.per_primitive_offset));
|
|
||||||
prog_data->wa_18019110168_mapping_offset =
|
prog_data->wa_18019110168_mapping_offset =
|
||||||
prog_data->base.base.const_data_offset + nir->constant_data_size;
|
prog_data->base.base.const_data_offset + nir->constant_data_size;
|
||||||
|
for (uint32_t i = 0; i < ARRAY_SIZE(remap_table); i++) {
|
||||||
|
if (remap_table[i] != -1)
|
||||||
|
fprintf(stderr, "%u -> %hhi\n", i, remap_table[i]);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
g.add_const_data(nir->constant_data, nir->constant_data_size);
|
g.add_const_data(nir->constant_data, nir->constant_data_size);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -533,6 +533,7 @@ enum brw_shader_reloc_id {
|
||||||
BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
|
BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
|
||||||
BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
|
BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
|
||||||
BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
|
BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
|
||||||
|
BRW_SHADER_RELOC_INSTRUCTION_BASE_ADDR_HIGH,
|
||||||
BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE,
|
BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE,
|
||||||
BRW_SHADER_RELOC_LAST_EMBEDDED_SAMPLER_HANDLE =
|
BRW_SHADER_RELOC_LAST_EMBEDDED_SAMPLER_HANDLE =
|
||||||
BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE + BRW_MAX_EMBEDDED_SAMPLERS - 1,
|
BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE + BRW_MAX_EMBEDDED_SAMPLERS - 1,
|
||||||
|
|
@ -797,6 +798,12 @@ struct brw_wm_prog_data {
|
||||||
*/
|
*/
|
||||||
unsigned msaa_flags_param;
|
unsigned msaa_flags_param;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Push constant location of the remapping offset in the instruction heap
|
||||||
|
* for Wa_18019110168.
|
||||||
|
*/
|
||||||
|
unsigned per_primitive_remap_param;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Mask of which interpolation modes are required by the fragment shader.
|
* Mask of which interpolation modes are required by the fragment shader.
|
||||||
* Those interpolations are delivered as part of the thread payload. Used
|
* Those interpolations are delivered as part of the thread payload. Used
|
||||||
|
|
@ -1721,12 +1728,13 @@ brw_compute_first_fs_urb_slot_required(uint64_t inputs_read,
|
||||||
|
|
||||||
void
|
void
|
||||||
brw_compute_sbe_per_vertex_urb_read(const struct intel_vue_map *prev_stage_vue_map,
|
brw_compute_sbe_per_vertex_urb_read(const struct intel_vue_map *prev_stage_vue_map,
|
||||||
bool mesh,
|
bool mesh, bool per_primitive_remapping,
|
||||||
const struct brw_wm_prog_data *wm_prog_data,
|
const struct brw_wm_prog_data *wm_prog_data,
|
||||||
uint32_t *out_first_slot,
|
uint32_t *out_first_slot,
|
||||||
uint32_t *num_slots,
|
uint32_t *num_slots,
|
||||||
uint32_t *out_num_varyings,
|
uint32_t *out_num_varyings,
|
||||||
uint32_t *out_primitive_id_offset);
|
uint32_t *out_primitive_id_offset,
|
||||||
|
uint32_t *out_flat_inputs);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the URB offset at which SBE should read the per primitive data
|
* Computes the URB offset at which SBE should read the per primitive data
|
||||||
|
|
|
||||||
|
|
@ -4428,6 +4428,20 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case nir_intrinsic_store_per_primitive_payload_intel: {
|
||||||
|
const brw_builder ubld = bld.exec_all().group(1, 0);
|
||||||
|
brw_reg src = get_nir_src(ntb, instr->src[0], -1);
|
||||||
|
src = retype(bld.emit_uniformize(src), BRW_TYPE_UD);
|
||||||
|
|
||||||
|
ubld.MOV(retype(
|
||||||
|
brw_per_primitive_reg(bld,
|
||||||
|
nir_intrinsic_base(instr),
|
||||||
|
nir_intrinsic_component(instr)),
|
||||||
|
BRW_TYPE_UD),
|
||||||
|
component(src, 0));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case nir_intrinsic_load_fs_input_interp_deltas: {
|
case nir_intrinsic_load_fs_input_interp_deltas: {
|
||||||
assert(s.stage == MESA_SHADER_FRAGMENT);
|
assert(s.stage == MESA_SHADER_FRAGMENT);
|
||||||
assert(nir_src_as_uint(instr->src[0]) == 0);
|
assert(nir_src_as_uint(instr->src[0]) == 0);
|
||||||
|
|
@ -4586,9 +4600,15 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
|
||||||
bld.MOV(retype(dest, BRW_TYPE_UD), brw_imm_ud(s.max_polygons));
|
bld.MOV(retype(dest, BRW_TYPE_UD), brw_imm_ud(s.max_polygons));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case nir_intrinsic_load_per_primitive_remap_intel:
|
||||||
|
bld.MOV(retype(dest, BRW_TYPE_UD),
|
||||||
|
brw_dynamic_per_primitive_remap(brw_wm_prog_data(s.prog_data)));
|
||||||
|
break;
|
||||||
|
|
||||||
case nir_intrinsic_read_attribute_payload_intel: {
|
case nir_intrinsic_read_attribute_payload_intel: {
|
||||||
const brw_reg offset = retype(get_nir_src(ntb, instr->src[0], 0),
|
const brw_reg offset = retype(
|
||||||
BRW_TYPE_UD);
|
bld.emit_uniformize(get_nir_src(ntb, instr->src[0], 0)),
|
||||||
|
BRW_TYPE_UD);
|
||||||
bld.emit(FS_OPCODE_READ_ATTRIBUTE_PAYLOAD, retype(dest, BRW_TYPE_UD), offset);
|
bld.emit(FS_OPCODE_READ_ATTRIBUTE_PAYLOAD, retype(dest, BRW_TYPE_UD), offset);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -729,15 +729,59 @@ brw_nir_vertex_attribute_offset(nir_builder *b,
|
||||||
12);
|
12);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static nir_block *
|
||||||
|
fragment_top_block_or_after_wa_18019110168(nir_function_impl *impl)
|
||||||
|
{
|
||||||
|
nir_if *first_if =
|
||||||
|
nir_block_get_following_if(nir_start_block(impl));
|
||||||
|
nir_block *post_wa_18019110168_block = NULL;
|
||||||
|
if (first_if) {
|
||||||
|
nir_block *last_if_block = nir_if_last_then_block(first_if);
|
||||||
|
nir_foreach_block_in_cf_node(block, &first_if->cf_node) {
|
||||||
|
nir_foreach_instr(instr, block) {
|
||||||
|
if (instr->type != nir_instr_type_intrinsic)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||||
|
if (intrin->intrinsic == nir_intrinsic_store_per_primitive_payload_intel) {
|
||||||
|
post_wa_18019110168_block = last_if_block->successors[0];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (post_wa_18019110168_block)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return post_wa_18019110168_block ?
|
||||||
|
post_wa_18019110168_block : nir_start_block(impl);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
brw_nir_lower_fs_inputs(nir_shader *nir,
|
brw_nir_lower_fs_inputs(nir_shader *nir,
|
||||||
const struct intel_device_info *devinfo,
|
const struct intel_device_info *devinfo,
|
||||||
const struct brw_wm_prog_key *key)
|
const struct brw_wm_prog_key *key)
|
||||||
{
|
{
|
||||||
|
/* Always pull the PrimitiveID from the per-primitive block if mesh can be
|
||||||
|
* involved.
|
||||||
|
*/
|
||||||
|
if (key->mesh_input != INTEL_NEVER) {
|
||||||
|
nir_foreach_shader_in_variable(var, nir) {
|
||||||
|
if (var->data.location == VARYING_SLOT_PRIMITIVE_ID) {
|
||||||
|
var->data.per_primitive = true;
|
||||||
|
nir->info.per_primitive_inputs |= VARYING_BIT_PRIMITIVE_ID;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
nir_def *indirect_primitive_id = NULL;
|
nir_def *indirect_primitive_id = NULL;
|
||||||
if (key->base.vue_layout == INTEL_VUE_LAYOUT_SEPARATE_MESH &&
|
if (key->base.vue_layout == INTEL_VUE_LAYOUT_SEPARATE_MESH &&
|
||||||
(nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)) {
|
(nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)) {
|
||||||
nir_builder _b = nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(nir))), *b = &_b;
|
nir_builder _b = nir_builder_at(
|
||||||
|
nir_before_block(
|
||||||
|
fragment_top_block_or_after_wa_18019110168(
|
||||||
|
nir_shader_get_entrypoint(nir)))), *b = &_b;
|
||||||
nir_def *index = nir_ubitfield_extract_imm(
|
nir_def *index = nir_ubitfield_extract_imm(
|
||||||
b,
|
b,
|
||||||
nir_load_fs_msaa_intel(b),
|
nir_load_fs_msaa_intel(b),
|
||||||
|
|
@ -777,14 +821,6 @@ brw_nir_lower_fs_inputs(nir_shader *nir,
|
||||||
var->data.interpolation = flat ? INTERP_MODE_FLAT
|
var->data.interpolation = flat ? INTERP_MODE_FLAT
|
||||||
: INTERP_MODE_SMOOTH;
|
: INTERP_MODE_SMOOTH;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Always pull the PrimitiveID from the per-primitive block if mesh can be involved.
|
|
||||||
*/
|
|
||||||
if (var->data.location == VARYING_SLOT_PRIMITIVE_ID &&
|
|
||||||
key->mesh_input != INTEL_NEVER) {
|
|
||||||
var->data.per_primitive = true;
|
|
||||||
nir->info.per_primitive_inputs |= VARYING_BIT_PRIMITIVE_ID;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
NIR_PASS(_, nir, nir_lower_io,
|
NIR_PASS(_, nir, nir_lower_io,
|
||||||
|
|
@ -2640,7 +2676,7 @@ brw_nir_move_interpolation_to_top(nir_shader *nir)
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
||||||
nir_foreach_function_impl(impl, nir) {
|
nir_foreach_function_impl(impl, nir) {
|
||||||
nir_block *top = nir_start_block(impl);
|
nir_block *top = fragment_top_block_or_after_wa_18019110168(impl);
|
||||||
nir_cursor cursor = nir_before_instr(nir_block_first_instr(top));
|
nir_cursor cursor = nir_before_instr(nir_block_first_instr(top));
|
||||||
bool impl_progress = false;
|
bool impl_progress = false;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -339,6 +339,17 @@ brw_nir_mesh_shader_needs_wa_18019110168(const struct intel_device_info *devinfo
|
||||||
VARYING_BIT_PRIMITIVE_COUNT));
|
VARYING_BIT_PRIMITIVE_COUNT));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
brw_nir_fragment_shader_needs_wa_18019110168(const struct intel_device_info *devinfo,
|
||||||
|
enum intel_sometimes mesh_input,
|
||||||
|
nir_shader *shader)
|
||||||
|
{
|
||||||
|
return intel_needs_workaround(devinfo, 18019110168) &&
|
||||||
|
mesh_input != INTEL_NEVER &&
|
||||||
|
(shader->info.per_primitive_inputs != 0 ||
|
||||||
|
(shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID));
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
brw_nir_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir,
|
brw_nir_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir,
|
||||||
struct brw_compile_mesh_params *params,
|
struct brw_compile_mesh_params *params,
|
||||||
|
|
@ -348,6 +359,11 @@ bool
|
||||||
brw_nir_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
|
brw_nir_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
|
||||||
const int *wa_mapping);
|
const int *wa_mapping);
|
||||||
|
|
||||||
|
bool
|
||||||
|
brw_nir_frag_convert_attrs_prim_to_vert_indirect(struct nir_shader *nir,
|
||||||
|
const struct intel_device_info *devinfo,
|
||||||
|
struct brw_compile_fs_params *params);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -535,3 +535,83 @@ brw_nir_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
brw_nir_frag_convert_attrs_prim_to_vert_indirect(struct nir_shader *nir,
|
||||||
|
const struct intel_device_info *devinfo,
|
||||||
|
struct brw_compile_fs_params *params)
|
||||||
|
{
|
||||||
|
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||||
|
nir_builder _b = nir_builder_at(nir_before_impl(impl)), *b = &_b;
|
||||||
|
|
||||||
|
const uint64_t per_primitive_inputs = nir->info.inputs_read &
|
||||||
|
(nir->info.per_primitive_inputs | VARYING_BIT_PRIMITIVE_ID);
|
||||||
|
|
||||||
|
int per_primitive_offsets[VARYING_SLOT_MAX];
|
||||||
|
uint32_t first_read_offset = 0, per_primitive_stride = 0;
|
||||||
|
brw_compute_per_primitive_map(per_primitive_offsets,
|
||||||
|
&per_primitive_stride,
|
||||||
|
&first_read_offset,
|
||||||
|
0, nir, nir_var_shader_in,
|
||||||
|
nir->info.per_primitive_inputs,
|
||||||
|
true /* separate_shader */);
|
||||||
|
|
||||||
|
per_primitive_stride = align(per_primitive_stride, devinfo->grf_size);
|
||||||
|
|
||||||
|
nir_def *msaa_flags = nir_load_fs_msaa_intel(b);
|
||||||
|
nir_def *needs_remapping = nir_test_mask(
|
||||||
|
b, msaa_flags, INTEL_MSAA_FLAG_PER_PRIMITIVE_REMAPPING);
|
||||||
|
nir_push_if(b, needs_remapping);
|
||||||
|
{
|
||||||
|
nir_def *first_slot =
|
||||||
|
nir_ubitfield_extract_imm(
|
||||||
|
b, msaa_flags,
|
||||||
|
INTEL_MSAA_FLAG_FIRST_VUE_SLOT_OFFSET,
|
||||||
|
INTEL_MSAA_FLAG_FIRST_VUE_SLOT_SIZE);
|
||||||
|
nir_def *remap_table_addr =
|
||||||
|
nir_pack_64_2x32_split(
|
||||||
|
b,
|
||||||
|
nir_load_per_primitive_remap_intel(b),
|
||||||
|
nir_load_reloc_const_intel(
|
||||||
|
b, BRW_SHADER_RELOC_INSTRUCTION_BASE_ADDR_HIGH));
|
||||||
|
u_foreach_bit64(location, per_primitive_inputs) {
|
||||||
|
if (location < VARYING_SLOT_VAR0 &&
|
||||||
|
location != VARYING_SLOT_PRIMITIVE_ID)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Read the varying_to_slot[] array from the mesh shader constants
|
||||||
|
* space in the instruction heap.
|
||||||
|
*/
|
||||||
|
nir_def *data =
|
||||||
|
nir_load_global_constant(
|
||||||
|
b, nir_iadd_imm(b, remap_table_addr, ROUND_DOWN_TO(location, 4)),
|
||||||
|
4, 1, 32);
|
||||||
|
const unsigned bit_offset = (8 * location) % 32;
|
||||||
|
nir_def *absolute_attr_idx =
|
||||||
|
nir_ubitfield_extract_imm(b, data, bit_offset, 4);
|
||||||
|
/* Now remove the first slot visible in the FS payload */
|
||||||
|
nir_def *payload_attr_idx =
|
||||||
|
nir_iadd(b, absolute_attr_idx, nir_ineg(b, first_slot));
|
||||||
|
for (unsigned c = 0; c < 4; c++) {
|
||||||
|
/* brw_nir_vertex_attribute_offset works in scalar */
|
||||||
|
nir_def *attr_idx =
|
||||||
|
nir_iadd_imm(
|
||||||
|
b, nir_imul_imm(b, payload_attr_idx, 4), c);
|
||||||
|
/* Turn the scalar attribute index into register byte offset */
|
||||||
|
nir_def *per_vertex_offset =
|
||||||
|
nir_iadd_imm(
|
||||||
|
b,
|
||||||
|
brw_nir_vertex_attribute_offset(b, attr_idx, devinfo),
|
||||||
|
per_primitive_stride);
|
||||||
|
nir_def *value =
|
||||||
|
nir_read_attribute_payload_intel(b, per_vertex_offset);
|
||||||
|
/* Write back the values into the per-primitive location */
|
||||||
|
nir_store_per_primitive_payload_intel(
|
||||||
|
b, value, .base = location, .component = c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nir_pop_if(b, NULL);
|
||||||
|
|
||||||
|
return nir_progress(true, impl, nir_metadata_none);
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -254,6 +254,12 @@ brw_dynamic_msaa_flags(const struct brw_wm_prog_data *wm_prog_data)
|
||||||
return brw_uniform_reg(wm_prog_data->msaa_flags_param, BRW_TYPE_UD);
|
return brw_uniform_reg(wm_prog_data->msaa_flags_param, BRW_TYPE_UD);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline brw_reg
|
||||||
|
brw_dynamic_per_primitive_remap(const struct brw_wm_prog_data *wm_prog_data)
|
||||||
|
{
|
||||||
|
return brw_uniform_reg(wm_prog_data->per_primitive_remap_param, BRW_TYPE_UD);
|
||||||
|
}
|
||||||
|
|
||||||
enum intel_barycentric_mode brw_barycentric_mode(const struct brw_wm_prog_key *key,
|
enum intel_barycentric_mode brw_barycentric_mode(const struct brw_wm_prog_key *key,
|
||||||
nir_intrinsic_instr *intr);
|
nir_intrinsic_instr *intr);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,8 @@ intel_sometimes_invert(enum intel_sometimes x)
|
||||||
return (enum intel_sometimes)((int)INTEL_ALWAYS - (int)x);
|
return (enum intel_sometimes)((int)INTEL_ALWAYS - (int)x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define INTEL_MSAA_FLAG_FIRST_VUE_SLOT_OFFSET (19)
|
||||||
|
#define INTEL_MSAA_FLAG_FIRST_VUE_SLOT_SIZE (6)
|
||||||
#define INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_OFFSET (25)
|
#define INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_OFFSET (25)
|
||||||
#define INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_SIZE (6)
|
#define INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_SIZE (6)
|
||||||
#define INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_MESH (32)
|
#define INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_MESH (32)
|
||||||
|
|
@ -57,6 +59,9 @@ enum intel_msaa_flags {
|
||||||
/** True if provoking vertex is last */
|
/** True if provoking vertex is last */
|
||||||
INTEL_MSAA_FLAG_PROVOKING_VERTEX_LAST = (1 << 5),
|
INTEL_MSAA_FLAG_PROVOKING_VERTEX_LAST = (1 << 5),
|
||||||
|
|
||||||
|
/** True if we need to apply Wa_18019110168 remapping */
|
||||||
|
INTEL_MSAA_FLAG_PER_PRIMITIVE_REMAPPING = (1 << 6),
|
||||||
|
|
||||||
/** True if this shader has been dispatched coarse
|
/** True if this shader has been dispatched coarse
|
||||||
*
|
*
|
||||||
* This is intentionally chosen to be bit 15 to correspond to the coarse bit
|
* This is intentionally chosen to be bit 15 to correspond to the coarse bit
|
||||||
|
|
@ -71,10 +76,16 @@ enum intel_msaa_flags {
|
||||||
*/
|
*/
|
||||||
INTEL_MSAA_FLAG_COARSE_RT_WRITES = (1 << 18),
|
INTEL_MSAA_FLAG_COARSE_RT_WRITES = (1 << 18),
|
||||||
|
|
||||||
|
/** First slot read in the VUE
|
||||||
|
*
|
||||||
|
* This is not a flag but a value that covers 6 bits.
|
||||||
|
*/
|
||||||
|
INTEL_MSAA_FLAG_FIRST_VUE_SLOT = (1 << INTEL_MSAA_FLAG_FIRST_VUE_SLOT_OFFSET),
|
||||||
|
|
||||||
/** Index of the PrimitiveID attribute relative to the first read
|
/** Index of the PrimitiveID attribute relative to the first read
|
||||||
* attribute.
|
* attribute.
|
||||||
*
|
*
|
||||||
* This is not a flag but a value that cover bits 20:31. Value 32 means the
|
* This is not a flag but a value that covers 6 bits. Value 32 means the
|
||||||
* PrimitiveID is coming from the PerPrimitive block, written by the Mesh
|
* PrimitiveID is coming from the PerPrimitive block, written by the Mesh
|
||||||
* shader.
|
* shader.
|
||||||
*/
|
*/
|
||||||
|
|
@ -441,7 +452,9 @@ struct intel_fs_params {
|
||||||
bool coarse_pixel;
|
bool coarse_pixel;
|
||||||
bool alpha_to_coverage;
|
bool alpha_to_coverage;
|
||||||
bool provoking_vertex_last;
|
bool provoking_vertex_last;
|
||||||
|
uint32_t first_vue_slot;
|
||||||
uint32_t primitive_id_index;
|
uint32_t primitive_id_index;
|
||||||
|
bool per_primitive_remapping;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline enum intel_msaa_flags
|
static inline enum intel_msaa_flags
|
||||||
|
|
@ -473,6 +486,10 @@ intel_fs_msaa_flags(struct intel_fs_params params)
|
||||||
if (params.alpha_to_coverage)
|
if (params.alpha_to_coverage)
|
||||||
fs_msaa_flags |= INTEL_MSAA_FLAG_ALPHA_TO_COVERAGE;
|
fs_msaa_flags |= INTEL_MSAA_FLAG_ALPHA_TO_COVERAGE;
|
||||||
|
|
||||||
|
assert(params.first_vue_slot < (1 << INTEL_MSAA_FLAG_FIRST_VUE_SLOT_SIZE));
|
||||||
|
fs_msaa_flags |= (enum intel_msaa_flags)(
|
||||||
|
params.first_vue_slot << INTEL_MSAA_FLAG_FIRST_VUE_SLOT_OFFSET);
|
||||||
|
|
||||||
assert(params.primitive_id_index < (1u << INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_SIZE));
|
assert(params.primitive_id_index < (1u << INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_SIZE));
|
||||||
fs_msaa_flags |= (enum intel_msaa_flags)(
|
fs_msaa_flags |= (enum intel_msaa_flags)(
|
||||||
params.primitive_id_index << INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_OFFSET);
|
params.primitive_id_index << INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_OFFSET);
|
||||||
|
|
@ -480,6 +497,9 @@ intel_fs_msaa_flags(struct intel_fs_params params)
|
||||||
if (params.provoking_vertex_last)
|
if (params.provoking_vertex_last)
|
||||||
fs_msaa_flags |= INTEL_MSAA_FLAG_PROVOKING_VERTEX_LAST;
|
fs_msaa_flags |= INTEL_MSAA_FLAG_PROVOKING_VERTEX_LAST;
|
||||||
|
|
||||||
|
if (params.per_primitive_remapping)
|
||||||
|
fs_msaa_flags |= INTEL_MSAA_FLAG_PER_PRIMITIVE_REMAPPING;
|
||||||
|
|
||||||
return fs_msaa_flags;
|
return fs_msaa_flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -675,16 +675,16 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
|
||||||
anv_pipeline_emit(pipeline, final.sbe, GENX(3DSTATE_SBE), sbe) {
|
anv_pipeline_emit(pipeline, final.sbe, GENX(3DSTATE_SBE), sbe) {
|
||||||
anv_pipeline_emit(pipeline, final.sbe_swiz, GENX(3DSTATE_SBE_SWIZ), swiz) {
|
anv_pipeline_emit(pipeline, final.sbe_swiz, GENX(3DSTATE_SBE_SWIZ), swiz) {
|
||||||
int max_source_attr = 0;
|
int max_source_attr = 0;
|
||||||
uint32_t vertex_read_offset, vertex_read_length, vertex_varyings;
|
uint32_t vertex_read_offset, vertex_read_length, vertex_varyings, flat_inputs;
|
||||||
brw_compute_sbe_per_vertex_urb_read(
|
brw_compute_sbe_per_vertex_urb_read(
|
||||||
vue_map, anv_pipeline_is_mesh(pipeline), wm_prog_data,
|
vue_map, anv_pipeline_is_mesh(pipeline), false, wm_prog_data,
|
||||||
&vertex_read_offset, &vertex_read_length, &vertex_varyings,
|
&vertex_read_offset, &vertex_read_length, &vertex_varyings,
|
||||||
&pipeline->primitive_id_index);
|
&pipeline->primitive_id_index,
|
||||||
|
&flat_inputs);
|
||||||
|
|
||||||
sbe.AttributeSwizzleEnable = anv_pipeline_is_primitive(pipeline);
|
sbe.AttributeSwizzleEnable = anv_pipeline_is_primitive(pipeline);
|
||||||
sbe.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
|
sbe.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
|
||||||
sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs &
|
sbe.ConstantInterpolationEnable = flat_inputs;
|
||||||
((1u << vertex_varyings) - 1);
|
|
||||||
sbe.NumberofSFOutputAttributes = vertex_varyings;
|
sbe.NumberofSFOutputAttributes = vertex_varyings;
|
||||||
#if GFX_VERx10 >= 200
|
#if GFX_VERx10 >= 200
|
||||||
sbe.VertexAttributesBypass = wm_prog_data->vertex_attributes_bypass;
|
sbe.VertexAttributesBypass = wm_prog_data->vertex_attributes_bypass;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue