diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c
index d28894122e6..bc23043e362 100644
--- a/src/compiler/nir/nir_divergence_analysis.c
+++ b/src/compiler/nir/nir_divergence_analysis.c
@@ -361,6 +361,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
    case nir_intrinsic_load_max_polygon_intel:
    case nir_intrinsic_load_ray_base_mem_addr_intel:
    case nir_intrinsic_load_ray_hw_stack_size_intel:
+   case nir_intrinsic_load_per_primitive_remap_intel:
       is_divergent = false;
       break;
 
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 72a672296e0..b75c35f2fe5 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -2336,6 +2336,10 @@ intrinsic("read_attribute_payload_intel", dest_comp=1, bit_sizes=[32],
           src_comp=[1], flags=[CAN_ELIMINATE, CAN_REORDER])
 
 
+# Populate the per-primitive payload at an offset
+# src[] = { value, offset }
+intrinsic("store_per_primitive_payload_intel", src_comp=[-1], indices=[BASE, COMPONENT])
+
 # Number of data items being operated on for a SIMD program.
 system_value("simd_width_intel", 1)
 
@@ -2417,6 +2421,9 @@ intrinsic("load_inline_data_intel", [], dest_comp=0,
 # Dynamic fragment shader parameters.
 system_value("fs_msaa_intel", 1)
 
+# Per primitive remapping table offset.
+system_value("per_primitive_remap_intel", 1)
+
 # Intrinsics for Intel bindless thread dispatch
 # BASE=brw_topoloy_id
 system_value("topology_id_intel", 1, indices=[BASE])
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 4a1f7bbdaba..177aec0d482 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -4692,12 +4692,13 @@ iris_compute_first_urb_slot_required(struct iris_compiled_shader *fs_shader,
                                      const struct intel_vue_map *prev_stage_vue_map)
 {
 #if GFX_VER >= 9
-   uint32_t read_offset, read_length, num_varyings, primid_offset;
+   uint32_t read_offset, read_length, num_varyings, primid_offset, flat_inputs;
    brw_compute_sbe_per_vertex_urb_read(prev_stage_vue_map,
                                        false /* mesh*/,
+                                       false /* per_primitive_remapping */,
                                        brw_wm_prog_data(fs_shader->brw_prog_data),
                                        &read_offset, &read_length, &num_varyings,
-                                       &primid_offset);
+                                       &primid_offset, &flat_inputs);
    return 2 * read_offset;
 #else
    const struct iris_fs_data *fs_data = iris_fs_data(fs_shader);
diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp
index a40fb3f1e34..2978758edbd 100644
--- a/src/intel/compiler/brw_compile_fs.cpp
+++ b/src/intel/compiler/brw_compile_fs.cpp
@@ -1303,6 +1303,12 @@ brw_assign_urb_setup(brw_shader &s)
          continue;
       }
 
+      if (inst->dst.file == ATTR) {
+         inst->dst = remap_attr_reg(s, prog_data, inst->dst,
+                                    urb_start, inst->exec_size);
+         continue;
+      }
+
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == ATTR) {
             inst->src[i] = remap_attr_reg(s, prog_data, inst->src[i],
@@ -1468,12 +1474,22 @@ brw_compile_fs(const struct brw_compiler *compiler,
    const struct intel_device_info *devinfo = compiler->devinfo;
    const unsigned max_subgroup_size = 32;
+   unsigned max_polygons = MAX2(1, params->max_polygons);
 
    brw_nir_apply_key(nir, compiler, &key->base, max_subgroup_size);
 
-   if (params->mue_map && params->mue_map->wa_18019110168_active) {
-      brw_nir_frag_convert_attrs_prim_to_vert(
-         nir, params->mue_map->per_primitive_offsets);
+   if (brw_nir_fragment_shader_needs_wa_18019110168(devinfo, key->mesh_input, nir)) {
+      if (params->mue_map && params->mue_map->wa_18019110168_active) {
+         brw_nir_frag_convert_attrs_prim_to_vert(
+            nir, params->mue_map->per_primitive_offsets);
+      } else {
+         NIR_PASS(_, nir, brw_nir_frag_convert_attrs_prim_to_vert_indirect,
+                  devinfo, params);
+      }
+      /* Remapping per-primitive inputs into unused per-vertex inputs cannot
+       * work with multipolygon.
+       */
+      max_polygons = 1;
    }
 
    brw_nir_lower_fs_inputs(nir, devinfo, key);
@@ -1559,8 +1575,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
    unsigned max_dispatch_width = reqd_dispatch_width ? reqd_dispatch_width : 32;
 
    brw_shader *vbase = NULL;
-   if (params->max_polygons >= 2 && !key->coarse_pixel) {
-      if (params->max_polygons >= 4 && max_dispatch_width >= 32 &&
+   if (max_polygons >= 2 && !key->coarse_pixel) {
+      if (max_polygons >= 4 && max_dispatch_width >= 32 &&
           4 * prog_data->num_varying_inputs <= MAX_VARYING &&
           INTEL_SIMD(FS, 4X8)) {
          /* Try a quad-SIMD8 compile */
@@ -1748,13 +1764,12 @@ brw_compile_fs(const struct brw_compiler *compiler,
    }
 
    if (devinfo->ver >= 12 && !has_spilled &&
-       params->max_polygons >= 2 && !key->coarse_pixel &&
+       max_polygons >= 2 && !key->coarse_pixel &&
        reqd_dispatch_width == SUBGROUP_SIZE_VARYING) {
       brw_shader *vbase = v8 ? v8.get() : v16 ? v16.get() : v32.get();
       assert(vbase);
 
-      if (devinfo->ver >= 20 &&
-          params->max_polygons >= 4 &&
+      if (devinfo->ver >= 20 && max_polygons >= 4 &&
           vbase->max_dispatch_width >= 32 &&
           4 * prog_data->num_varying_inputs <= MAX_VARYING &&
           INTEL_SIMD(FS, 4X8)) {
@@ -1889,11 +1904,13 @@
 extern "C" void
 brw_compute_sbe_per_vertex_urb_read(const struct intel_vue_map *prev_stage_vue_map,
                                     bool mesh,
+                                    bool per_primitive_remapping,
                                     const struct brw_wm_prog_data *wm_prog_data,
                                     uint32_t *out_read_offset,
                                     uint32_t *out_read_length,
                                     uint32_t *out_num_varyings,
-                                    uint32_t *out_primitive_id_offset)
+                                    uint32_t *out_primitive_id_offset,
+                                    uint32_t *out_flat_inputs)
 {
    int first_slot = INT32_MAX, last_slot = -1;
 
@@ -1931,6 +1948,7 @@ brw_compute_sbe_per_vertex_urb_read(const struct intel_vue_map *prev_stage_vue_m
           (first_slot >= 0 && last_slot >= 0 && last_slot >= first_slot));
 
    uint32_t num_varyings = wm_prog_data->num_varying_inputs;
+   uint32_t remapped_flat_inputs = 0;
 
    /* When using INTEL_VUE_LAYOUT_SEPARATE_MESH, the location of the
    * PrimitiveID is unknown at compile time, here we compute the offset
@@ -1939,7 +1957,19 @@
    */
    *out_primitive_id_offset = 0;
    if (prev_stage_vue_map->layout == INTEL_VUE_LAYOUT_SEPARATE_MESH) {
-      if (mesh) {
+      if (per_primitive_remapping && wm_prog_data->per_primitive_inputs != 0) {
+         /* When the mesh shader remaps per-primitive slots to per-vertex
+          * ones, read the entire set of slots.
+          */
+         assert(mesh);
+         remapped_flat_inputs =
+            ((1u << prev_stage_vue_map->num_slots) - 1) &
+            ~((1u << last_slot) - 1);
+         /* remapped_flat_inputs is folded into *out_flat_inputs at the end */
+         last_slot = prev_stage_vue_map->num_slots - 1;
+         *out_primitive_id_offset = INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_MESH;
+         num_varyings = prev_stage_vue_map->num_slots - first_slot;
+      } else if (mesh) {
          /* When using Mesh, the PrimitiveID is in the per-primitive block.
          */
         if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] >= 0) num_varyings--;
@@ -1976,6 +2006,8 @@
          last_slot = MAX2(primitive_id_slot, last_slot);
 
          *out_primitive_id_offset = primitive_id_slot - first_slot;
+         /* Make sure to have constant interpolation on PrimitiveID */
+         remapped_flat_inputs |= BITFIELD_BIT(*out_primitive_id_offset);
       }
    }
 
@@ -1990,6 +2022,8 @@
       *out_read_length = DIV_ROUND_UP(last_slot - first_slot + 1, 2);
       *out_num_varyings = num_varyings;
    }
+
+   *out_flat_inputs = wm_prog_data->flat_inputs | remapped_flat_inputs;
 }
 
 extern "C" void
@@ -2020,6 +2054,13 @@ brw_compute_sbe_per_primitive_urb_read(uint64_t inputs_read,
       break;
    }
 
-   *out_read_offset = DIV_ROUND_UP(first_read, 32);
-   *out_read_length = DIV_ROUND_UP(num_varyings, 2);
+   /* Not loading any per-primitive data in this case, the push constants
+    * should be adjusted though.
+    */
+   if (mue_map->wa_18019110168_active) {
+      *out_read_offset = *out_read_length = 0;
+   } else {
+      *out_read_offset = DIV_ROUND_UP(first_read, 32);
+      *out_read_length = DIV_ROUND_UP(num_varyings, 2);
+   }
 }
diff --git a/src/intel/compiler/brw_compile_mesh.cpp b/src/intel/compiler/brw_compile_mesh.cpp
index f5fdb253cb3..1592ca00f0b 100644
--- a/src/intel/compiler/brw_compile_mesh.cpp
+++ b/src/intel/compiler/brw_compile_mesh.cpp
@@ -1316,19 +1316,20 @@ brw_compile_mesh(const struct brw_compiler *compiler,
    g.generate_code(selected->cfg, selected->dispatch_width, selected->shader_stats,
                    selected->performance_analysis.require(), params->base.stats);
    if (prog_data->map.wa_18019110168_active) {
+      int8_t remap_table[VARYING_SLOT_TESS_MAX];
+      memset(remap_table, -1, sizeof(remap_table));
+      for (uint32_t i = 0; i < ARRAY_SIZE(wa_18019110168_mapping); i++) {
+         if (wa_18019110168_mapping[i] != -1)
+            remap_table[i] = prog_data->map.vue_map.varying_to_slot[wa_18019110168_mapping[i]];
+      }
       uint8_t *const_data =
          (uint8_t *) rzalloc_size(params->base.mem_ctx,
-                                  nir->constant_data_size +
-                                  sizeof(prog_data->map.per_primitive_offsets));
+                                  nir->constant_data_size + sizeof(remap_table));
       memcpy(const_data, nir->constant_data, nir->constant_data_size);
-      memcpy(const_data + nir->constant_data_size,
-             prog_data->map.per_primitive_offsets,
-             sizeof(prog_data->map.per_primitive_offsets));
-      g.add_const_data(const_data,
-                       nir->constant_data_size +
-                       sizeof(prog_data->map.per_primitive_offset));
+      memcpy(const_data + nir->constant_data_size, remap_table, sizeof(remap_table));
+      g.add_const_data(const_data, nir->constant_data_size + sizeof(remap_table));
       prog_data->wa_18019110168_mapping_offset =
          prog_data->base.base.const_data_offset + nir->constant_data_size;
    } else {
      g.add_const_data(nir->constant_data, nir->constant_data_size);
    }
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 09cc3263fca..a8e6e6a1ecb 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -533,6 +533,7 @@ enum brw_shader_reloc_id {
    BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
    BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
    BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
+   BRW_SHADER_RELOC_INSTRUCTION_BASE_ADDR_HIGH,
    BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE,
    BRW_SHADER_RELOC_LAST_EMBEDDED_SAMPLER_HANDLE =
       BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE + BRW_MAX_EMBEDDED_SAMPLERS - 1,
@@ -797,6 +798,12 @@ struct brw_wm_prog_data {
     */
    unsigned msaa_flags_param;
 
+   /**
+    * Push constant location of the remapping offset in the instruction heap
+    * for Wa_18019110168.
+    */
+   unsigned per_primitive_remap_param;
+
    /**
    * Mask of which interpolation modes are required by the fragment shader.
    * Those interpolations are delivered as part of the thread payload. Used
@@ -1721,12 +1728,13 @@ brw_compute_first_fs_urb_slot_required(uint64_t inputs_read,
 
 void
 brw_compute_sbe_per_vertex_urb_read(const struct intel_vue_map *prev_stage_vue_map,
-                                    bool mesh,
+                                    bool mesh, bool per_primitive_remapping,
                                     const struct brw_wm_prog_data *wm_prog_data,
                                     uint32_t *out_first_slot,
                                     uint32_t *num_slots,
                                     uint32_t *out_num_varyings,
-                                    uint32_t *out_primitive_id_offset);
+                                    uint32_t *out_primitive_id_offset,
+                                    uint32_t *out_flat_inputs);
 
 /**
  * Computes the URB offset at which SBE should read the per primitive date
diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp
index dbef22a2039..c941cf43858 100644
--- a/src/intel/compiler/brw_from_nir.cpp
+++ b/src/intel/compiler/brw_from_nir.cpp
@@ -4428,6 +4428,20 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
       break;
    }
 
+   case nir_intrinsic_store_per_primitive_payload_intel: {
+      const brw_builder ubld = bld.exec_all().group(1, 0);
+      brw_reg src = get_nir_src(ntb, instr->src[0], -1);
+      src = retype(bld.emit_uniformize(src), BRW_TYPE_UD);
+
+      ubld.MOV(retype(
+                  brw_per_primitive_reg(bld,
+                                        nir_intrinsic_base(instr),
+                                        nir_intrinsic_component(instr)),
+                  BRW_TYPE_UD),
+               component(src, 0));
+      break;
+   }
+
    case nir_intrinsic_load_fs_input_interp_deltas: {
       assert(s.stage == MESA_SHADER_FRAGMENT);
       assert(nir_src_as_uint(instr->src[0]) == 0);
@@ -4586,9 +4600,15 @@
      bld.MOV(retype(dest, BRW_TYPE_UD), brw_imm_ud(s.max_polygons));
      break;
 
+   case nir_intrinsic_load_per_primitive_remap_intel:
+      bld.MOV(retype(dest, BRW_TYPE_UD),
+              brw_dynamic_per_primitive_remap(brw_wm_prog_data(s.prog_data)));
+      break;
+
    case nir_intrinsic_read_attribute_payload_intel: {
-      const brw_reg offset = retype(get_nir_src(ntb, instr->src[0], 0),
-                                    BRW_TYPE_UD);
+      const brw_reg offset = retype(
+         bld.emit_uniformize(get_nir_src(ntb, instr->src[0], 0)),
+         BRW_TYPE_UD);
      bld.emit(FS_OPCODE_READ_ATTRIBUTE_PAYLOAD, retype(dest, BRW_TYPE_UD), offset);
      break;
    }
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index dfb17a4d444..bf332829cc4 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -729,15 +729,59 @@ brw_nir_vertex_attribute_offset(nir_builder *b,
                        12);
 }
 
+static nir_block *
+fragment_top_block_or_after_wa_18019110168(nir_function_impl *impl)
+{
+   nir_if *first_if =
+      nir_block_get_following_if(nir_start_block(impl));
+   nir_block *post_wa_18019110168_block = NULL;
+   if (first_if) {
+      nir_block *last_if_block = nir_if_last_then_block(first_if);
+      nir_foreach_block_in_cf_node(block, &first_if->cf_node) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic == nir_intrinsic_store_per_primitive_payload_intel) {
+               post_wa_18019110168_block = last_if_block->successors[0];
+               break;
+            }
+         }
+
+         if (post_wa_18019110168_block)
+            break;
+      }
+   }
+
+   return post_wa_18019110168_block ?
+          post_wa_18019110168_block : nir_start_block(impl);
+}
+
 void
 brw_nir_lower_fs_inputs(nir_shader *nir,
                         const struct intel_device_info *devinfo,
                         const struct brw_wm_prog_key *key)
 {
+   /* Always pull the PrimitiveID from the per-primitive block if mesh can be
+    * involved.
+    */
+   if (key->mesh_input != INTEL_NEVER) {
+      nir_foreach_shader_in_variable(var, nir) {
+         if (var->data.location == VARYING_SLOT_PRIMITIVE_ID) {
+            var->data.per_primitive = true;
+            nir->info.per_primitive_inputs |= VARYING_BIT_PRIMITIVE_ID;
+         }
+      }
+   }
+
    nir_def *indirect_primitive_id = NULL;
 
    if (key->base.vue_layout == INTEL_VUE_LAYOUT_SEPARATE_MESH &&
       (nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)) {
-      nir_builder _b = nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(nir))), *b = &_b;
+      nir_builder _b = nir_builder_at(
+         nir_before_block(
+            fragment_top_block_or_after_wa_18019110168(
+               nir_shader_get_entrypoint(nir)))), *b = &_b;
      nir_def *index = nir_ubitfield_extract_imm(
         b, nir_load_fs_msaa_intel(b),
@@ -777,14 +821,6 @@ brw_nir_lower_fs_inputs(nir_shader *nir,
 
          var->data.interpolation = flat ? INTERP_MODE_FLAT : INTERP_MODE_SMOOTH;
       }
-
-      /* Always pull the PrimitiveID from the per-primitive block if mesh can be involved.
-       */
-      if (var->data.location == VARYING_SLOT_PRIMITIVE_ID &&
-          key->mesh_input != INTEL_NEVER) {
-         var->data.per_primitive = true;
-         nir->info.per_primitive_inputs |= VARYING_BIT_PRIMITIVE_ID;
-      }
    }
 
    NIR_PASS(_, nir, nir_lower_io,
@@ -2640,7 +2676,7 @@ brw_nir_move_interpolation_to_top(nir_shader *nir)
    bool progress = false;
 
    nir_foreach_function_impl(impl, nir) {
-      nir_block *top = nir_start_block(impl);
+      nir_block *top = fragment_top_block_or_after_wa_18019110168(impl);
       nir_cursor cursor = nir_before_instr(nir_block_first_instr(top));
       bool impl_progress = false;
 
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 02774a9896e..7fd78c69053 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -339,6 +339,17 @@ brw_nir_mesh_shader_needs_wa_18019110168(const struct intel_device_info *devinfo
                            VARYING_BIT_PRIMITIVE_COUNT));
 }
 
+static inline bool
+brw_nir_fragment_shader_needs_wa_18019110168(const struct intel_device_info *devinfo,
+                                             enum intel_sometimes mesh_input,
+                                             nir_shader *shader)
+{
+   return intel_needs_workaround(devinfo, 18019110168) &&
+          mesh_input != INTEL_NEVER &&
+          (shader->info.per_primitive_inputs != 0 ||
+           (shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID));
+}
+
 void
 brw_nir_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir,
                                         struct brw_compile_mesh_params *params,
@@ -348,6 +359,11 @@ bool
 brw_nir_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
                                         const int *wa_mapping);
 
+bool
+brw_nir_frag_convert_attrs_prim_to_vert_indirect(struct nir_shader *nir,
+                                                 const struct intel_device_info *devinfo,
+                                                 struct brw_compile_fs_params *params);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/intel/compiler/brw_nir_wa_18019110168.c b/src/intel/compiler/brw_nir_wa_18019110168.c
index d7cd20a8ed3..9e070fad645 100644
--- a/src/intel/compiler/brw_nir_wa_18019110168.c
+++ b/src/intel/compiler/brw_nir_wa_18019110168.c
@@ -535,3 +535,83 @@ brw_nir_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
 
    return true;
 }
+
+bool
+brw_nir_frag_convert_attrs_prim_to_vert_indirect(struct nir_shader *nir,
+                                                 const struct intel_device_info *devinfo,
+                                                 struct brw_compile_fs_params *params)
+{
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   nir_builder _b = nir_builder_at(nir_before_impl(impl)), *b = &_b;
+
+   const uint64_t per_primitive_inputs = nir->info.inputs_read &
+      (nir->info.per_primitive_inputs | VARYING_BIT_PRIMITIVE_ID);
+
+   int per_primitive_offsets[VARYING_SLOT_MAX];
+   uint32_t first_read_offset = 0, per_primitive_stride = 0;
+   brw_compute_per_primitive_map(per_primitive_offsets,
+                                 &per_primitive_stride,
+                                 &first_read_offset,
+                                 0, nir, nir_var_shader_in,
+                                 nir->info.per_primitive_inputs,
+                                 true /* separate_shader */);
+
+   per_primitive_stride = align(per_primitive_stride, devinfo->grf_size);
+
+   nir_def *msaa_flags = nir_load_fs_msaa_intel(b);
+   nir_def *needs_remapping = nir_test_mask(
+      b, msaa_flags, INTEL_MSAA_FLAG_PER_PRIMITIVE_REMAPPING);
+   nir_push_if(b, needs_remapping);
+   {
+      nir_def *first_slot =
+         nir_ubitfield_extract_imm(
+            b, msaa_flags,
+            INTEL_MSAA_FLAG_FIRST_VUE_SLOT_OFFSET,
+            INTEL_MSAA_FLAG_FIRST_VUE_SLOT_SIZE);
+      nir_def *remap_table_addr =
+         nir_pack_64_2x32_split(
+            b,
+            nir_load_per_primitive_remap_intel(b),
+            nir_load_reloc_const_intel(
+               b, BRW_SHADER_RELOC_INSTRUCTION_BASE_ADDR_HIGH));
+      u_foreach_bit64(location, per_primitive_inputs) {
+         if (location < VARYING_SLOT_VAR0 &&
+             location != VARYING_SLOT_PRIMITIVE_ID)
+            continue;
+
+         /* Read the varying_to_slot[] array from the mesh shader constants
+          * space in the instruction heap.
+          */
+         nir_def *data =
+            nir_load_global_constant(
+               b, nir_iadd_imm(b, remap_table_addr, ROUND_DOWN_TO(location, 4)),
+               4, 1, 32);
+         const unsigned bit_offset = (8 * location) % 32;
+         nir_def *absolute_attr_idx =
+            nir_ubitfield_extract_imm(b, data, bit_offset, 4);
+         /* Now remove the first slot visible in the FS payload */
+         nir_def *payload_attr_idx =
+            nir_iadd(b, absolute_attr_idx, nir_ineg(b, first_slot));
+         for (unsigned c = 0; c < 4; c++) {
+            /* brw_nir_vertex_attribute_offset works in scalar */
+            nir_def *attr_idx =
+               nir_iadd_imm(
+                  b, nir_imul_imm(b, payload_attr_idx, 4), c);
+            /* Turn the scalar attribute index into register byte offset */
+            nir_def *per_vertex_offset =
+               nir_iadd_imm(
+                  b,
+                  brw_nir_vertex_attribute_offset(b, attr_idx, devinfo),
+                  per_primitive_stride);
+            nir_def *value =
+               nir_read_attribute_payload_intel(b, per_vertex_offset);
+            /* Write back the values into the per-primitive location */
+            nir_store_per_primitive_payload_intel(
+               b, value, .base = location, .component = c);
+         }
+      }
+   }
+   nir_pop_if(b, NULL);
+
+   return nir_progress(true, impl, nir_metadata_none);
+}
diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h
index 6ba50bdc500..2f06ca297c7 100644
--- a/src/intel/compiler/brw_shader.h
+++ b/src/intel/compiler/brw_shader.h
@@ -254,6 +254,12 @@ brw_dynamic_msaa_flags(const struct brw_wm_prog_data *wm_prog_data)
    return brw_uniform_reg(wm_prog_data->msaa_flags_param, BRW_TYPE_UD);
 }
 
+inline brw_reg
+brw_dynamic_per_primitive_remap(const struct brw_wm_prog_data *wm_prog_data)
+{
+   return brw_uniform_reg(wm_prog_data->per_primitive_remap_param, BRW_TYPE_UD);
+}
+
 enum intel_barycentric_mode
 brw_barycentric_mode(const struct brw_wm_prog_key *key,
                      nir_intrinsic_instr *intr);
diff --git a/src/intel/compiler/intel_shader_enums.h b/src/intel/compiler/intel_shader_enums.h
index 1b2acca3b74..1fa819ad284 100644
--- a/src/intel/compiler/intel_shader_enums.h
+++ b/src/intel/compiler/intel_shader_enums.h
@@ -30,6 +30,8 @@ intel_sometimes_invert(enum intel_sometimes x)
    return (enum intel_sometimes)((int)INTEL_ALWAYS - (int)x);
 }
 
+#define INTEL_MSAA_FLAG_FIRST_VUE_SLOT_OFFSET (19)
+#define INTEL_MSAA_FLAG_FIRST_VUE_SLOT_SIZE (6)
 #define INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_OFFSET (25)
 #define INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_SIZE (6)
 #define INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_MESH (32)
@@ -57,6 +59,9 @@ enum intel_msaa_flags {
    /** True if provoking vertex is last */
    INTEL_MSAA_FLAG_PROVOKING_VERTEX_LAST = (1 << 5),
 
+   /** True if we need to apply Wa_18019110168 remapping */
+   INTEL_MSAA_FLAG_PER_PRIMITIVE_REMAPPING = (1 << 6),
+
    /** True if this shader has been dispatched coarse
    *
    * This is intentionally chose to be bit 15 to correspond to the coarse bit
@@ -71,10 +76,16 @@
    */
    INTEL_MSAA_FLAG_COARSE_RT_WRITES = (1 << 18),
 
+   /** First slot read in the VUE
+    *
+    * This is not a flag but a value that cover 6bits.
+    */
+   INTEL_MSAA_FLAG_FIRST_VUE_SLOT = (1 << INTEL_MSAA_FLAG_FIRST_VUE_SLOT_OFFSET),
+
    /** Index of the PrimitiveID attribute relative to the first read
     * attribute.
    *
-    * This is not a flag but a value that cover bits 20:31. Value 32 means the
+    * This is not a flag but a value that cover 6bits. Value 32 means the
    * PrimitiveID is coming from the PerPrimitive block, written by the Mesh
    * shader.
    */
@@ -441,7 +452,9 @@ struct intel_fs_params {
    bool coarse_pixel;
    bool alpha_to_coverage;
    bool provoking_vertex_last;
+   uint32_t first_vue_slot;
    uint32_t primitive_id_index;
+   bool per_primitive_remapping;
 };
 
 static inline enum intel_msaa_flags
@@ -473,6 +486,10 @@ intel_fs_msaa_flags(struct intel_fs_params params)
    if (params.alpha_to_coverage)
       fs_msaa_flags |= INTEL_MSAA_FLAG_ALPHA_TO_COVERAGE;
 
+   assert(params.first_vue_slot < (1 << INTEL_MSAA_FLAG_FIRST_VUE_SLOT_SIZE));
+   fs_msaa_flags |= (enum intel_msaa_flags)(
+      params.first_vue_slot << INTEL_MSAA_FLAG_FIRST_VUE_SLOT_OFFSET);
+
    assert(params.primitive_id_index < (1u << INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_SIZE));
    fs_msaa_flags |= (enum intel_msaa_flags)(
       params.primitive_id_index << INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_OFFSET);
@@ -480,6 +497,9 @@ intel_fs_msaa_flags(struct intel_fs_params params)
    if (params.provoking_vertex_last)
       fs_msaa_flags |= INTEL_MSAA_FLAG_PROVOKING_VERTEX_LAST;
 
+   if (params.per_primitive_remapping)
+      fs_msaa_flags |= INTEL_MSAA_FLAG_PER_PRIMITIVE_REMAPPING;
+
    return fs_msaa_flags;
 }
 
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 68be3a3006b..26489da2e6c 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -675,16 +675,16 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
    anv_pipeline_emit(pipeline, final.sbe, GENX(3DSTATE_SBE), sbe) {
       anv_pipeline_emit(pipeline, final.sbe_swiz, GENX(3DSTATE_SBE_SWIZ), swiz) {
          int max_source_attr = 0;
-         uint32_t vertex_read_offset, vertex_read_length, vertex_varyings;
+         uint32_t vertex_read_offset, vertex_read_length, vertex_varyings, flat_inputs;
          brw_compute_sbe_per_vertex_urb_read(
-            vue_map, anv_pipeline_is_mesh(pipeline), wm_prog_data,
+            vue_map, anv_pipeline_is_mesh(pipeline), false, wm_prog_data,
             &vertex_read_offset, &vertex_read_length, &vertex_varyings,
-            &pipeline->primitive_id_index);
+            &pipeline->primitive_id_index,
+            &flat_inputs);
 
          sbe.AttributeSwizzleEnable = anv_pipeline_is_primitive(pipeline);
          sbe.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
-         sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs &
-            ((1u << vertex_varyings) - 1);
+         sbe.ConstantInterpolationEnable = flat_inputs;
          sbe.NumberofSFOutputAttributes = vertex_varyings;
 #if GFX_VERx10 >= 200
          sbe.VertexAttributesBypass = wm_prog_data->vertex_attributes_bypass;