brw/nir: add intrinsics to read attribute payload register indirectly

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34109>
This commit is contained in:
Lionel Landwerlin 2025-04-29 12:50:42 +03:00 committed by Marge Bot
parent ef17fbf8e5
commit 9d342081e7
7 changed files with 51 additions and 3 deletions

View file

@ -354,6 +354,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_load_fs_msaa_intel:
case nir_intrinsic_load_constant_base_ptr:
case nir_intrinsic_load_const_buf_base_addr_lvp:
case nir_intrinsic_load_max_polygon_intel:
is_divergent = false;
break;
@ -708,7 +709,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_load_frag_size_ir3:
case nir_intrinsic_load_frag_offset_ir3:
case nir_intrinsic_bindless_resource_ir3:
case nir_intrinsic_ray_intersection_ir3: {
case nir_intrinsic_ray_intersection_ir3:
case nir_intrinsic_read_attribute_payload_intel: {
unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
for (unsigned i = 0; i < num_srcs; i++) {
if (src_divergent(instr->src[i], state)) {

View file

@ -2303,6 +2303,15 @@ image("load_raw_intel", src_comp=[1], dest_comp=0,
flags=[CAN_ELIMINATE])
image("store_raw_intel", src_comp=[1, 0])
# Maximum number of polygons processed in the fragment shader
system_value("max_polygon_intel", 1, bit_sizes=[32])
# Read the attribute thread payload at a given offset
# src[] = { offset }
#
# Produces a single 32-bit destination component; the only source is a
# one-component offset into the attribute payload.
# NOTE(review): the unit of the offset is not stated here (presumably bytes,
# as the backend forwards it to an indirect MOV) -- confirm against the
# backend lowering.
intrinsic("read_attribute_payload_intel", dest_comp=1, bit_sizes=[32],
src_comp=[1],
flags=[CAN_ELIMINATE, CAN_REORDER])
# Number of data items being operated on for a SIMD program.
system_value("simd_width_intel", 1)

View file

@ -1198,11 +1198,23 @@ brw_assign_urb_setup(brw_shader &s)
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(s.prog_data);
int urb_start = s.payload().num_regs + prog_data->base.curb_read_length;
bool read_attribute_payload = false;
/* Offset all the urb_setup[] indices by the actual position of the
* setup regs, now that the location of the constants has been chosen.
*/
foreach_block_and_inst(block, brw_inst, inst, s.cfg) {
if (inst->opcode == FS_OPCODE_READ_ATTRIBUTE_PAYLOAD) {
brw_reg offset = inst->src[0];
inst->resize_sources(3);
inst->opcode = SHADER_OPCODE_MOV_INDIRECT;
inst->src[0] = retype(brw_vec8_grf(urb_start, 0), BRW_TYPE_UD);
inst->src[1] = offset;
inst->src[2] = brw_imm_ud(REG_SIZE * 2 * 32);
read_attribute_payload = true;
continue;
}
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == ATTR) {
/* ATTR brw_reg::nr in the FS is in units of logical scalar
@ -1359,11 +1371,18 @@ brw_assign_urb_setup(brw_shader &s)
}
}
if (read_attribute_payload) {
s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
BRW_DEPENDENCY_VARIABLES);
}
/* Each attribute is 4 setup channels, each of which is half a reg,
* but they may be replicated multiple times for multipolygon
* dispatch.
*/
s.first_non_payload_grf += prog_data->num_varying_inputs * 2 * s.max_polygons;
s.first_non_payload_grf +=
(read_attribute_payload ? 32 : prog_data->num_varying_inputs) *
2 * s.max_polygons;
/* Unlike regular attributes, per-primitive attributes have all 4 channels
* in the same slot, so each GRF can store two slots.
@ -1440,6 +1459,9 @@ run_fs(brw_shader &s, bool allow_spilling, bool do_rep_send)
brw_assign_urb_setup(s);
s.debug_optimizer(nir, "urb_setup", 89, 0);
brw_lower_3src_null_dest(s);
brw_workaround_emit_dummy_mov_instruction(s);

View file

@ -512,6 +512,7 @@ enum opcode {
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
FS_OPCODE_READ_ATTRIBUTE_PAYLOAD,
/**
* GLSL barrier()

View file

@ -4593,6 +4593,17 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
brw_dynamic_msaa_flags(brw_wm_prog_data(s.prog_data)));
break;
case nir_intrinsic_load_max_polygon_intel:
bld.MOV(retype(dest, BRW_TYPE_UD), brw_imm_ud(s.max_polygons));
break;
case nir_intrinsic_read_attribute_payload_intel: {
const brw_reg offset = retype(get_nir_src(ntb, instr->src[0], 0),
BRW_TYPE_UD);
bld.emit(FS_OPCODE_READ_ATTRIBUTE_PAYLOAD, retype(dest, BRW_TYPE_UD), offset);
break;
}
default:
brw_from_nir_emit_intrinsic(ntb, bld, instr);
break;

View file

@ -423,7 +423,8 @@ brw_get_lowered_simd_width(const brw_shader *shader, const brw_inst *inst)
swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ? 4 :
get_fpu_lowered_simd_width(shader, inst));
}
case SHADER_OPCODE_MOV_INDIRECT: {
case SHADER_OPCODE_MOV_INDIRECT:
case FS_OPCODE_READ_ATTRIBUTE_PAYLOAD: {
/* From IVB and HSW PRMs:
*
* "2.When the destination requires two registers and the sources are

View file

@ -245,6 +245,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
return "interp_shared_offset";
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
return "interp_per_slot_offset";
case FS_OPCODE_READ_ATTRIBUTE_PAYLOAD:
return "fs_read_attribute_payload";
case SHADER_OPCODE_BARRIER:
return "barrier";