mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
brw/nir: add intrinsics to read attribute payload register indirectly
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34109>
This commit is contained in:
parent
ef17fbf8e5
commit
9d342081e7
7 changed files with 51 additions and 3 deletions
|
|
@ -354,6 +354,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_load_fs_msaa_intel:
|
||||
case nir_intrinsic_load_constant_base_ptr:
|
||||
case nir_intrinsic_load_const_buf_base_addr_lvp:
|
||||
case nir_intrinsic_load_max_polygon_intel:
|
||||
is_divergent = false;
|
||||
break;
|
||||
|
||||
|
|
@ -708,7 +709,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_load_frag_size_ir3:
|
||||
case nir_intrinsic_load_frag_offset_ir3:
|
||||
case nir_intrinsic_bindless_resource_ir3:
|
||||
case nir_intrinsic_ray_intersection_ir3: {
|
||||
case nir_intrinsic_ray_intersection_ir3:
|
||||
case nir_intrinsic_read_attribute_payload_intel: {
|
||||
unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
|
||||
for (unsigned i = 0; i < num_srcs; i++) {
|
||||
if (src_divergent(instr->src[i], state)) {
|
||||
|
|
|
|||
|
|
@ -2303,6 +2303,15 @@ image("load_raw_intel", src_comp=[1], dest_comp=0,
|
|||
flags=[CAN_ELIMINATE])
|
||||
image("store_raw_intel", src_comp=[1, 0])
|
||||
|
||||
# Maximum number of polygons processed in the fragment shader
|
||||
system_value("max_polygon_intel", 1, bit_sizes=[32])
|
||||
|
||||
# Read the attribute thread payload at a given offset
|
||||
# src[] = { offset }
|
||||
intrinsic("read_attribute_payload_intel", dest_comp=1, bit_sizes=[32],
|
||||
src_comp=[1],
|
||||
flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# Number of data items being operated on for a SIMD program.
|
||||
system_value("simd_width_intel", 1)
|
||||
|
||||
|
|
|
|||
|
|
@ -1198,11 +1198,23 @@ brw_assign_urb_setup(brw_shader &s)
|
|||
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(s.prog_data);
|
||||
|
||||
int urb_start = s.payload().num_regs + prog_data->base.curb_read_length;
|
||||
bool read_attribute_payload = false;
|
||||
|
||||
/* Offset all the urb_setup[] index by the actual position of the
|
||||
* setup regs, now that the location of the constants has been chosen.
|
||||
*/
|
||||
foreach_block_and_inst(block, brw_inst, inst, s.cfg) {
|
||||
if (inst->opcode == FS_OPCODE_READ_ATTRIBUTE_PAYLOAD) {
|
||||
brw_reg offset = inst->src[0];
|
||||
inst->resize_sources(3);
|
||||
inst->opcode = SHADER_OPCODE_MOV_INDIRECT;
|
||||
inst->src[0] = retype(brw_vec8_grf(urb_start, 0), BRW_TYPE_UD);
|
||||
inst->src[1] = offset;
|
||||
inst->src[2] = brw_imm_ud(REG_SIZE * 2 * 32);
|
||||
read_attribute_payload = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == ATTR) {
|
||||
/* ATTR brw_reg::nr in the FS is in units of logical scalar
|
||||
|
|
@ -1359,11 +1371,18 @@ brw_assign_urb_setup(brw_shader &s)
|
|||
}
|
||||
}
|
||||
|
||||
if (read_attribute_payload) {
|
||||
s.invalidate_analysis(BRW_DEPENDENCY_INSTRUCTIONS |
|
||||
BRW_DEPENDENCY_VARIABLES);
|
||||
}
|
||||
|
||||
/* Each attribute is 4 setup channels, each of which is half a reg,
|
||||
* but they may be replicated multiple times for multipolygon
|
||||
* dispatch.
|
||||
*/
|
||||
s.first_non_payload_grf += prog_data->num_varying_inputs * 2 * s.max_polygons;
|
||||
s.first_non_payload_grf +=
|
||||
(read_attribute_payload ? 32 : prog_data->num_varying_inputs) *
|
||||
2 * s.max_polygons;
|
||||
|
||||
/* Unlike regular attributes, per-primitive attributes have all 4 channels
|
||||
* in the same slot, so each GRF can store two slots.
|
||||
|
|
@ -1440,6 +1459,9 @@ run_fs(brw_shader &s, bool allow_spilling, bool do_rep_send)
|
|||
|
||||
brw_assign_urb_setup(s);
|
||||
|
||||
s.debug_optimizer(nir, "urb_setup", 89, 0);
|
||||
|
||||
|
||||
brw_lower_3src_null_dest(s);
|
||||
brw_workaround_emit_dummy_mov_instruction(s);
|
||||
|
||||
|
|
|
|||
|
|
@ -512,6 +512,7 @@ enum opcode {
|
|||
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
|
||||
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
|
||||
FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
|
||||
FS_OPCODE_READ_ATTRIBUTE_PAYLOAD,
|
||||
|
||||
/**
|
||||
* GLSL barrier()
|
||||
|
|
|
|||
|
|
@ -4593,6 +4593,17 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
|
|||
brw_dynamic_msaa_flags(brw_wm_prog_data(s.prog_data)));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_max_polygon_intel:
|
||||
bld.MOV(retype(dest, BRW_TYPE_UD), brw_imm_ud(s.max_polygons));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_read_attribute_payload_intel: {
|
||||
const brw_reg offset = retype(get_nir_src(ntb, instr->src[0], 0),
|
||||
BRW_TYPE_UD);
|
||||
bld.emit(FS_OPCODE_READ_ATTRIBUTE_PAYLOAD, retype(dest, BRW_TYPE_UD), offset);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
brw_from_nir_emit_intrinsic(ntb, bld, instr);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -423,7 +423,8 @@ brw_get_lowered_simd_width(const brw_shader *shader, const brw_inst *inst)
|
|||
swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ? 4 :
|
||||
get_fpu_lowered_simd_width(shader, inst));
|
||||
}
|
||||
case SHADER_OPCODE_MOV_INDIRECT: {
|
||||
case SHADER_OPCODE_MOV_INDIRECT:
|
||||
case FS_OPCODE_READ_ATTRIBUTE_PAYLOAD: {
|
||||
/* From IVB and HSW PRMs:
|
||||
*
|
||||
* "2.When the destination requires two registers and the sources are
|
||||
|
|
|
|||
|
|
@ -245,6 +245,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
|
|||
return "interp_shared_offset";
|
||||
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
||||
return "interp_per_slot_offset";
|
||||
case FS_OPCODE_READ_ATTRIBUTE_PAYLOAD:
|
||||
return "fs_read_attribute_payload";
|
||||
|
||||
case SHADER_OPCODE_BARRIER:
|
||||
return "barrier";
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue