diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index 500c50ce4b4..8c8b10438c2 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -3223,111 +3223,6 @@ brw_from_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb, break; } - case nir_intrinsic_load_output: - case nir_intrinsic_load_per_vertex_output: { - assert(instr->def.bit_size == 32); - brw_reg indirect_offset = get_indirect_offset(ntb, instr); - unsigned imm_offset = nir_intrinsic_base(instr); - unsigned first_component = nir_intrinsic_component(instr); - - brw_urb_inst *urb; - if (indirect_offset.file == BAD_FILE) { - /* This MOV replicates the output handle to all enabled channels - * is SINGLE_PATCH mode. - */ - brw_reg patch_handle = bld.MOV(s.tcs_payload().patch_urb_output); - - { - brw_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = patch_handle; - - if (first_component != 0) { - unsigned read_components = - instr->num_components + first_component; - brw_reg tmp = bld.vgrf(dst.type, read_components); - urb = bld.URB_READ(tmp, srcs, ARRAY_SIZE(srcs)); - urb->size_written = read_components * REG_SIZE * reg_unit(devinfo); - brw_combine_with_vec(bld, dst, offset(tmp, bld, first_component), - instr->num_components); - } else { - urb = bld.URB_READ(dst, srcs, ARRAY_SIZE(srcs)); - urb->size_written = instr->num_components * REG_SIZE * reg_unit(devinfo); - } - urb->offset = imm_offset * (devinfo->ver >= 20 ? 16 : 1); - } - } else { - /* Indirect indexing - use per-slot offsets as well. */ - brw_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = s.tcs_payload().patch_urb_output; - srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset; - - if (first_component != 0) { - unsigned read_components = - instr->num_components + first_component; - brw_reg tmp = bld.vgrf(dst.type, read_components); - urb = bld.URB_READ(tmp, srcs, ARRAY_SIZE(srcs)); - urb->size_written = read_components * REG_SIZE * reg_unit(devinfo); - brw_combine_with_vec(bld, dst, offset(tmp, bld, first_component), - instr->num_components); - } else { - urb = bld.URB_READ(dst, srcs, ARRAY_SIZE(srcs)); - urb->size_written = instr->num_components * REG_SIZE * reg_unit(devinfo); - } - urb->offset = imm_offset * (devinfo->ver >= 20 ? 16 : 1); - } - break; - } - - case nir_intrinsic_store_output: - case nir_intrinsic_store_per_vertex_output: { - assert(nir_src_bit_size(instr->src[0]) == 32); - brw_reg value = get_nir_src(ntb, instr->src[0], -1); - brw_reg indirect_offset = get_indirect_offset(ntb, instr); - unsigned imm_offset = nir_intrinsic_base(instr); - unsigned mask = nir_intrinsic_write_mask(instr); - - if (mask == 0) - break; - - unsigned num_components = util_last_bit(mask); - unsigned first_component = nir_intrinsic_component(instr); - assert((first_component + num_components) <= 4); - - mask = mask << first_component; - - const bool has_urb_lsc = devinfo->ver >= 20; - - brw_reg mask_reg; - if (mask != WRITEMASK_XYZW) - mask_reg = brw_imm_ud(mask); - - brw_reg sources[4]; - - unsigned m = has_urb_lsc ? 0 : first_component; - for (unsigned i = 0; i < num_components; i++) { - int c = i + first_component; - if (mask & (1 << c)) { - sources[m++] = offset(value, bld, i); - } else if (devinfo->ver < 20) { - m++; - } - } - - assert(has_urb_lsc || m == (first_component + num_components)); - - brw_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = s.tcs_payload().patch_urb_output; - srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask_reg; - srcs[URB_LOGICAL_SRC_DATA] = bld.vgrf(BRW_TYPE_F, m); - bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, m, 0); - - brw_urb_inst *urb = bld.URB_WRITE(srcs, ARRAY_SIZE(srcs)); - urb->offset = imm_offset * (devinfo->ver >= 20 ? 16 : 1); - urb->components = m; - break; - } - case nir_intrinsic_load_tess_config_intel: bld.MOV(retype(dst, BRW_TYPE_UD), brw_uniform_reg(tcs_prog_data->tess_config_param, BRW_TYPE_UD)); diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index 4e7274c56ad..d27c8737f2d 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -1271,6 +1271,8 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, */ NIR_PASS(_, nir, nir_opt_constant_folding); NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_out); + + NIR_PASS(_, nir, lower_outputs_to_urb_intrinsics, devinfo); } void