# Summary of this patch (text before the first "diff --git" header is
# commentary and is not part of any hunk).
#
# brw_eu_defines.h
#   - Drops TEX_LOGICAL_SRC_PACKED_OFFSET from enum tex_logical_srcs.
#
# brw_from_nir.cpp
#   - Adds brw_texture_offset(): when the given texture source is constant
#     and every component lies in [-8, 7], packs the components into 4-bit
#     fields (component 0 -> bits 11:8, 1 -> bits 7:4, 2 -> bits 3:0), stores
#     the result through offset_bits_out and returns true. Otherwise it
#     returns false and leaves *offset_bits_out untouched.
#   - nir_tex_src_offset: ORs the packed bits into header_bits when
#     brw_texture_offset() succeeds; otherwise keeps the previous behavior
#     (assert verx10 < 125, pass the source as TEX_LOGICAL_SRC_TG4_OFFSET).
#   - nir_tex_src_backend2: now asserts the op is tg4 and always routes the
#     value to TEX_LOGICAL_SRC_LOD with pack_lod_bias_and_offset set.
#
# brw_lower_logical_sends.cpp
#   - Removes the packed_offset parameter and local; component(header, 2) is
#     now written from inst->offset only.
#
# brw_nir.c / brw_nir.h
#   - brw_nir_lower_texture() takes a struct brw_nir_lower_texture_opts
#     instead of an intel_device_info pointer. Both option flags are set
#     from "ver >= 20" at the call site.
#
# brw_nir_lower_texture.c
#   - Removes pack_offset() and the nir_format_convert.h include.
#   - pack_lod_or_bias_and_offset() now reads nir_tex_src_offset directly,
#     masks channels 0 and 1 to 6 bits each, ORs them with bits 31:12 of the
#     LOD/bias value, removes the offset source and adds the result as
#     nir_tex_src_backend2.
#   - The pass callback becomes brw_nir_lower_texture_instr(), run through
#     nir_shader_instructions_pass() with an explicit tex-instruction check.
#
# NOTE(review): in brw_nir_lower_texture_instr() every switch branch
#   returns, so the trailing "return false;" after the switch looks
#   unreachable - confirm whether it is kept on purpose.
# NOTE(review): brw_nir.c reads compiler->devinfo->ver while the removed line
#   used a local "devinfo"; presumably the same object - verify.
# NOTE(review): indentation and blank context lines below were reconstructed
#   from the hunk line counts; the code tokens are unchanged.
#
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index 1b57739b07a..42ed9650f55 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -630,8 +630,6 @@ enum tex_logical_srcs {
    TEX_LOGICAL_SRC_SAMPLER_HANDLE,
    /** Texel offset for gathers */
    TEX_LOGICAL_SRC_TG4_OFFSET,
-   /** Texture offset */
-   TEX_LOGICAL_SRC_PACKED_OFFSET,
    /** REQUIRED: Number of coordinate components (as UD immediate) */
    TEX_LOGICAL_SRC_COORD_COMPONENTS,
    /** REQUIRED: Number of derivative components (as UD immediate) */
diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp
index b1d07ee0876..1c2f4d89fb9 100644
--- a/src/intel/compiler/brw_from_nir.cpp
+++ b/src/intel/compiler/brw_from_nir.cpp
@@ -84,6 +84,38 @@ static void brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
 static void brw_combine_with_vec(const brw_builder &bld, const brw_reg &dst,
                                  const brw_reg &src, unsigned n);
 
+static bool
+brw_texture_offset(const nir_tex_instr *tex, unsigned src,
+                   uint32_t *offset_bits_out)
+{
+   if (!nir_src_is_const(tex->src[src].src))
+      return false;
+
+   const unsigned num_components = nir_tex_instr_src_size(tex, src);
+
+   /* Combine all three offsets into a single unsigned dword:
+    *
+    *    bits 11:8 - U Offset (X component)
+    *    bits  7:4 - V Offset (Y component)
+    *    bits  3:0 - R Offset (Z component)
+    */
+   uint32_t offset_bits = 0;
+   for (unsigned i = 0; i < num_components; i++) {
+      int offset = nir_src_comp_as_int(tex->src[src].src, i);
+
+      /* offset out of bounds; caller will handle it. */
+      if (offset > 7 || offset < -8)
+         return false;
+
+      const unsigned shift = 4 * (2 - i);
+      offset_bits |= (offset & 0xF) << shift;
+   }
+
+   *offset_bits_out = offset_bits;
+
+   return true;
+}
+
 static brw_reg
 setup_imm_b(const brw_builder &bld, int8_t v)
 {
@@ -7493,15 +7525,21 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
          srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = retype(src, BRW_TYPE_UD);
          break;
 
-      case nir_tex_src_offset:
-         /* On gfx12.5+, if the offsets are not both constant and in the
-          * {-8,7} range, nir_lower_tex() will have already lowered the
-          * source offset. So we should never reach this point.
-          */
-         assert(devinfo->verx10 < 125);
-         srcs[TEX_LOGICAL_SRC_TG4_OFFSET] =
-            retype(src, BRW_TYPE_D);
+      case nir_tex_src_offset: {
+         uint32_t offset_bits = 0;
+         if (brw_texture_offset(instr, i, &offset_bits)) {
+            header_bits |= offset_bits;
+         } else {
+            /* On gfx12.5+, if the offsets are not both constant and in the
+             * {-8,7} range, nir_lower_tex() will have already lowered the
+             * source offset. So we should never reach this point.
+             */
+            assert(devinfo->verx10 < 125);
+            srcs[TEX_LOGICAL_SRC_TG4_OFFSET] =
+               retype(src, BRW_TYPE_D);
+         }
          break;
+      }
 
       case nir_tex_src_projector:
          UNREACHABLE("should be lowered");
@@ -7545,20 +7583,10 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
        * into a single (32-bit) value.
        */
       case nir_tex_src_backend2:
-         /* For TG4, if there is a LOD, it would have been packed together
-          * with offsets, just put everything into SRC_LOD.
-          *
-          * Otherwise this is a packed offset.
-          */
-         if (instr->op == nir_texop_tg4 &&
-             (nir_tex_instr_src_index(instr, nir_tex_src_lod) != -1 ||
-              nir_tex_instr_src_index(instr, nir_tex_src_bias) != -1)) {
-            pack_lod_bias_and_offset = true;
-            srcs[TEX_LOGICAL_SRC_LOD] =
-               retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F);
-         } else {
-            srcs[TEX_LOGICAL_SRC_PACKED_OFFSET] = bld.emit_uniformize(src);
-         }
+         assert(instr->op == nir_texop_tg4);
+         pack_lod_bias_and_offset = true;
+         srcs[TEX_LOGICAL_SRC_LOD] =
+            retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F);
          break;
 
       /* If this parameter is present, we are packing either the explicit LOD
diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp
index 891a3d03fb5..d1913c8e1ac 100644
--- a/src/intel/compiler/brw_lower_logical_sends.cpp
+++ b/src/intel/compiler/brw_lower_logical_sends.cpp
@@ -790,7 +790,6 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
                            const brw_reg &surface_handle,
                            const brw_reg &sampler_handle,
                            const brw_reg &tg4_offset,
-                           const brw_reg &packed_offset,
                            unsigned payload_type_bit_size,
                            unsigned coord_components,
                            unsigned grad_components,
@@ -818,7 +817,6 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
    assert((sampler.file == BAD_FILE) != (sampler_handle.file == BAD_FILE));
 
    if (shader_opcode_needs_header(op, devinfo) || inst->offset != 0 ||
-       packed_offset.file != BAD_FILE ||
        sampler_handle.file != BAD_FILE ||
        is_high_sampler(devinfo, sampler) ||
        residency) {
@@ -860,17 +858,8 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
          ubld.MOV(header, brw_imm_ud(0));
       else
          ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_TYPE_UD));
-      if (packed_offset.file != BAD_FILE || inst->offset) {
-         if (inst->offset && packed_offset.file != BAD_FILE) {
-            if (packed_offset.file == IMM)
-               ubld1.MOV(component(header, 2), brw_imm_ud(packed_offset.ud | inst->offset));
-            else
-               ubld1.OR(component(header, 2), packed_offset, brw_imm_ud(inst->offset));
-         } else if (packed_offset.file != BAD_FILE) {
-            ubld1.MOV(component(header, 2), packed_offset);
-         } else {
-            ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset));
-         }
+      if (inst->offset) {
+         ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset));
       } else if (devinfo->ver < 11 &&
                  bld.shader->stage != MESA_SHADER_VERTEX &&
                  bld.shader->stage != MESA_SHADER_FRAGMENT) {
@@ -1331,7 +1320,6 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
    const brw_reg surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
    const brw_reg sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
    const brw_reg tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET];
-   const brw_reg packed_offset = inst->src[TEX_LOGICAL_SRC_PACKED_OFFSET];
    assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
    const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
    assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
@@ -1342,8 +1330,6 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
    const unsigned msg_payload_type_bit_size =
       get_sampler_msg_payload_type_bit_size(devinfo, inst);
 
-   assert(tg4_offset.file == BAD_FILE || packed_offset.file == BAD_FILE);
-
    /* 16-bit payloads are available only on gfx11+ */
    assert(msg_payload_type_bit_size != 16 || devinfo->ver >= 11);
 
@@ -1352,7 +1338,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
                               sample_index,
                               mcs, surface, sampler,
                               surface_handle, sampler_handle,
-                              tg4_offset, packed_offset,
+                              tg4_offset,
                               msg_payload_type_bit_size,
                               coord_components, grad_components,
                               residency);
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 1be5c2852e1..d090341f6fe 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -2010,7 +2010,11 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
    if (OPT(nir_lower_tex, &tex_options))
       OPT(nir_lower_tex, &tex_options);
 
-   OPT(brw_nir_lower_texture, devinfo);
+   const struct brw_nir_lower_texture_opts brw_tex_options = {
+      .combined_lod_and_array_index = compiler->devinfo->ver >= 20,
+      .combined_lod_or_bias_and_offset = compiler->devinfo->ver >= 20,
+   };
+   OPT(brw_nir_lower_texture, &brw_tex_options);
 
    OPT(intel_nir_lower_sparse_intrinsics);
 
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 58c81f246e2..b161231a2f7 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -208,8 +208,12 @@ bool brw_nir_lower_texel_address(nir_shader *shader,
                                  const struct intel_device_info *devinfo,
                                  enum isl_tiling tiling);
 
+struct brw_nir_lower_texture_opts {
+   bool combined_lod_and_array_index;
+   bool combined_lod_or_bias_and_offset;
+};
 bool brw_nir_lower_texture(nir_shader *nir,
-                           const struct intel_device_info *devinfo);
+                           const struct brw_nir_lower_texture_opts *opts);
 
 bool brw_nir_lower_sample_index_in_coord(nir_shader *nir);
 
diff --git a/src/intel/compiler/brw_nir_lower_texture.c b/src/intel/compiler/brw_nir_lower_texture.c
index b2430d26a69..aa478175bcc 100644
--- a/src/intel/compiler/brw_nir_lower_texture.c
+++ b/src/intel/compiler/brw_nir_lower_texture.c
@@ -22,7 +22,6 @@
  */
 
 #include "compiler/nir/nir_builder.h"
-#include "compiler/nir/nir_format_convert.h"
 #include "brw_nir.h"
 
 /**
@@ -104,11 +103,8 @@ pack_lod_and_array_index(nir_builder *b, nir_tex_instr *tex)
 static bool
 pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex)
 {
-   /* If there is no backend2, it means there was no offset to pack so just
-    * bail.
-    */
-   int backend2_index = nir_tex_instr_src_index(tex, nir_tex_src_backend2);
-   if (backend2_index < 0)
+   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
+   if (offset_index < 0)
       return false;
 
    /* If 32-bit texture coordinates are used, pack either the explicit LOD or
@@ -134,13 +130,18 @@ pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex)
       return false;
    }
 
+   nir_def *lod = tex->src[lod_index].src.ssa;
+   nir_def *offset = tex->src[offset_index].src.ssa;
+
+   b->cursor = nir_before_instr(&tex->instr);
+
    /* When using the programmable offsets instruction gather4_po_l_c with
     * SIMD16 or SIMD32 the U, V offsets are combined with LOD/bias parameters
     * on the 12 LSBs. For the offset parameters on gather instructions the 6
     * least significant bits are honored as signed value with a range
     * [-32..31].
     *
-    * Offsets should already have been packed in pack_const_offset().
+    * Pack Offset U, and V for texture gather with offsets.
     *
     * ------------------------------------------
     * |Bits     | [31:12]  | [11:6]  | [5:0]   |
@@ -148,129 +149,57 @@ pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex)
     * |OffsetUV | LOD/Bias | OffsetV | OffsetU |
     * ------------------------------------------
     */
-   nir_def *lod = tex->src[lod_index].src.ssa;
-   nir_def *backend2 = tex->src[backend2_index].src.ssa;
+   nir_def *offu = nir_iand_imm(b, nir_channel(b, offset, 0), 0x3F);
+   nir_def *offv = nir_iand_imm(b, nir_channel(b, offset, 1), 0x3F);
 
-   b->cursor = nir_before_instr(&tex->instr);
+   nir_def *offsetUV = nir_ior(b, offu, nir_ishl_imm(b, offv, 6));
 
-   nir_def *lod_offsetUV = nir_ior(b, backend2,
+   nir_def *lod_offsetUV = nir_ior(b, offsetUV,
                                    nir_iand_imm(b, lod, 0xFFFFF000));
-
-   nir_src_rewrite(&tex->src[backend2_index].src, lod_offsetUV);
+   nir_tex_instr_remove_src(tex, offset_index);
+   nir_tex_instr_add_src(tex, nir_tex_src_backend2, lod_offsetUV);
 
    return true;
 }
 
 static bool
-pack_offset(nir_builder *b, nir_tex_instr *tex, bool pack_6bits_offsets)
+brw_nir_lower_texture_instr(nir_builder *b, nir_instr *instr, void *cb_data)
 {
-   /* No offset, nothing to do */
-   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
-   if (offset_index < 0)
+   if (instr->type != nir_instr_type_tex)
       return false;
 
-   b->cursor = nir_before_instr(&tex->instr);
-
-   nir_def *offset = tex->src[offset_index].src.ssa;
-
-   /* Combine all three offsets into a single unsigned dword:
-    *
-    *    bits 11:8 - U Offset (X component)
-    *    bits  7:4 - V Offset (Y component)
-    *    bits  3:0 - R Offset (Z component)
-    *
-    * Or for TG4 messages with pack_6bits_offsets=true, do the bottom packing
-    * of :
-    *
-    *    ------------------------------------------
-    *    |Bits     | [31:12]  | [11:6]  | [5:0]   |
-    *    ------------------------------------------
-    *    |OffsetUV | LOD/Bias | OffsetV | OffsetU |
-    *    ------------------------------------------
-    */
-   const unsigned num_components =
-      nir_tex_instr_src_size(tex, offset_index);
-
-   static const unsigned bits4_bits[] = { 4, 4, 4, };
-   static const unsigned bits6_bits[] = { 6, 6, 0, };
-
-   offset = nir_pad_vector_imm_int(b, offset, 0, num_components);
-   offset = nir_format_clamp_sint(
-      b, offset, pack_6bits_offsets ? bits6_bits : bits4_bits);
-
-   static const unsigned bits4_offsets[] = { 8, 4, 0, };
-   static const unsigned bits6_offsets[] = { 0, 6, 0, };
-   const unsigned *comp_bits_offsets = pack_6bits_offsets ?
-      bits6_offsets : bits4_offsets;
-   const unsigned value_mask = pack_6bits_offsets ? 0x3f : 0xf;
-
-   nir_def *packed_offset = NULL;
-   for (unsigned c = 0; c < num_components; c++) {
-      nir_def *c_shifted = nir_ishl_imm(
-         b,
-         nir_iand_imm(b, nir_channel(b, offset, c), value_mask),
-         comp_bits_offsets[c]);
-      packed_offset = packed_offset == NULL ? c_shifted : nir_ior(b, packed_offset, c_shifted);
-   }
-
-   nir_tex_instr_remove_src(tex, offset_index);
-   nir_tex_instr_add_src(tex, nir_tex_src_backend2, packed_offset);
-
-   return true;
-}
-
-static bool
-intel_nir_lower_texture_instr(nir_builder *b, nir_tex_instr *tex, void *cb_data)
-{
-   const struct intel_device_info *devinfo = cb_data;
-
-   const bool has_lod =
-      nir_tex_instr_src_index(tex, nir_tex_src_lod) != -1 ||
-      nir_tex_instr_src_index(tex, nir_tex_src_bias) != -1;
-   /* On Gfx20+, when we have a LOD, we need to pack the offsets with it. When
-    * there is no LOD, the offsets are lowered in the coordinates (see
-    * lower_xehp_tg4_offset_filter).
-    */
-   const bool needs_tg4_load_bias_offset_packing =
-      tex->op == nir_texop_tg4 && has_lod &&
-      devinfo->ver >= 20;
-   const bool needs_tg4_offset_packing = devinfo->verx10 >= 125;
-
-   bool progress = false;
-
-   if (tex->op != nir_texop_txf &&
-       (tex->op != nir_texop_tg4 || needs_tg4_offset_packing)) {
-      progress |= pack_offset(b, tex, needs_tg4_load_bias_offset_packing);
-   }
+   const struct brw_nir_lower_texture_opts *opts = cb_data;
+   nir_tex_instr *tex = nir_instr_as_tex(instr);
 
    switch (tex->op) {
    case nir_texop_txl:
    case nir_texop_txb:
-   case nir_texop_tg4: {
+   case nir_texop_tg4:
       if (tex->is_array &&
           tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
-          devinfo->ver >= 20) {
-         progress |= pack_lod_and_array_index(b, tex);
+          opts->combined_lod_and_array_index) {
+         return pack_lod_and_array_index(b, tex);
       }
 
-      if (needs_tg4_load_bias_offset_packing)
-         progress |= pack_lod_or_bias_and_offset(b, tex);
+      if (tex->op == nir_texop_tg4 && opts->combined_lod_or_bias_and_offset) {
+         return pack_lod_or_bias_and_offset(b, tex);
+      }
 
-      break;
-   }
+      return false;
    default:
-      break;
+      /* Nothing to do */
+      return false;
    }
 
-   return progress;
+   return false;
 }
 
 bool
 brw_nir_lower_texture(nir_shader *shader,
-                      const struct intel_device_info *devinfo)
+                      const struct brw_nir_lower_texture_opts *opts)
 {
-   return nir_shader_tex_pass(shader,
-                              intel_nir_lower_texture_instr,
-                              nir_metadata_none,
-                              (void *)devinfo);
+   return nir_shader_instructions_pass(shader,
+                                       brw_nir_lower_texture_instr,
+                                       nir_metadata_none,
+                                       (void *)opts);
 }