diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index c4eab8f6e60..59363966626 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -598,6 +598,8 @@ enum tex_logical_srcs { TEX_LOGICAL_SRC_SAMPLER_HANDLE, /** Texel offset for gathers */ TEX_LOGICAL_SRC_TG4_OFFSET, + /** Texture offset */ + TEX_LOGICAL_SRC_PACKED_OFFSET, /** REQUIRED: Number of coordinate components (as UD immediate) */ TEX_LOGICAL_SRC_COORD_COMPONENTS, /** REQUIRED: Number of derivative components (as UD immediate) */ diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index 4954d9c1951..fedd4a65688 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -85,38 +85,6 @@ static void brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, static void brw_combine_with_vec(const brw_builder &bld, const brw_reg &dst, const brw_reg &src, unsigned n); -static bool -brw_texture_offset(const nir_tex_instr *tex, unsigned src, - uint32_t *offset_bits_out) -{ - if (!nir_src_is_const(tex->src[src].src)) - return false; - - const unsigned num_components = nir_tex_instr_src_size(tex, src); - - /* Combine all three offsets into a single unsigned dword: - * - * bits 11:8 - U Offset (X component) - * bits 7:4 - V Offset (Y component) - * bits 3:0 - R Offset (Z component) - */ - uint32_t offset_bits = 0; - for (unsigned i = 0; i < num_components; i++) { - int offset = nir_src_comp_as_int(tex->src[src].src, i); - - /* offset out of bounds; caller will handle it. 
*/ - if (offset > 7 || offset < -8) - return false; - - const unsigned shift = 4 * (2 - i); - offset_bits |= (offset & 0xF) << shift; - } - - *offset_bits_out = offset_bits; - - return true; -} - static brw_reg setup_imm_b(const brw_builder &bld, int8_t v) { @@ -7332,21 +7300,15 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = retype(src, BRW_TYPE_UD); break; - case nir_tex_src_offset: { - uint32_t offset_bits = 0; - if (brw_texture_offset(instr, i, &offset_bits)) { - header_bits |= offset_bits; - } else { - /* On gfx12.5+, if the offsets are not both constant and in the - * {-8,7} range, nir_lower_tex() will have already lowered the - * source offset. So we should never reach this point. - */ - assert(devinfo->verx10 < 125); - srcs[TEX_LOGICAL_SRC_TG4_OFFSET] = - retype(src, BRW_TYPE_D); - } + case nir_tex_src_offset: + /* On gfx12.5+, if the offsets are not both constant and in the + * {-8,7} range, nir_lower_tex() will have already lowered the + * source offset. So we should never reach this point. + */ + assert(devinfo->verx10 < 125); + srcs[TEX_LOGICAL_SRC_TG4_OFFSET] = + retype(src, BRW_TYPE_D); break; - } case nir_tex_src_projector: unreachable("should be lowered"); @@ -7390,10 +7352,20 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, * into a single (32-bit) value. */ case nir_tex_src_backend2: - assert(instr->op == nir_texop_tg4); - pack_lod_bias_and_offset = true; - srcs[TEX_LOGICAL_SRC_LOD] = - retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F); + /* For TG4, if there is a LOD, it would have been packed together + * with offsets, just put everything into SRC_LOD. + * + * Otherwise this is a packed offset. 
+ */ + if (instr->op == nir_texop_tg4 && + (nir_tex_instr_src_index(instr, nir_tex_src_lod) != -1 || + nir_tex_instr_src_index(instr, nir_tex_src_bias) != -1)) { + pack_lod_bias_and_offset = true; + srcs[TEX_LOGICAL_SRC_LOD] = + retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F); + } else { + srcs[TEX_LOGICAL_SRC_PACKED_OFFSET] = bld.emit_uniformize(src); + } break; /* If this parameter is present, we are packing either the explicit LOD diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 538d4bd54c4..455f6f78066 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -757,6 +757,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst, const brw_reg &surface_handle, const brw_reg &sampler_handle, const brw_reg &tg4_offset, + const brw_reg &packed_offset, unsigned payload_type_bit_size, unsigned coord_components, unsigned grad_components, @@ -784,6 +785,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst, assert((sampler.file == BAD_FILE) != (sampler_handle.file == BAD_FILE)); if (shader_opcode_needs_header(op, devinfo) || inst->offset != 0 || + packed_offset.file != BAD_FILE || sampler_handle.file != BAD_FILE || is_high_sampler(devinfo, sampler) || residency) { @@ -825,8 +827,17 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst, ubld.MOV(header, brw_imm_ud(0)); else ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_TYPE_UD)); - if (inst->offset) { - ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset)); + if (packed_offset.file != BAD_FILE || inst->offset) { + if (inst->offset && packed_offset.file != BAD_FILE) { + if (packed_offset.file == IMM) + ubld1.MOV(component(header, 2), brw_imm_ud(packed_offset.ud | inst->offset)); + else + ubld1.OR(component(header, 2), packed_offset, brw_imm_ud(inst->offset)); + } else if (packed_offset.file != BAD_FILE) { + ubld1.MOV(component(header, 
2), packed_offset); + } else { + ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset)); + } } else if (devinfo->ver < 11 && bld.shader->stage != MESA_SHADER_VERTEX && bld.shader->stage != MESA_SHADER_FRAGMENT) { @@ -1286,6 +1297,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) const brw_reg surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE]; const brw_reg sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE]; const brw_reg tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET]; + const brw_reg packed_offset = inst->src[TEX_LOGICAL_SRC_PACKED_OFFSET]; assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM); const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud; assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM); @@ -1296,6 +1308,8 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) const unsigned msg_payload_type_bit_size = get_sampler_msg_payload_type_bit_size(devinfo, inst); + assert(tg4_offset.file == BAD_FILE || packed_offset.file == BAD_FILE); + /* 16-bit payloads are available only on gfx11+ */ assert(msg_payload_type_bit_size != 16 || devinfo->ver >= 11); @@ -1304,7 +1318,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) sample_index, mcs, surface, sampler, surface_handle, sampler_handle, - tg4_offset, + tg4_offset, packed_offset, msg_payload_type_bit_size, coord_components, grad_components, residency); diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index bee39a40f7d..042f11d7adf 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -1790,11 +1790,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, if (OPT(nir_lower_tex, &tex_options)) OPT(nir_lower_tex, &tex_options); - const struct brw_nir_lower_texture_opts brw_tex_options = { - .combined_lod_and_array_index = compiler->devinfo->ver >= 20, - .combined_lod_or_bias_and_offset = compiler->devinfo->ver >= 20, - 
}; - OPT(brw_nir_lower_texture, &brw_tex_options); + OPT(brw_nir_lower_texture, devinfo); OPT(intel_nir_lower_sparse_intrinsics); diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 54f0a655b74..95acf75930f 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -205,12 +205,8 @@ bool brw_nir_lower_texel_address(nir_shader *shader, const struct intel_device_info *devinfo, enum isl_tiling tiling); -struct brw_nir_lower_texture_opts { - bool combined_lod_and_array_index; - bool combined_lod_or_bias_and_offset; -}; bool brw_nir_lower_texture(nir_shader *nir, - const struct brw_nir_lower_texture_opts *opts); + const struct intel_device_info *devinfo); bool brw_nir_lower_sample_index_in_coord(nir_shader *nir); diff --git a/src/intel/compiler/brw_nir_lower_texture.c b/src/intel/compiler/brw_nir_lower_texture.c index aa478175bcc..b2430d26a69 100644 --- a/src/intel/compiler/brw_nir_lower_texture.c +++ b/src/intel/compiler/brw_nir_lower_texture.c @@ -22,6 +22,7 @@ */ #include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_format_convert.h" #include "brw_nir.h" /** @@ -103,8 +104,11 @@ pack_lod_and_array_index(nir_builder *b, nir_tex_instr *tex) static bool pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex) { - int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset); - if (offset_index < 0) + /* If there is no backend2, it means there was no offset to pack so just + * bail. 
+ */ + int backend2_index = nir_tex_instr_src_index(tex, nir_tex_src_backend2); + if (backend2_index < 0) return false; /* If 32-bit texture coordinates are used, pack either the explicit LOD or @@ -130,18 +134,13 @@ pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex) return false; } - nir_def *lod = tex->src[lod_index].src.ssa; - nir_def *offset = tex->src[offset_index].src.ssa; - - b->cursor = nir_before_instr(&tex->instr); - /* When using the programmable offsets instruction gather4_po_l_c with * SIMD16 or SIMD32 the U, V offsets are combined with LOD/bias parameters * on the 12 LSBs. For the offset parameters on gather instructions the 6 * least significant bits are honored as signed value with a range * [-32..31]. * - * Pack Offset U, and V for texture gather with offsets. + * Offsets should already have been packed in pack_offset(). * * ------------------------------------------ * |Bits | [31:12] | [11:6] | [5:0] | @@ -149,57 +148,129 @@ * |OffsetUV | LOD/Bias | OffsetV | OffsetU | * ------------------------------------------ */ - nir_def *offu = nir_iand_imm(b, nir_channel(b, offset, 0), 0x3F); - nir_def *offv = nir_iand_imm(b, nir_channel(b, offset, 1), 0x3F); + nir_def *lod = tex->src[lod_index].src.ssa; + nir_def *backend2 = tex->src[backend2_index].src.ssa; - nir_def *offsetUV = nir_ior(b, offu, nir_ishl_imm(b, offv, 6)); + b->cursor = nir_before_instr(&tex->instr); - nir_def *lod_offsetUV = nir_ior(b, offsetUV, + nir_def *lod_offsetUV = nir_ior(b, backend2, nir_iand_imm(b, lod, 0xFFFFF000)); - nir_tex_instr_remove_src(tex, offset_index); - nir_tex_instr_add_src(tex, nir_tex_src_backend2, lod_offsetUV); + + nir_src_rewrite(&tex->src[backend2_index].src, lod_offsetUV); return true; } static bool -brw_nir_lower_texture_instr(nir_builder *b, nir_instr *instr, void *cb_data) +pack_offset(nir_builder *b, nir_tex_instr *tex, bool pack_6bits_offsets) { - if (instr->type != 
nir_instr_type_tex) + /* No offset, nothing to do */ + int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset); + if (offset_index < 0) return false; - const struct brw_nir_lower_texture_opts *opts = cb_data; - nir_tex_instr *tex = nir_instr_as_tex(instr); + b->cursor = nir_before_instr(&tex->instr); + + nir_def *offset = tex->src[offset_index].src.ssa; + + /* Combine all three offsets into a single unsigned dword: + * + * bits 11:8 - U Offset (X component) + * bits 7:4 - V Offset (Y component) + * bits 3:0 - R Offset (Z component) + * + * Or for TG4 messages with pack_6bits_offsets=true, do the bottom packing + * of : + * + * ------------------------------------------ + * |Bits | [31:12] | [11:6] | [5:0] | + * ------------------------------------------ + * |OffsetUV | LOD/Bias | OffsetV | OffsetU | + * ------------------------------------------ + */ + const unsigned num_components = + nir_tex_instr_src_size(tex, offset_index); + + static const unsigned bits4_bits[] = { 4, 4, 4, }; + static const unsigned bits6_bits[] = { 6, 6, 0, }; + + offset = nir_pad_vector_imm_int(b, offset, 0, num_components); + offset = nir_format_clamp_sint( + b, offset, pack_6bits_offsets ? bits6_bits : bits4_bits); + + static const unsigned bits4_offsets[] = { 8, 4, 0, }; + static const unsigned bits6_offsets[] = { 0, 6, 0, }; + const unsigned *comp_bits_offsets = pack_6bits_offsets ? + bits6_offsets : bits4_offsets; + const unsigned value_mask = pack_6bits_offsets ? 0x3f : 0xf; + + nir_def *packed_offset = NULL; + for (unsigned c = 0; c < num_components; c++) { + nir_def *c_shifted = nir_ishl_imm( + b, + nir_iand_imm(b, nir_channel(b, offset, c), value_mask), + comp_bits_offsets[c]); + packed_offset = packed_offset == NULL ? 
c_shifted : nir_ior(b, packed_offset, c_shifted); + } + + nir_tex_instr_remove_src(tex, offset_index); + nir_tex_instr_add_src(tex, nir_tex_src_backend2, packed_offset); + + return true; +} + +static bool +intel_nir_lower_texture_instr(nir_builder *b, nir_tex_instr *tex, void *cb_data) +{ + const struct intel_device_info *devinfo = cb_data; + + const bool has_lod = + nir_tex_instr_src_index(tex, nir_tex_src_lod) != -1 || + nir_tex_instr_src_index(tex, nir_tex_src_bias) != -1; + /* On Gfx20+, when we have a LOD, we need to pack the offsets with it. When + * there is no LOD, the offsets are lowered in the coordinates (see + * lower_xehp_tg4_offset_filter). + */ + const bool needs_tg4_load_bias_offset_packing = + tex->op == nir_texop_tg4 && has_lod && + devinfo->ver >= 20; + const bool needs_tg4_offset_packing = devinfo->verx10 >= 125; + + bool progress = false; + + if (tex->op != nir_texop_txf && + (tex->op != nir_texop_tg4 || needs_tg4_offset_packing)) { + progress |= pack_offset(b, tex, needs_tg4_load_bias_offset_packing); + } switch (tex->op) { case nir_texop_txl: case nir_texop_txb: - case nir_texop_tg4: + case nir_texop_tg4: { if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && - opts->combined_lod_and_array_index) { - return pack_lod_and_array_index(b, tex); + devinfo->ver >= 20) { + progress |= pack_lod_and_array_index(b, tex); } - if (tex->op == nir_texop_tg4 && opts->combined_lod_or_bias_and_offset) { - return pack_lod_or_bias_and_offset(b, tex); - } + if (needs_tg4_load_bias_offset_packing) + progress |= pack_lod_or_bias_and_offset(b, tex); - return false; + break; + } default: - /* Nothing to do */ - return false; + break; } - return false; + return progress; } bool brw_nir_lower_texture(nir_shader *shader, - const struct brw_nir_lower_texture_opts *opts) + const struct intel_device_info *devinfo) { - return nir_shader_instructions_pass(shader, - brw_nir_lower_texture_instr, - nir_metadata_none, - (void *)opts); + return 
nir_shader_tex_pass(shader, + intel_nir_lower_texture_instr, + nir_metadata_none, + (void *)devinfo); }