brw: move texture offset packing to NIR

That way we can deal with upcoming non-constant offset values for
VK_KHR_maintenance8.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33138>
Author: Lionel Landwerlin, 2025-01-21 14:17:11 +02:00 (committed by Marge Bot)
Commit: 4346210ae6, parent: 67ae49dede
6 changed files with 147 additions and 96 deletions

@@ -598,6 +598,8 @@ enum tex_logical_srcs {
    TEX_LOGICAL_SRC_SAMPLER_HANDLE,
    /** Texel offset for gathers */
    TEX_LOGICAL_SRC_TG4_OFFSET,
+   /** Texture offset */
+   TEX_LOGICAL_SRC_PACKED_OFFSET,
    /** REQUIRED: Number of coordinate components (as UD immediate) */
    TEX_LOGICAL_SRC_COORD_COMPONENTS,
    /** REQUIRED: Number of derivative components (as UD immediate) */

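For reference, the dword this new logical source carries keeps the layout the old backend helper produced: bits 11:8 hold U, bits 7:4 hold V, bits 3:0 hold R, each a 4-bit signed value in [-8,7]. A minimal standalone sketch of that packing (pack_texture_offsets is a hypothetical helper written for illustration, not code from this patch):

#include <assert.h>
#include <stdint.h>

/* Pack three texel offsets, each in [-8,7], into the single unsigned dword
 * consumed by the sampler message header: bits 11:8 U, 7:4 V, 3:0 R. */
static uint32_t
pack_texture_offsets(int u, int v, int r)
{
   assert(u >= -8 && u <= 7);
   assert(v >= -8 && v <= 7);
   assert(r >= -8 && r <= 7);
   return ((uint32_t)(u & 0xF) << 8) |
          ((uint32_t)(v & 0xF) << 4) |
          ((uint32_t)(r & 0xF) << 0);
}

For example, offsets (1, -2, 0) pack to 0x1E0: 0x1 in bits 11:8 and 0xE, the 4-bit two's complement of -2, in bits 7:4.
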
@@ -85,38 +85,6 @@ static void brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
 static void brw_combine_with_vec(const brw_builder &bld, const brw_reg &dst,
                                  const brw_reg &src, unsigned n);
 
-static bool
-brw_texture_offset(const nir_tex_instr *tex, unsigned src,
-                   uint32_t *offset_bits_out)
-{
-   if (!nir_src_is_const(tex->src[src].src))
-      return false;
-
-   const unsigned num_components = nir_tex_instr_src_size(tex, src);
-
-   /* Combine all three offsets into a single unsigned dword:
-    *
-    *    bits 11:8 - U Offset (X component)
-    *    bits  7:4 - V Offset (Y component)
-    *    bits  3:0 - R Offset (Z component)
-    */
-   uint32_t offset_bits = 0;
-   for (unsigned i = 0; i < num_components; i++) {
-      int offset = nir_src_comp_as_int(tex->src[src].src, i);
-
-      /* offset out of bounds; caller will handle it. */
-      if (offset > 7 || offset < -8)
-         return false;
-
-      const unsigned shift = 4 * (2 - i);
-      offset_bits |= (offset & 0xF) << shift;
-   }
-
-   *offset_bits_out = offset_bits;
-
-   return true;
-}
-
 static brw_reg
 setup_imm_b(const brw_builder &bld, int8_t v)
 {
@@ -7332,21 +7300,15 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
          srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = retype(src, BRW_TYPE_UD);
          break;
 
-      case nir_tex_src_offset: {
-         uint32_t offset_bits = 0;
-         if (brw_texture_offset(instr, i, &offset_bits)) {
-            header_bits |= offset_bits;
-         } else {
-            /* On gfx12.5+, if the offsets are not both constant and in the
-             * {-8,7} range, nir_lower_tex() will have already lowered the
-             * source offset. So we should never reach this point.
-             */
-            assert(devinfo->verx10 < 125);
-            srcs[TEX_LOGICAL_SRC_TG4_OFFSET] =
-               retype(src, BRW_TYPE_D);
-         }
+      case nir_tex_src_offset:
+         /* On gfx12.5+, if the offsets are not both constant and in the
+          * {-8,7} range, nir_lower_tex() will have already lowered the
+          * source offset. So we should never reach this point.
+          */
+         assert(devinfo->verx10 < 125);
+         srcs[TEX_LOGICAL_SRC_TG4_OFFSET] =
+            retype(src, BRW_TYPE_D);
          break;
-      }
 
       case nir_tex_src_projector:
         unreachable("should be lowered");
@@ -7390,10 +7352,20 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
        * into a single (32-bit) value.
        */
      case nir_tex_src_backend2:
-         assert(instr->op == nir_texop_tg4);
-         pack_lod_bias_and_offset = true;
-         srcs[TEX_LOGICAL_SRC_LOD] =
-            retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F);
+         /* For TG4, if there is a LOD, it would have been packed together
+          * with offsets, just put everything into SRC_LOD.
+          *
+          * Otherwise this is a packed offset.
+          */
+         if (instr->op == nir_texop_tg4 &&
+             (nir_tex_instr_src_index(instr, nir_tex_src_lod) != -1 ||
+              nir_tex_instr_src_index(instr, nir_tex_src_bias) != -1)) {
+            pack_lod_bias_and_offset = true;
+            srcs[TEX_LOGICAL_SRC_LOD] =
+               retype(get_nir_src_imm(ntb, instr->src[i].src), BRW_TYPE_F);
+         } else {
+            srcs[TEX_LOGICAL_SRC_PACKED_OFFSET] = bld.emit_uniformize(src);
+         }
          break;
 
       /* If this parameter is present, we are packing either the explicit LOD

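Two things are worth noting in the nir_tex_src_backend2 handling above: when a TG4 has an LOD or bias, backend2 already contains the LOD merged with the 6-bit U/V offsets and is routed into SRC_LOD as a float; otherwise backend2 is just the packed offset, and it is uniformized because it lands in the scalar message header rather than in the per-lane payload. A standalone sketch of the merged encoding, assuming the [31:12]/[11:6]/[5:0] layout documented in the lowering pass below (pack_lod_offset_uv is illustrative, not driver code):

#include <stdint.h>
#include <string.h>

/* Merge a float LOD/bias with 6-bit signed U/V gather offsets in [-32,31]:
 * the LOD keeps its top 20 bits, the offsets occupy the 12 LSBs. */
static uint32_t
pack_lod_offset_uv(float lod, int u, int v)
{
   uint32_t lod_bits;
   memcpy(&lod_bits, &lod, sizeof(lod_bits));   /* bitcast, as NIR does */
   const uint32_t off_uv = ((uint32_t)(u & 0x3F) << 0) |
                           ((uint32_t)(v & 0x3F) << 6);
   return (lod_bits & 0xFFFFF000u) | off_uv;
}
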
@@ -757,6 +757,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
                            const brw_reg &surface_handle,
                            const brw_reg &sampler_handle,
                            const brw_reg &tg4_offset,
+                           const brw_reg &packed_offset,
                            unsigned payload_type_bit_size,
                            unsigned coord_components,
                            unsigned grad_components,
@@ -784,6 +785,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
    assert((sampler.file == BAD_FILE) != (sampler_handle.file == BAD_FILE));
 
    if (shader_opcode_needs_header(op, devinfo) || inst->offset != 0 ||
+       packed_offset.file != BAD_FILE ||
        sampler_handle.file != BAD_FILE ||
        is_high_sampler(devinfo, sampler) ||
        residency) {
@@ -825,8 +827,17 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst,
          ubld.MOV(header, brw_imm_ud(0));
       else
          ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_TYPE_UD));
-      if (inst->offset) {
-         ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset));
+      if (packed_offset.file != BAD_FILE || inst->offset) {
+         if (inst->offset && packed_offset.file != BAD_FILE) {
+            if (packed_offset.file == IMM)
+               ubld1.MOV(component(header, 2), brw_imm_ud(packed_offset.ud | inst->offset));
+            else
+               ubld1.OR(component(header, 2), packed_offset, brw_imm_ud(inst->offset));
+         } else if (packed_offset.file != BAD_FILE) {
+            ubld1.MOV(component(header, 2), packed_offset);
+         } else {
+            ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset));
+         }
       } else if (devinfo->ver < 11 &&
                  bld.shader->stage != MESA_SHADER_VERTEX &&
                  bld.shader->stage != MESA_SHADER_FRAGMENT) {
@@ -1286,6 +1297,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
    const brw_reg surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
    const brw_reg sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
    const brw_reg tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET];
+   const brw_reg packed_offset = inst->src[TEX_LOGICAL_SRC_PACKED_OFFSET];
    assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
    const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
    assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
@@ -1296,6 +1308,8 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
    const unsigned msg_payload_type_bit_size =
       get_sampler_msg_payload_type_bit_size(devinfo, inst);
 
+   assert(tg4_offset.file == BAD_FILE || packed_offset.file == BAD_FILE);
+
    /* 16-bit payloads are available only on gfx11+ */
    assert(msg_payload_type_bit_size != 16 || devinfo->ver >= 11);
@@ -1304,7 +1318,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
                                sample_index,
                                mcs, surface, sampler,
                                surface_handle, sampler_handle,
-                               tg4_offset,
+                               tg4_offset, packed_offset,
                                msg_payload_type_bit_size,
                                coord_components, grad_components,
                                residency);

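The if/else ladder above picks the cheapest way to fill message header dword 2, which now merges two sources of offset bits. A compact standalone model of that decision (BAD_FILE and IMM stand in for brw_reg file values; the strings describe instruction shapes, none of this is driver code):

#include <stdint.h>

enum file { BAD_FILE, IMM, GRF };   /* stand-ins for brw_reg::file */

/* Which instruction the ladder emits for header dword 2: an immediate
 * packed offset folds with inst->offset at compile time into one MOV,
 * while a register value needs a runtime OR. */
static const char *
header2_emit(enum file packed_offset, uint32_t inst_offset)
{
   if (packed_offset == BAD_FILE && inst_offset == 0)
      return "nothing: dword 2 keeps its initialized value";
   if (packed_offset != BAD_FILE && inst_offset != 0)
      return packed_offset == IMM
         ? "MOV header.2, imm(packed_offset.ud | inst->offset)"
         : "OR  header.2, packed_offset, imm(inst->offset)";
   return packed_offset != BAD_FILE
      ? "MOV header.2, packed_offset"
      : "MOV header.2, imm(inst->offset)";
}
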
@@ -1790,11 +1790,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
    if (OPT(nir_lower_tex, &tex_options))
       OPT(nir_lower_tex, &tex_options);
 
-   const struct brw_nir_lower_texture_opts brw_tex_options = {
-      .combined_lod_and_array_index = compiler->devinfo->ver >= 20,
-      .combined_lod_or_bias_and_offset = compiler->devinfo->ver >= 20,
-   };
-   OPT(brw_nir_lower_texture, &brw_tex_options);
+   OPT(brw_nir_lower_texture, devinfo);
 
    OPT(intel_nir_lower_sparse_intrinsics);

@@ -205,12 +205,8 @@ bool brw_nir_lower_texel_address(nir_shader *shader,
                                  const struct intel_device_info *devinfo,
                                  enum isl_tiling tiling);
 
-struct brw_nir_lower_texture_opts {
-   bool combined_lod_and_array_index;
-   bool combined_lod_or_bias_and_offset;
-};
 bool brw_nir_lower_texture(nir_shader *nir,
-                           const struct brw_nir_lower_texture_opts *opts);
+                           const struct intel_device_info *devinfo);
 
 bool brw_nir_lower_sample_index_in_coord(nir_shader *nir);

@@ -22,6 +22,7 @@
  */
 
 #include "compiler/nir/nir_builder.h"
+#include "compiler/nir/nir_format_convert.h"
 #include "brw_nir.h"
 
 /**
@@ -103,8 +104,11 @@ pack_lod_and_array_index(nir_builder *b, nir_tex_instr *tex)
 static bool
 pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex)
 {
-   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
-   if (offset_index < 0)
+   /* If there is no backend2, it means there was no offset to pack so just
+    * bail.
+    */
+   int backend2_index = nir_tex_instr_src_index(tex, nir_tex_src_backend2);
+   if (backend2_index < 0)
       return false;
 
    /* If 32-bit texture coordinates are used, pack either the explicit LOD or
@@ -130,18 +134,13 @@ pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex)
       return false;
    }
 
-   nir_def *lod = tex->src[lod_index].src.ssa;
-   nir_def *offset = tex->src[offset_index].src.ssa;
-
-   b->cursor = nir_before_instr(&tex->instr);
-
    /* When using the programmable offsets instruction gather4_po_l_c with
     * SIMD16 or SIMD32 the U, V offsets are combined with LOD/bias parameters
    * on the 12 LSBs. For the offset parameters on gather instructions the 6
    * least significant bits are honored as signed value with a range
    * [-32..31].
    *
-    * Pack Offset U, and V for texture gather with offsets.
+    * Offsets should already have been packed in pack_const_offset().
    *
    * ------------------------------------------
    * |Bits     | [31:12]  | [11:6]  | [5:0]   |
@@ -149,57 +148,129 @@ pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex)
    * |OffsetUV | LOD/Bias | OffsetV | OffsetU |
    * ------------------------------------------
    */
-   nir_def *offu = nir_iand_imm(b, nir_channel(b, offset, 0), 0x3F);
-   nir_def *offv = nir_iand_imm(b, nir_channel(b, offset, 1), 0x3F);
-   nir_def *offsetUV = nir_ior(b, offu, nir_ishl_imm(b, offv, 6));
-   nir_def *lod_offsetUV = nir_ior(b, offsetUV,
+   nir_def *lod = tex->src[lod_index].src.ssa;
+   nir_def *backend2 = tex->src[backend2_index].src.ssa;
+
+   b->cursor = nir_before_instr(&tex->instr);
+
+   nir_def *lod_offsetUV = nir_ior(b, backend2,
                                    nir_iand_imm(b, lod, 0xFFFFF000));
-
-   nir_tex_instr_remove_src(tex, offset_index);
-   nir_tex_instr_add_src(tex, nir_tex_src_backend2, lod_offsetUV);
+   nir_src_rewrite(&tex->src[backend2_index].src, lod_offsetUV);
 
    return true;
 }
 
 static bool
-brw_nir_lower_texture_instr(nir_builder *b, nir_instr *instr, void *cb_data)
+pack_offset(nir_builder *b, nir_tex_instr *tex, bool pack_6bits_offsets)
 {
-   if (instr->type != nir_instr_type_tex)
+   /* No offset, nothing to do */
+   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
+   if (offset_index < 0)
       return false;
 
-   const struct brw_nir_lower_texture_opts *opts = cb_data;
-   nir_tex_instr *tex = nir_instr_as_tex(instr);
+   b->cursor = nir_before_instr(&tex->instr);
+
+   nir_def *offset = tex->src[offset_index].src.ssa;
+
+   /* Combine all three offsets into a single unsigned dword:
+    *
+    *    bits 11:8 - U Offset (X component)
+    *    bits  7:4 - V Offset (Y component)
+    *    bits  3:0 - R Offset (Z component)
+    *
+    * Or for TG4 messages with pack_6bits_offsets=true, do the bottom packing
+    * of :
+    *
+    * ------------------------------------------
+    * |Bits     | [31:12]  | [11:6]  | [5:0]   |
+    * ------------------------------------------
+    * |OffsetUV | LOD/Bias | OffsetV | OffsetU |
+    * ------------------------------------------
+    */
+   const unsigned num_components =
+      nir_tex_instr_src_size(tex, offset_index);
+
+   static const unsigned bits4_bits[] = { 4, 4, 4, };
+   static const unsigned bits6_bits[] = { 6, 6, 0, };
+   offset = nir_pad_vector_imm_int(b, offset, 0, num_components);
+   offset = nir_format_clamp_sint(
+      b, offset, pack_6bits_offsets ? bits6_bits : bits4_bits);
+
+   static const unsigned bits4_offsets[] = { 8, 4, 0, };
+   static const unsigned bits6_offsets[] = { 0, 6, 0, };
+   const unsigned *comp_bits_offsets = pack_6bits_offsets ?
+      bits6_offsets : bits4_offsets;
+   const unsigned value_mask = pack_6bits_offsets ? 0x3f : 0xf;
+
+   nir_def *packed_offset = NULL;
+   for (unsigned c = 0; c < num_components; c++) {
+      nir_def *c_shifted = nir_ishl_imm(
+         b,
+         nir_iand_imm(b, nir_channel(b, offset, c), value_mask),
+         comp_bits_offsets[c]);
+      packed_offset = packed_offset == NULL ? c_shifted : nir_ior(b, packed_offset, c_shifted);
+   }
+
+   nir_tex_instr_remove_src(tex, offset_index);
+   nir_tex_instr_add_src(tex, nir_tex_src_backend2, packed_offset);
+
+   return true;
+}
+
+static bool
+intel_nir_lower_texture_instr(nir_builder *b, nir_tex_instr *tex, void *cb_data)
+{
+   const struct intel_device_info *devinfo = cb_data;
+
+   const bool has_lod =
+      nir_tex_instr_src_index(tex, nir_tex_src_lod) != -1 ||
+      nir_tex_instr_src_index(tex, nir_tex_src_bias) != -1;
+
+   /* On Gfx20+, when we have a LOD, we need to pack the offsets with it. When
+    * there is no LOD, the offsets are lowered in the coordinates (see
+    * lower_xehp_tg4_offset_filter).
+    */
+   const bool needs_tg4_load_bias_offset_packing =
+      tex->op == nir_texop_tg4 && has_lod &&
+      devinfo->ver >= 20;
+   const bool needs_tg4_offset_packing = devinfo->verx10 >= 125;
+
+   bool progress = false;
+
+   if (tex->op != nir_texop_txf &&
+       (tex->op != nir_texop_tg4 || needs_tg4_offset_packing)) {
+      progress |= pack_offset(b, tex, needs_tg4_load_bias_offset_packing);
+   }
 
    switch (tex->op) {
    case nir_texop_txl:
    case nir_texop_txb:
-   case nir_texop_tg4:
+   case nir_texop_tg4: {
       if (tex->is_array &&
          tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
-          opts->combined_lod_and_array_index) {
-         return pack_lod_and_array_index(b, tex);
+          devinfo->ver >= 20) {
+         progress |= pack_lod_and_array_index(b, tex);
       }
-      if (tex->op == nir_texop_tg4 && opts->combined_lod_or_bias_and_offset) {
-         return pack_lod_or_bias_and_offset(b, tex);
-      }
-      return false;
+      if (needs_tg4_load_bias_offset_packing)
+         progress |= pack_lod_or_bias_and_offset(b, tex);
+      break;
+   }
 
    default:
-      /* Nothing to do */
-      return false;
+      break;
    }
 
-   return false;
+   return progress;
 }
 
 bool
 brw_nir_lower_texture(nir_shader *shader,
-                      const struct brw_nir_lower_texture_opts *opts)
+                      const struct intel_device_info *devinfo)
 {
-   return nir_shader_instructions_pass(shader,
-                                       brw_nir_lower_texture_instr,
-                                       nir_metadata_none,
-                                       (void *)opts);
+   return nir_shader_tex_pass(shader,
+                              intel_nir_lower_texture_instr,
+                              nir_metadata_none,
+                              (void *)devinfo);
 }
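
To make the two packing modes of pack_offset() concrete, here is a standalone C model of the value it computes (assumed equivalent behavior, written for illustration; clamp_sint mirrors what nir_format_clamp_sint is used for here, and none of these helpers are driver code). In the 6-bit mode the offset source only ever has two components, so the zero-width third entry never contributes:

#include <stdint.h>

/* Clamp v to the range of a signed integer of the given width;
 * a zero-width field always packs as 0. */
static int32_t
clamp_sint(int32_t v, unsigned bits)
{
   if (bits == 0)
      return 0;
   const int32_t lo = -(1 << (bits - 1));
   const int32_t hi = (1 << (bits - 1)) - 1;
   return v < lo ? lo : v > hi ? hi : v;
}

/* 4-bit mode: U/V/R at bits [11:8]/[7:4]/[3:0] (header offsets).
 * 6-bit mode: U/V at bits [5:0]/[11:6] (TG4 LOD+offset packing). */
static uint32_t
model_pack_offset(const int32_t off[3], unsigned num, int pack_6bits)
{
   static const unsigned bits4[3] = { 4, 4, 4 }, shift4[3] = { 8, 4, 0 };
   static const unsigned bits6[3] = { 6, 6, 0 }, shift6[3] = { 0, 6, 0 };
   const unsigned *bits  = pack_6bits ? bits6 : bits4;
   const unsigned *shift = pack_6bits ? shift6 : shift4;
   const uint32_t mask   = pack_6bits ? 0x3F : 0xF;

   uint32_t packed = 0;
   for (unsigned c = 0; c < num; c++)
      packed |= ((uint32_t)clamp_sint(off[c], bits[c]) & mask) << shift[c];
   return packed;
}

For example, model_pack_offset((const int32_t[]){1, -2, 0}, 3, 0) returns 0x1E0. Note the behavioral difference from the deleted brw_texture_offset(): out-of-range constant offsets used to make the backend fall back to the TG4 offset path, whereas the NIR pass clamps them, which is what lets this same path handle the non-constant offsets VK_KHR_maintenance8 introduces.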