nir,agx: switch to bindless_image_agx intrinsic

this is more explicit than vec2s and hence has fewer footguns. in particular
it's easier to handle with preambles in a sane way.

modelled on what ir3 does.

there's probably room for more cleanup but for now this unblocks what I want to
do.
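
for illustration, the producer-side change looks like this (a sketch using the
builder calls from the diffs below; offs is the 32-bit byte offset into the
heap). a handle that used to be built as

   nir_def *handle = nir_vec2(b, nir_imm_int(b, 0), offs);

is now built as

   nir_def *handle = nir_bindless_image_agx(b, offs, .desc_set = 0);

where desc_set names the 64-bit heap base uniform instead of smuggling it
through the vec2's first component.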

stats don't seem concerning.

Totals from 692 (1.29% of 53701) affected shaders:
MaxWaves: 441920 -> 442112 (+0.04%)
Instrs: 1588748 -> 1589304 (+0.03%); split: -0.05%, +0.08%
CodeSize: 11487976 -> 11491620 (+0.03%); split: -0.04%, +0.07%
ALU: 1234867 -> 1235407 (+0.04%); split: -0.06%, +0.10%
FSCIB: 1234707 -> 1235249 (+0.04%); split: -0.06%, +0.10%
IC: 380514 -> 380518 (+0.00%)
GPRs: 117292 -> 117332 (+0.03%); split: -0.08%, +0.11%
Preamble instrs: 314064 -> 313948 (-0.04%); split: -0.05%, +0.01%

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35949>
commit ee26938faf
parent 7f23f37e82
Alyssa Rosenzweig 2025-07-02 17:35:01 -04:00
11 changed files with 64 additions and 85 deletions


@@ -855,21 +855,18 @@ agx_tex_dim(enum glsl_sampler_dim dim, bool array)
 }
 
 /*
- * In the hardware, bindless texture sources are specified as a 64-bit uniform
- * base address summed with a 32-bit register index. In NIR, we model this as a
- * vec2, where the first source is the (constant) uniform register number and
- * the second source is the (dynamic) byte offset.
+ * Hardware bindless texture sources are specified as a 64-bit uniform base
+ * address summed with a 32-bit register index. We model this in NIR with the
+ * bindless_image_agx intrinsic.
  */
 static agx_index
 agx_translate_bindless_handle(agx_builder *b, nir_src *handle, agx_index *base)
 {
-   nir_scalar base_scalar = nir_scalar_resolved(handle->ssa, 0);
-   assert(nir_scalar_is_const(base_scalar) && "base must be constant");
+   nir_intrinsic_instr *intr = nir_src_as_intrinsic(*handle);
+   assert(intr->intrinsic == nir_intrinsic_bindless_image_agx);
 
-   unsigned base_uint = nir_scalar_as_uint(base_scalar);
-   *base = agx_uniform(base_uint, AGX_SIZE_64);
-
-   return agx_emit_extract(b, agx_src_index(handle), 1);
+   *base = agx_uniform(nir_intrinsic_desc_set(intr), AGX_SIZE_64);
+   return agx_src_index(&intr->src[0]);
 }
 
 static agx_instr *
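
For illustration, the translation above pairs the 64-bit uniform named by
desc_set with the 32-bit offset in src[0]; schematically (pseudocode, not the
emitted ISA; descriptor_address is an illustrative name):

   /* descriptor address = heap base (64-bit uniform) + byte offset (32-bit) */
   descriptor_address = uniforms64[nir_intrinsic_desc_set(intr)] + intr->src[0];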
@@ -1730,6 +1727,10 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
    case nir_intrinsic_export_agx:
       return agx_emit_export(b, nir_intrinsic_base(instr), instr->src[0]);
 
+   case nir_intrinsic_bindless_image_agx:
+      /* These must always be chased */
+      return NULL;
+
    case nir_intrinsic_load_barycentric_sample:
    case nir_intrinsic_load_sample_id:
    case nir_intrinsic_load_sample_pos:
@@ -3375,15 +3376,11 @@ lower_load_from_texture_handle(nir_builder *b, nir_intrinsic_instr *intr,
    if (intr->intrinsic != nir_intrinsic_load_from_texture_handle_agx)
       return false;
 
-   /* Bindless handles are a vec2, where the first source is the (constant)
-    * uniform register number and the second source is the byte offset.
-    */
-   nir_scalar uniform = nir_scalar_resolved(intr->src[0].ssa, 0);
-   unsigned uniform_idx = nir_scalar_as_uint(uniform);
+   nir_intrinsic_instr *handle = nir_src_as_intrinsic(intr->src[0]);
 
    b->cursor = nir_instr_remove(&intr->instr);
-   nir_def *base = nir_load_preamble(b, 1, 64, uniform_idx);
-   nir_def *offset = nir_u2u64(b, nir_channel(b, intr->src[0].ssa, 1));
+   nir_def *base = nir_load_preamble(b, 1, 64, nir_intrinsic_desc_set(handle));
+   nir_def *offset = nir_u2u64(b, handle->src[0].ssa);
 
    nir_def_rewrite_uses(&intr->def, nir_iadd(b, base, offset));
    return true;
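
After this lowering the handle chain dissolves into plain address arithmetic;
schematically (pseudocode, not exact NIR syntax):

   %handle = bindless_image_agx %offs          (desc_set=N)
   %addr   = load_from_texture_handle_agx %handle

becomes

   %base = load_preamble                       (base=N, the 64-bit heap base)
   %addr = iadd %base, u2u64(%offs)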


@@ -8,6 +8,7 @@
 #include "util/macros.h"
 #include "agx_compiler.h"
 #include "nir.h"
+#include "nir_intrinsics.h"
 #include "nir_opcodes.h"
 
 static void
@@ -279,43 +280,11 @@ rewrite_cost(nir_def *def, const void *data)
 
 static bool
 avoid_instr(const nir_instr *instr, const void *data)
 {
-   const nir_def *def = nir_instr_def((nir_instr *)instr);
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
 
    /* Do not move bindless handles, since we need those to retain their
    * constant base index.
    */
-   if (def) {
-      nir_foreach_use(use, def) {
-         if (nir_src_parent_instr(use)->type == nir_instr_type_tex) {
-            /* Check if used as a bindless texture handle */
-            nir_tex_instr *tex = nir_instr_as_tex(nir_src_parent_instr(use));
-            int handle_idx =
-               nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
-
-            if (handle_idx >= 0 && tex->src[handle_idx].src.ssa == def)
-               return true;
-         } else if (nir_src_parent_instr(use)->type ==
-                    nir_instr_type_intrinsic) {
-            /* Check if used as a bindless image handle */
-            nir_intrinsic_instr *intr =
-               nir_instr_as_intrinsic(nir_src_parent_instr(use));
-
-            switch (intr->intrinsic) {
-            case nir_intrinsic_bindless_image_load:
-            case nir_intrinsic_bindless_image_sparse_load:
-            case nir_intrinsic_bindless_image_store:
-            case nir_intrinsic_bindless_image_store_block_agx:
-               if (intr->src[0].ssa == def)
-                  return true;
-               break;
-            default:
-               break;
-            }
-         }
-      }
-   }
-
-   return false;
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   return intr->intrinsic == nir_intrinsic_bindless_image_agx;
 }
 
 static const nir_opt_preamble_options preamble_options = {


@@ -15,7 +15,6 @@
 #include "libagx_shaders.h"
 #include "nir.h"
 #include "nir_builder.h"
-#include "nir_intrinsics.h"
 #include "pool.h"
 
 static bool
@@ -26,8 +25,8 @@ lower_tex_handle_to_u0(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 
    b->cursor = nir_instr_remove(&intr->instr);
    nir_def_rewrite_uses(
-      &intr->def,
-      nir_vec2(b, nir_imm_int(b, 0), nir_imul_imm(b, intr->src[0].ssa, 24)));
+      &intr->def, nir_bindless_image_agx(
+                     b, nir_imul_imm(b, intr->src[0].ssa, 24), .desc_set = 0));
 
    return true;
 }


@@ -140,7 +140,8 @@ lower_non_monolithic_uniforms(nir_builder *b, nir_intrinsic_instr *intr,
       b->cursor = nir_instr_remove(&intr->instr);
       nir_def *offs =
          nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), AGX_TEXTURE_LENGTH);
-      nir_def_rewrite_uses(&intr->def, nir_vec2(b, nir_imm_int(b, 0), offs));
+      nir_def_rewrite_uses(&intr->def,
+                           nir_bindless_image_agx(b, offs, .desc_set = 0));
       return true;
    } else {
       return false;


@@ -24,7 +24,7 @@
  * is hardcoded and the latter is an offsetof.
  */
 #define HANDLE(field) \
-   (uint2)(0, offsetof(struct libagx_decompress_images, field))
+   nir_bindless_image_agx(offsetof(struct libagx_decompress_images, field), 0)
 
 /*
  * The metadata buffer is fully twiddled, so interleave the X/Y coordinate bits.


@@ -17,14 +17,15 @@ uint32_t nir_load_helper_arg_lo_agx(void);
 uint32_t nir_load_helper_arg_hi_agx(void);
 void nir_fence_helper_exit_agx(void);
 
-uint4 nir_bindless_image_load(uint2 handle, int4 coord, uint sample, uint lod,
+uint4 nir_bindless_image_load(uint handle, int4 coord, uint sample, uint lod,
                               uint image_dim, uint image_array, uint format,
                               uint access, uint dest_type);
-void nir_bindless_image_store(uint2 handle, int4 coord, uint sample,
-                              uint4 datum, uint lod, uint image_dim,
-                              uint image_array, uint format, uint access,
-                              uint src_type);
+void nir_bindless_image_store(uint handle, int4 coord, uint sample, uint4 datum,
+                              uint lod, uint image_dim, uint image_array,
+                              uint format, uint access, uint src_type);
+uint nir_bindless_image_agx(uint offset_B, uint uniform);
 
 uint32_t libagx_twiddle_coordinates(ushort2 coord, uint16_t tile_w_px,
                                     uint16_t tile_h_px,


@@ -960,7 +960,8 @@ lower_uniforms(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 
    nir_def *rep;
    if (intr->intrinsic == nir_intrinsic_load_texture_handle_agx) {
-      rep = nir_vec2(b, nir_imm_int(b, ctx->image_heap), intr->src[0].ssa);
+      rep = nir_bindless_image_agx(b, intr->src[0].ssa,
+                                   .desc_set = ctx->image_heap);
    } else {
       rep = nir_load_preamble(b, 1, 64, .base = ctx->root);
    }


@@ -727,6 +727,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
    case nir_intrinsic_load_reg:
    case nir_intrinsic_load_constant_agx:
    case nir_intrinsic_load_texture_handle_agx:
+   case nir_intrinsic_bindless_image_agx:
    case nir_intrinsic_load_reg_indirect:
    case nir_intrinsic_load_const_ir3:
    case nir_intrinsic_load_frag_size_ir3:


@@ -2109,15 +2109,15 @@ intrinsic("load_sampler_handle_agx", [1], 1, [],
           bit_sizes=[16])
 
 # Load a bindless texture handle mapping a binding table texture.
-intrinsic("load_texture_handle_agx", [1], 2, [],
+intrinsic("load_texture_handle_agx", [1], 1, [],
           flags=[CAN_ELIMINATE, CAN_REORDER],
           bit_sizes=[32])
 
-# Given a vec2 bindless texture handle, load the address of the texture
-# descriptor described by that vec2. This allows inspecting the descriptor from
-# the shader. This does not actually load the content of the descriptor, only
-# the content of the handle (which is the address of the descriptor).
-intrinsic("load_from_texture_handle_agx", [2], 1, [],
+# Given a bindless texture handle, load the address of the texture descriptor
+# it describes. This allows inspecting the descriptor from the shader. This
+# does not actually load the content of the descriptor, only the content of the
+# handle (which is the address of the descriptor).
+intrinsic("load_from_texture_handle_agx", [1], 1, [],
           flags=[CAN_ELIMINATE, CAN_REORDER],
           bit_sizes=[64])
@@ -2325,6 +2325,12 @@ intrinsic("export_agx", [0], indices=[BASE])
 # at BASE. Must only appear in the first block of the shader part.
 load("exported_agx", [], [BASE], [CAN_ELIMINATE])
 
+# AGX-specific bindless texture/image handle specifier. Similar to
+# vulkan_resource_index. The "descriptor set" here is the heap uniform. The
+# source is the offset in bytes into the heap.
+intrinsic("bindless_image_agx", [1], dest_comp=1, bit_sizes=[32],
+          indices=[DESC_SET], flags=[CAN_ELIMINATE, CAN_REORDER])
+
 # Intel-specific query for loading from the isl_image_param struct passed
 # into the shader as a uniform. The variable is a deref to the image
 # variable. The const index specifies which of the six parameters to load.
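
A minimal producer for the new intrinsic, mirroring the builder calls elsewhere
in this commit (offs_B and heap_uniform are illustrative names; a desc_set left
unset defaults to 0 and may be patched later, as in lay_out_uniforms below):

   /* offs_B: 32-bit byte offset into the heap; heap_uniform: index of the
    * 64-bit heap base uniform, recorded in the DESC_SET index. */
   nir_def *handle = nir_bindless_image_agx(b, offs_B, .desc_set = heap_uniform);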


@@ -239,6 +239,7 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
    case nir_intrinsic_bindless_resource_ir3:
    case nir_intrinsic_load_const_ir3:
    case nir_intrinsic_load_constant_agx:
+   case nir_intrinsic_bindless_image_agx:
       return can_move_srcs(&instr->instr, ctx);
 
    /* Image/SSBO loads can be moved if they are CAN_REORDER and their


@@ -118,13 +118,12 @@ load_ubo(nir_builder *b, nir_intrinsic_instr *intr, void *bases)
 
 static nir_def *
 load_texture_handle(nir_builder *b, nir_intrinsic_instr *intr, void *base)
 {
-   nir_def *uniform =
-      nir_load_sysval_agx(b, 1, 64, .desc_set = stage_table(b),
-                          .binding = (uintptr_t)base, .flags = ~0);
+   nir_def *offs_B =
+      nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), AGX_TEXTURE_DESC_STRIDE);
 
-   return nir_vec2(
-      b, nir_u2u32(b, uniform),
-      nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), AGX_TEXTURE_DESC_STRIDE));
+   nir_load_sysval_agx(b, 1, 64, .desc_set = stage_table(b),
+                       .binding = (uintptr_t)base, .flags = ~0);
+   return nir_bindless_image_agx(b, offs_B);
 }
 
 static nir_def *
@@ -461,23 +460,27 @@ lay_out_uniforms(struct agx_compiled_shader *shader, struct state *state)
       nir_intrinsic_instr *intr = *intr_;
       uint8_t table = nir_intrinsic_desc_set(intr);
       uint16_t offset = nir_intrinsic_binding(intr);
-      bool load_uniform_location = nir_intrinsic_flags(intr);
+      bool bindless_image = nir_intrinsic_flags(intr);
 
       struct agx_push_range *range =
         find_push_range_containing(shader, table, offset);
      unsigned base = range->uniform + ((offset - range->offset) / 2);
-      nir_builder b = nir_builder_at(nir_instr_remove(&(intr->instr)));
 
-      nir_def *repl;
+      nir_builder b = nir_builder_at(nir_before_instr(&intr->instr));
 
-      if (load_uniform_location) {
-         repl = nir_imm_int(&b, base);
+      if (bindless_image) {
+         nir_instr *next = nir_instr_next(&intr->instr);
+         assert(next->type == nir_instr_type_intrinsic);
+
+         nir_intrinsic_instr *nintr = nir_instr_as_intrinsic(next);
+         assert(nintr->intrinsic == nir_intrinsic_bindless_image_agx);
+         nir_intrinsic_set_desc_set(nintr, base);
       } else {
-         repl = nir_load_preamble(&b, intr->def.num_components,
-                                  intr->def.bit_size, .base = base);
+         nir_def *repl = nir_load_preamble(&b, intr->def.num_components,
+                                           intr->def.bit_size, .base = base);
+         nir_def_replace(&intr->def, repl);
       }
-
-      nir_def_rewrite_uses(&intr->def, repl);
    }
 
    return uniform;
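
The pair emitted by load_texture_handle above is rewritten here without
touching SSA uses; schematically (pseudocode, illustrative):

   load_sysval_agx (desc_set=table, binding=offset, flags=~0)   /* marker */
   %handle = bindless_image_agx %offs_B (desc_set=0)

the sysval's push range is resolved to a uniform index base, and the
bindless_image_agx immediately after it is patched in place to desc_set=base.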