nir,agx: switch to bindless_image_agx intrinsic
This is more explicit than vec2s and hence has fewer footguns. In particular,
it's easier to handle with preambles in a sane way.

Modelled on what ir3 does.

There's probably room for more cleanup, but for now this unblocks what I want
to do.

Stats don't seem concerning.

Totals from 692 (1.29% of 53701) affected shaders:
MaxWaves: 441920 -> 442112 (+0.04%)
Instrs: 1588748 -> 1589304 (+0.03%); split: -0.05%, +0.08%
CodeSize: 11487976 -> 11491620 (+0.03%); split: -0.04%, +0.07%
ALU: 1234867 -> 1235407 (+0.04%); split: -0.06%, +0.10%
FSCIB: 1234707 -> 1235249 (+0.04%); split: -0.06%, +0.10%
IC: 380514 -> 380518 (+0.00%)
GPRs: 117292 -> 117332 (+0.03%); split: -0.08%, +0.11%
Preamble instrs: 314064 -> 313948 (-0.04%); split: -0.05%, +0.01%
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35949>
This commit is contained in:
parent 7f23f37e82
commit ee26938faf
11 changed files with 64 additions and 85 deletions
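To see the shape of the change before reading the diffs: a minimal sketch of the two handle representations, using the builder helpers that appear in the hunks below. The function names and the 0 heap index are illustrative, not code from the commit.

#include "nir_builder.h"

/* Old encoding: a handle is a vec2 of (constant uniform index, dynamic
 * byte offset). Illustrative sketch, not code from this commit. */
static nir_def *
handle_as_vec2(nir_builder *b, nir_def *offset_B)
{
   return nir_vec2(b, nir_imm_int(b, 0), offset_B);
}

/* New encoding: a single bindless_image_agx intrinsic. The heap uniform
 * moves into the DESC_SET index, so only the byte offset stays in SSA. */
static nir_def *
handle_as_intrinsic(nir_builder *b, nir_def *offset_B)
{
   return nir_bindless_image_agx(b, offset_B, .desc_set = 0);
}
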
@@ -855,21 +855,18 @@ agx_tex_dim(enum glsl_sampler_dim dim, bool array)
 }
 
 /*
- * In the hardware, bindless texture sources are specified as a 64-bit uniform
- * base address summed with a 32-bit register index. In NIR, we model this as a
- * vec2, where the first source is the (constant) uniform register number and
- * the second source is the (dynamic) byte offset.
+ * Hardware bindless texture sources are specified as a 64-bit uniform base
+ * address summed with a 32-bit register index. We model this in NIR with the
+ * bindless_image_agx intrinsic.
  */
 static agx_index
 agx_translate_bindless_handle(agx_builder *b, nir_src *handle, agx_index *base)
 {
-   nir_scalar base_scalar = nir_scalar_resolved(handle->ssa, 0);
-   assert(nir_scalar_is_const(base_scalar) && "base must be constant");
+   nir_intrinsic_instr *intr = nir_src_as_intrinsic(*handle);
+   assert(intr->intrinsic == nir_intrinsic_bindless_image_agx);
 
-   unsigned base_uint = nir_scalar_as_uint(base_scalar);
-   *base = agx_uniform(base_uint, AGX_SIZE_64);
-
-   return agx_emit_extract(b, agx_src_index(handle), 1);
+   *base = agx_uniform(nir_intrinsic_desc_set(intr), AGX_SIZE_64);
+   return agx_src_index(&intr->src[0]);
 }
 
 static agx_instr *

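The comment above is the crux of the encoding: a handle names a 64-bit uniform (via DESC_SET) plus a dynamic 32-bit byte offset, and the effective address is their sum. A standalone C model of that computation, with made-up example values:

#include <stdint.h>
#include <stdio.h>

/* Models the addressing scheme described in the comment above:
 * descriptor address = 64-bit uniform base + 32-bit byte offset.
 * The base and index values are made up for illustration. */
int main(void)
{
   uint64_t heap_base = 0x180000000ull; /* contents of uniform[desc_set] */
   uint32_t offset_B = 5 * 24;          /* texture index times a 24-byte stride */

   uint64_t descriptor = heap_base + (uint64_t)offset_B;
   printf("descriptor at %#llx\n", (unsigned long long)descriptor);
   return 0;
}
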
@@ -1730,6 +1727,10 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
    case nir_intrinsic_export_agx:
       return agx_emit_export(b, nir_intrinsic_base(instr), instr->src[0]);
 
+   case nir_intrinsic_bindless_image_agx:
+      /* These must always be chased */
+      return NULL;
+
    case nir_intrinsic_load_barycentric_sample:
    case nir_intrinsic_load_sample_id:
    case nir_intrinsic_load_sample_pos:

@@ -3375,15 +3376,11 @@ lower_load_from_texture_handle(nir_builder *b, nir_intrinsic_instr *intr,
    if (intr->intrinsic != nir_intrinsic_load_from_texture_handle_agx)
       return false;
 
-   /* Bindless handles are a vec2, where the first source is the (constant)
-    * uniform register number and the second source is the byte offset.
-    */
-   nir_scalar uniform = nir_scalar_resolved(intr->src[0].ssa, 0);
-   unsigned uniform_idx = nir_scalar_as_uint(uniform);
+   nir_intrinsic_instr *handle = nir_src_as_intrinsic(intr->src[0]);
 
    b->cursor = nir_instr_remove(&intr->instr);
-   nir_def *base = nir_load_preamble(b, 1, 64, uniform_idx);
-   nir_def *offset = nir_u2u64(b, nir_channel(b, intr->src[0].ssa, 1));
+   nir_def *base = nir_load_preamble(b, 1, 64, nir_intrinsic_desc_set(handle));
+   nir_def *offset = nir_u2u64(b, handle->src[0].ssa);
 
    nir_def_rewrite_uses(&intr->def, nir_iadd(b, base, offset));
    return true;

@@ -8,6 +8,7 @@
 #include "util/macros.h"
 #include "agx_compiler.h"
 #include "nir.h"
+#include "nir_intrinsics.h"
 #include "nir_opcodes.h"
 
 static void

@@ -279,43 +280,11 @@ rewrite_cost(nir_def *def, const void *data)
 static bool
 avoid_instr(const nir_instr *instr, const void *data)
 {
-   const nir_def *def = nir_instr_def((nir_instr *)instr);
    if (instr->type != nir_instr_type_intrinsic)
       return false;
 
-   /* Do not move bindless handles, since we need those to retain their
-    * constant base index.
-    */
-   if (def) {
-      nir_foreach_use(use, def) {
-         if (nir_src_parent_instr(use)->type == nir_instr_type_tex) {
-            /* Check if used as a bindless texture handle */
-            nir_tex_instr *tex = nir_instr_as_tex(nir_src_parent_instr(use));
-            int handle_idx =
-               nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
-
-            if (handle_idx >= 0 && tex->src[handle_idx].src.ssa == def)
-               return true;
-         } else if (nir_src_parent_instr(use)->type ==
-                    nir_instr_type_intrinsic) {
-            /* Check if used as a bindless image handle */
-            nir_intrinsic_instr *intr =
-               nir_instr_as_intrinsic(nir_src_parent_instr(use));
-
-            switch (intr->intrinsic) {
-            case nir_intrinsic_bindless_image_load:
-            case nir_intrinsic_bindless_image_sparse_load:
-            case nir_intrinsic_bindless_image_store:
-            case nir_intrinsic_bindless_image_store_block_agx:
-               if (intr->src[0].ssa == def)
-                  return true;
-               break;
-            default:
-               break;
-            }
-         }
-      }
-   }
-
-   return false;
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   return intr->intrinsic == nir_intrinsic_bindless_image_agx;
 }
 
 static const nir_opt_preamble_options preamble_options = {

@@ -15,7 +15,6 @@
 #include "libagx_shaders.h"
 #include "nir.h"
 #include "nir_builder.h"
-#include "nir_intrinsics.h"
 #include "pool.h"
 
 static bool

@@ -26,8 +25,8 @@ lower_tex_handle_to_u0(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 
    b->cursor = nir_instr_remove(&intr->instr);
    nir_def_rewrite_uses(
-      &intr->def,
-      nir_vec2(b, nir_imm_int(b, 0), nir_imul_imm(b, intr->src[0].ssa, 24)));
+      &intr->def, nir_bindless_image_agx(
+                     b, nir_imul_imm(b, intr->src[0].ssa, 24), .desc_set = 0));
 
    return true;
 }

@@ -140,7 +140,8 @@ lower_non_monolithic_uniforms(nir_builder *b, nir_intrinsic_instr *intr,
       b->cursor = nir_instr_remove(&intr->instr);
       nir_def *offs =
          nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), AGX_TEXTURE_LENGTH);
-      nir_def_rewrite_uses(&intr->def, nir_vec2(b, nir_imm_int(b, 0), offs));
+      nir_def_rewrite_uses(&intr->def,
+                           nir_bindless_image_agx(b, offs, .desc_set = 0));
       return true;
    } else {
       return false;

@@ -24,7 +24,7 @@
  * is hardcoded and the latter is an offsetof.
  */
 #define HANDLE(field) \
-   (uint2)(0, offsetof(struct libagx_decompress_images, field))
+   nir_bindless_image_agx(offsetof(struct libagx_decompress_images, field), 0)
 
 /*
  * The metadata buffer is fully twiddled, so interleave the X/Y coordinate bits.

@@ -17,14 +17,15 @@ uint32_t nir_load_helper_arg_lo_agx(void);
 uint32_t nir_load_helper_arg_hi_agx(void);
 void nir_fence_helper_exit_agx(void);
 
-uint4 nir_bindless_image_load(uint2 handle, int4 coord, uint sample, uint lod,
+uint4 nir_bindless_image_load(uint handle, int4 coord, uint sample, uint lod,
                               uint image_dim, uint image_array, uint format,
                               uint access, uint dest_type);
-void nir_bindless_image_store(uint2 handle, int4 coord, uint sample,
-                              uint4 datum, uint lod, uint image_dim,
-                              uint image_array, uint format, uint access,
-                              uint src_type);
+void nir_bindless_image_store(uint handle, int4 coord, uint sample, uint4 datum,
+                              uint lod, uint image_dim, uint image_array,
+                              uint format, uint access, uint src_type);
+
+uint nir_bindless_image_agx(uint offset_B, uint uniform);
 
 uint32_t libagx_twiddle_coordinates(ushort2 coord, uint16_t tile_w_px,
                                     uint16_t tile_h_px,

@@ -960,7 +960,8 @@ lower_uniforms(nir_builder *b, nir_intrinsic_instr *intr, void *data)
    nir_def *rep;
 
    if (intr->intrinsic == nir_intrinsic_load_texture_handle_agx) {
-      rep = nir_vec2(b, nir_imm_int(b, ctx->image_heap), intr->src[0].ssa);
+      rep = nir_bindless_image_agx(b, intr->src[0].ssa,
+                                   .desc_set = ctx->image_heap);
    } else {
       rep = nir_load_preamble(b, 1, 64, .base = ctx->root);
    }

@@ -727,6 +727,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
    case nir_intrinsic_load_reg:
    case nir_intrinsic_load_constant_agx:
    case nir_intrinsic_load_texture_handle_agx:
+   case nir_intrinsic_bindless_image_agx:
    case nir_intrinsic_load_reg_indirect:
    case nir_intrinsic_load_const_ir3:
    case nir_intrinsic_load_frag_size_ir3:

@@ -2109,15 +2109,15 @@ intrinsic("load_sampler_handle_agx", [1], 1, [],
           bit_sizes=[16])
 
 # Load a bindless texture handle mapping a binding table texture.
-intrinsic("load_texture_handle_agx", [1], 2, [],
+intrinsic("load_texture_handle_agx", [1], 1, [],
           flags=[CAN_ELIMINATE, CAN_REORDER],
           bit_sizes=[32])
 
-# Given a vec2 bindless texture handle, load the address of the texture
-# descriptor described by that vec2. This allows inspecting the descriptor from
-# the shader. This does not actually load the content of the descriptor, only
-# the content of the handle (which is the address of the descriptor).
-intrinsic("load_from_texture_handle_agx", [2], 1, [],
+# Given a bindless texture handle, load the address of the texture descriptor
+# described by that. This allows inspecting the descriptor from the shader. This
+# does not actually load the content of the descriptor, only the content of the
+# handle (which is the address of the descriptor).
+intrinsic("load_from_texture_handle_agx", [1], 1, [],
           flags=[CAN_ELIMINATE, CAN_REORDER],
           bit_sizes=[64])

@@ -2325,6 +2325,12 @@ intrinsic("export_agx", [0], indices=[BASE])
 # at BASE. Must only appear in the first block of the shader part.
 load("exported_agx", [], [BASE], [CAN_ELIMINATE])
 
+# AGX-specific bindless texture/image handle specifier. Similar to
+# vulkan_resource_index. The "descriptor set" here is the heap uniform. The
+# source is the offset in bytes into the heap.
+intrinsic("bindless_image_agx", [1], dest_comp=1, bit_sizes=[32],
+          indices=[DESC_SET], flags=[CAN_ELIMINATE, CAN_REORDER])
+
 # Intel-specific query for loading from the isl_image_param struct passed
 # into the shader as a uniform. The variable is a deref to the image
 # variable. The const index specifies which of the six parameters to load.

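A usage note on the new intrinsic: consumers do not treat the handle as opaque data; they chase the SSA value back to its defining bindless_image_agx to recover the (uniform, offset) pair, as agx_translate_bindless_handle does in the first hunk. A hedged sketch of that consumer pattern, with an illustrative function name:

#include <assert.h>
#include "nir.h"

/* Illustrative consumer-side sketch: recover the heap uniform (DESC_SET)
 * and the dynamic byte offset from a handle source. */
static void
chase_bindless_handle(nir_src *handle, unsigned *heap_uniform,
                      nir_def **offset_B)
{
   nir_intrinsic_instr *intr = nir_src_as_intrinsic(*handle);
   assert(intr->intrinsic == nir_intrinsic_bindless_image_agx);

   *heap_uniform = nir_intrinsic_desc_set(intr); /* names the 64-bit base */
   *offset_B = intr->src[0].ssa;                 /* 32-bit byte offset */
}
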
@@ -239,6 +239,7 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
    case nir_intrinsic_bindless_resource_ir3:
    case nir_intrinsic_load_const_ir3:
    case nir_intrinsic_load_constant_agx:
+   case nir_intrinsic_bindless_image_agx:
       return can_move_srcs(&instr->instr, ctx);
 
    /* Image/SSBO loads can be moved if they are CAN_REORDER and their

@@ -118,13 +118,12 @@ load_ubo(nir_builder *b, nir_intrinsic_instr *intr, void *bases)
 static nir_def *
 load_texture_handle(nir_builder *b, nir_intrinsic_instr *intr, void *base)
 {
-   nir_def *uniform =
-      nir_load_sysval_agx(b, 1, 64, .desc_set = stage_table(b),
-                          .binding = (uintptr_t)base, .flags = ~0);
+   nir_def *offs_B =
+      nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), AGX_TEXTURE_DESC_STRIDE);
 
-   return nir_vec2(
-      b, nir_u2u32(b, uniform),
-      nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), AGX_TEXTURE_DESC_STRIDE));
+   nir_load_sysval_agx(b, 1, 64, .desc_set = stage_table(b),
+                       .binding = (uintptr_t)base, .flags = ~0);
+   return nir_bindless_image_agx(b, offs_B);
 }
 
 static nir_def *

@@ -461,23 +460,27 @@ lay_out_uniforms(struct agx_compiled_shader *shader, struct state *state)
       nir_intrinsic_instr *intr = *intr_;
       uint8_t table = nir_intrinsic_desc_set(intr);
       uint16_t offset = nir_intrinsic_binding(intr);
-      bool load_uniform_location = nir_intrinsic_flags(intr);
+      bool bindless_image = nir_intrinsic_flags(intr);
 
       struct agx_push_range *range =
         find_push_range_containing(shader, table, offset);
       unsigned base = range->uniform + ((offset - range->offset) / 2);
 
-      nir_builder b = nir_builder_at(nir_instr_remove(&(intr->instr)));
-      nir_def *repl;
+      nir_builder b = nir_builder_at(nir_before_instr(&intr->instr));
 
-      if (load_uniform_location) {
-         repl = nir_imm_int(&b, base);
+      if (bindless_image) {
+         nir_instr *next = nir_instr_next(&intr->instr);
+         assert(next->type == nir_instr_type_intrinsic);
+
+         nir_intrinsic_instr *nintr = nir_instr_as_intrinsic(next);
+         assert(nintr->intrinsic == nir_intrinsic_bindless_image_agx);
+
+         nir_intrinsic_set_desc_set(nintr, base);
       } else {
-         repl = nir_load_preamble(&b, intr->def.num_components,
-                                  intr->def.bit_size, .base = base);
+         nir_def *repl = nir_load_preamble(&b, intr->def.num_components,
+                                           intr->def.bit_size, .base = base);
+         nir_def_replace(&intr->def, repl);
       }
-
-      nir_def_rewrite_uses(&intr->def, repl);
    }
 
    return uniform;

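The lay_out_uniforms hunk relies on an ordering invariant: each load_texture_handle_agx sysval is immediately followed by the bindless_image_agx intrinsic that consumes it (see load_texture_handle above), so the pass can patch the allocated uniform index into DESC_SET in place instead of materializing an immediate. Condensed into a sketch with a hypothetical helper name; the assertions mirror the hunk:

#include <assert.h>
#include "nir.h"

/* Hypothetical helper condensing the hunk above: after uniforms are laid
 * out, rewrite DESC_SET on the handle intrinsic that directly follows the
 * sysval load so it names the allocated 64-bit base uniform. */
static void
patch_handle_base(nir_intrinsic_instr *sysval, unsigned base)
{
   nir_instr *next = nir_instr_next(&sysval->instr);
   assert(next != NULL && next->type == nir_instr_type_intrinsic);

   nir_intrinsic_instr *handle = nir_instr_as_intrinsic(next);
   assert(handle->intrinsic == nir_intrinsic_bindless_image_agx);

   nir_intrinsic_set_desc_set(handle, base);
}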