diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 938e7f7f802..a1963146b05 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -855,21 +855,18 @@ agx_tex_dim(enum glsl_sampler_dim dim, bool array)
 }
 
 /*
- * In the hardware, bindless texture sources are specified as a 64-bit uniform
- * base address summed with a 32-bit register index. In NIR, we model this as a
- * vec2, where the first source is the (constant) uniform register number and
- * the second source is the (dynamic) byte offset.
+ * Hardware bindless texture sources are specified as a 64-bit uniform base
+ * address summed with a 32-bit register index. We model this in NIR with the
+ * bindless_image_agx intrinsic.
  */
 static agx_index
 agx_translate_bindless_handle(agx_builder *b, nir_src *handle, agx_index *base)
 {
-   nir_scalar base_scalar = nir_scalar_resolved(handle->ssa, 0);
-   assert(nir_scalar_is_const(base_scalar) && "base must be constant");
+   nir_intrinsic_instr *intr = nir_src_as_intrinsic(*handle);
+   assert(intr->intrinsic == nir_intrinsic_bindless_image_agx);
 
-   unsigned base_uint = nir_scalar_as_uint(base_scalar);
-   *base = agx_uniform(base_uint, AGX_SIZE_64);
-
-   return agx_emit_extract(b, agx_src_index(handle), 1);
+   *base = agx_uniform(nir_intrinsic_desc_set(intr), AGX_SIZE_64);
+   return agx_src_index(&intr->src[0]);
 }
 
 static agx_instr *
@@ -1730,6 +1727,10 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
    case nir_intrinsic_export_agx:
      return agx_emit_export(b, nir_intrinsic_base(instr), instr->src[0]);
 
+   case nir_intrinsic_bindless_image_agx:
+      /* These must always be chased */
+      return NULL;
+
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
@@ -3375,15 +3376,11 @@ lower_load_from_texture_handle(nir_builder *b, nir_intrinsic_instr *intr,
    if (intr->intrinsic != nir_intrinsic_load_from_texture_handle_agx)
       return false;
 
-   /* Bindless handles are a vec2, where the first source is the (constant)
-    * uniform register number and the second source is the byte offset.
-    */
-   nir_scalar uniform = nir_scalar_resolved(intr->src[0].ssa, 0);
-   unsigned uniform_idx = nir_scalar_as_uint(uniform);
+   nir_intrinsic_instr *handle = nir_src_as_intrinsic(intr->src[0]);
 
    b->cursor = nir_instr_remove(&intr->instr);
-   nir_def *base = nir_load_preamble(b, 1, 64, uniform_idx);
-   nir_def *offset = nir_u2u64(b, nir_channel(b, intr->src[0].ssa, 1));
+   nir_def *base = nir_load_preamble(b, 1, 64, nir_intrinsic_desc_set(handle));
+   nir_def *offset = nir_u2u64(b, handle->src[0].ssa);
 
    nir_def_rewrite_uses(&intr->def, nir_iadd(b, base, offset));
    return true;
diff --git a/src/asahi/compiler/agx_nir_opt_preamble.c b/src/asahi/compiler/agx_nir_opt_preamble.c
index ee2c8243ca1..2fbb97e689d 100644
--- a/src/asahi/compiler/agx_nir_opt_preamble.c
+++ b/src/asahi/compiler/agx_nir_opt_preamble.c
@@ -8,6 +8,7 @@
 #include "util/macros.h"
 #include "agx_compiler.h"
 #include "nir.h"
+#include "nir_intrinsics.h"
 #include "nir_opcodes.h"
 
 static void
@@ -279,43 +280,11 @@ rewrite_cost(nir_def *def, const void *data)
 static bool
 avoid_instr(const nir_instr *instr, const void *data)
 {
-   const nir_def *def = nir_instr_def((nir_instr *)instr);
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
 
-   /* Do not move bindless handles, since we need those to retain their
-    * constant base index.
-    */
-   if (def) {
-      nir_foreach_use(use, def) {
-         if (nir_src_parent_instr(use)->type == nir_instr_type_tex) {
-            /* Check if used as a bindless texture handle */
-            nir_tex_instr *tex = nir_instr_as_tex(nir_src_parent_instr(use));
-            int handle_idx =
-               nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
-
-            if (handle_idx >= 0 && tex->src[handle_idx].src.ssa == def)
-               return true;
-         } else if (nir_src_parent_instr(use)->type ==
-                    nir_instr_type_intrinsic) {
-            /* Check if used as a bindless image handle */
-            nir_intrinsic_instr *intr =
-               nir_instr_as_intrinsic(nir_src_parent_instr(use));
-
-            switch (intr->intrinsic) {
-            case nir_intrinsic_bindless_image_load:
-            case nir_intrinsic_bindless_image_sparse_load:
-            case nir_intrinsic_bindless_image_store:
-            case nir_intrinsic_bindless_image_store_block_agx:
-               if (intr->src[0].ssa == def)
-                  return true;
-               break;
-            default:
-               break;
-            }
-         }
-      }
-   }
-
-   return false;
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   return intr->intrinsic == nir_intrinsic_bindless_image_agx;
 }
 
 static const nir_opt_preamble_options preamble_options = {
diff --git a/src/asahi/lib/agx_bg_eot.c b/src/asahi/lib/agx_bg_eot.c
index d0a24eb3f6e..0045907de55 100644
--- a/src/asahi/lib/agx_bg_eot.c
+++ b/src/asahi/lib/agx_bg_eot.c
@@ -15,7 +15,6 @@
 #include "libagx_shaders.h"
 #include "nir.h"
 #include "nir_builder.h"
-#include "nir_intrinsics.h"
 #include "pool.h"
 
 static bool
@@ -26,8 +25,8 @@ lower_tex_handle_to_u0(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 
    b->cursor = nir_instr_remove(&intr->instr);
    nir_def_rewrite_uses(
-      &intr->def,
-      nir_vec2(b, nir_imm_int(b, 0), nir_imul_imm(b, intr->src[0].ssa, 24)));
+      &intr->def, nir_bindless_image_agx(
+                     b, nir_imul_imm(b, intr->src[0].ssa, 24), .desc_set = 0));
 
    return true;
 }
diff --git a/src/asahi/lib/agx_nir_prolog_epilog.c b/src/asahi/lib/agx_nir_prolog_epilog.c
index 479ebb39f7c..67aa913834f 100644
--- a/src/asahi/lib/agx_nir_prolog_epilog.c
+++ b/src/asahi/lib/agx_nir_prolog_epilog.c
@@ -140,7 +140,8 @@ lower_non_monolithic_uniforms(nir_builder *b, nir_intrinsic_instr *intr,
       b->cursor = nir_instr_remove(&intr->instr);
       nir_def *offs =
         nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), AGX_TEXTURE_LENGTH);
-      nir_def_rewrite_uses(&intr->def, nir_vec2(b, nir_imm_int(b, 0), offs));
+      nir_def_rewrite_uses(&intr->def,
+                           nir_bindless_image_agx(b, offs, .desc_set = 0));
       return true;
    } else {
      return false;
diff --git a/src/asahi/libagx/compression.cl b/src/asahi/libagx/compression.cl
index f7cff29b0e2..2050f5a3c66 100644
--- a/src/asahi/libagx/compression.cl
+++ b/src/asahi/libagx/compression.cl
@@ -24,7 +24,7 @@
  * is hardcoded and the latter is an offsetof.
  */
 #define HANDLE(field)                                                          \
-   (uint2)(0, offsetof(struct libagx_decompress_images, field))
+   nir_bindless_image_agx(offsetof(struct libagx_decompress_images, field), 0)
 
 /*
  * The metadata buffer is fully twiddled, so interleave the X/Y coordinate bits.
diff --git a/src/asahi/libagx/libagx_intrinsics.h b/src/asahi/libagx/libagx_intrinsics.h
index 51896633591..c4731402eb4 100644
--- a/src/asahi/libagx/libagx_intrinsics.h
+++ b/src/asahi/libagx/libagx_intrinsics.h
@@ -17,14 +17,15 @@ uint32_t nir_load_helper_arg_lo_agx(void);
 uint32_t nir_load_helper_arg_hi_agx(void);
 void nir_fence_helper_exit_agx(void);
 
-uint4 nir_bindless_image_load(uint2 handle, int4 coord, uint sample, uint lod,
+uint4 nir_bindless_image_load(uint handle, int4 coord, uint sample, uint lod,
                               uint image_dim, uint image_array, uint format,
                               uint access, uint dest_type);
 
-void nir_bindless_image_store(uint2 handle, int4 coord, uint sample,
-                              uint4 datum, uint lod, uint image_dim,
-                              uint image_array, uint format, uint access,
-                              uint src_type);
+void nir_bindless_image_store(uint handle, int4 coord, uint sample, uint4 datum,
+                              uint lod, uint image_dim, uint image_array,
+                              uint format, uint access, uint src_type);
+
+uint nir_bindless_image_agx(uint offset_B, uint uniform);
 
 uint32_t libagx_twiddle_coordinates(ushort2 coord, uint16_t tile_w_px,
                                     uint16_t tile_h_px,
diff --git a/src/asahi/vulkan/hk_shader.c b/src/asahi/vulkan/hk_shader.c
index e603c240511..0ac77147f70 100644
--- a/src/asahi/vulkan/hk_shader.c
+++ b/src/asahi/vulkan/hk_shader.c
@@ -960,7 +960,8 @@ lower_uniforms(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 
    nir_def *rep;
    if (intr->intrinsic == nir_intrinsic_load_texture_handle_agx) {
-      rep = nir_vec2(b, nir_imm_int(b, ctx->image_heap), intr->src[0].ssa);
+      rep = nir_bindless_image_agx(b, intr->src[0].ssa,
+                                   .desc_set = ctx->image_heap);
    } else {
       rep = nir_load_preamble(b, 1, 64, .base = ctx->root);
    }
diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c
index 8feff77159b..cecf84c23be 100644
--- a/src/compiler/nir/nir_divergence_analysis.c
+++ b/src/compiler/nir/nir_divergence_analysis.c
@@ -727,6 +727,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
    case nir_intrinsic_load_reg:
    case nir_intrinsic_load_constant_agx:
    case nir_intrinsic_load_texture_handle_agx:
+   case nir_intrinsic_bindless_image_agx:
    case nir_intrinsic_load_reg_indirect:
    case nir_intrinsic_load_const_ir3:
    case nir_intrinsic_load_frag_size_ir3:
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 7345442a5d2..deae0b2e791 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -2109,15 +2109,15 @@ intrinsic("load_sampler_handle_agx", [1], 1, [],
           bit_sizes=[16])
 
 # Load a bindless texture handle mapping a binding table texture.
-intrinsic("load_texture_handle_agx", [1], 2, [],
+intrinsic("load_texture_handle_agx", [1], 1, [],
           flags=[CAN_ELIMINATE, CAN_REORDER],
           bit_sizes=[32])
 
-# Given a vec2 bindless texture handle, load the address of the texture
-# descriptor described by that vec2. This allows inspecting the descriptor from
-# the shader. This does not actually load the content of the descriptor, only
-# the content of the handle (which is the address of the descriptor).
+# Given a bindless texture handle, load the address of the texture descriptor
+# described by that handle. This allows inspecting the descriptor from the
+# shader. This does not actually load the content of the descriptor, only the
+# content of the handle (which is the address of the descriptor).
+intrinsic("load_from_texture_handle_agx", [1], 1, [], flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[64]) @@ -2325,6 +2325,12 @@ intrinsic("export_agx", [0], indices=[BASE]) # at BASE. Must only appear in the first block of the shader part. load("exported_agx", [], [BASE], [CAN_ELIMINATE]) +# AGX-specific bindless texture/image handle specifier. Similar to +# vulkan_resource_index. The "descriptor set" here is the heap uniform. The +# source is the offset in bytes into the heap. +intrinsic("bindless_image_agx", [1], dest_comp=1, bit_sizes=[32], + indices=[DESC_SET], flags=[CAN_ELIMINATE, CAN_REORDER]) + # Intel-specific query for loading from the isl_image_param struct passed # into the shader as a uniform. The variable is a deref to the image # variable. The const index specifies which of the six parameters to load. diff --git a/src/compiler/nir/nir_opt_preamble.c b/src/compiler/nir/nir_opt_preamble.c index 6ad2a5c1950..c975cf53828 100644 --- a/src/compiler/nir/nir_opt_preamble.c +++ b/src/compiler/nir/nir_opt_preamble.c @@ -239,6 +239,7 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx) case nir_intrinsic_bindless_resource_ir3: case nir_intrinsic_load_const_ir3: case nir_intrinsic_load_constant_agx: + case nir_intrinsic_bindless_image_agx: return can_move_srcs(&instr->instr, ctx); /* Image/SSBO loads can be moved if they are CAN_REORDER and their diff --git a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c index 25f5eecff0b..6d7d1ed7c70 100644 --- a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c +++ b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c @@ -118,13 +118,12 @@ load_ubo(nir_builder *b, nir_intrinsic_instr *intr, void *bases) static nir_def * load_texture_handle(nir_builder *b, nir_intrinsic_instr *intr, void *base) { - nir_def *uniform = - nir_load_sysval_agx(b, 1, 64, .desc_set = stage_table(b), - .binding = (uintptr_t)base, .flags = ~0); + nir_def *offs_B = + nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), AGX_TEXTURE_DESC_STRIDE); - return nir_vec2( - b, nir_u2u32(b, uniform), - nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), AGX_TEXTURE_DESC_STRIDE)); + nir_load_sysval_agx(b, 1, 64, .desc_set = stage_table(b), + .binding = (uintptr_t)base, .flags = ~0); + return nir_bindless_image_agx(b, offs_B); } static nir_def * @@ -461,23 +460,27 @@ lay_out_uniforms(struct agx_compiled_shader *shader, struct state *state) nir_intrinsic_instr *intr = *intr_; uint8_t table = nir_intrinsic_desc_set(intr); uint16_t offset = nir_intrinsic_binding(intr); - bool load_uniform_location = nir_intrinsic_flags(intr); + bool bindless_image = nir_intrinsic_flags(intr); struct agx_push_range *range = find_push_range_containing(shader, table, offset); unsigned base = range->uniform + ((offset - range->offset) / 2); - nir_builder b = nir_builder_at(nir_instr_remove(&(intr->instr))); - nir_def *repl; + nir_builder b = nir_builder_at(nir_before_instr(&intr->instr)); - if (load_uniform_location) { - repl = nir_imm_int(&b, base); + if (bindless_image) { + nir_instr *next = nir_instr_next(&intr->instr); + assert(next->type == nir_instr_type_intrinsic); + + nir_intrinsic_instr *nintr = nir_instr_as_intrinsic(next); + assert(nintr->intrinsic == nir_intrinsic_bindless_image_agx); + + nir_intrinsic_set_desc_set(nintr, base); } else { - repl = nir_load_preamble(&b, intr->def.num_components, - intr->def.bit_size, .base = base); + nir_def *repl = nir_load_preamble(&b, intr->def.num_components, + intr->def.bit_size, .base = base); + 
+         nir_def_replace(&intr->def, repl);
       }
-
-      nir_def_rewrite_uses(&intr->def, repl);
    }
 
    return uniform;
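
Not part of the patch: a minimal producer/consumer sketch of the new handle representation, assuming only the builders and helpers that appear above (nir_bindless_image_agx, nir_src_as_intrinsic, nir_intrinsic_desc_set). A bindless_image_agx instruction carries the heap uniform in its DESC_SET index and a 32-bit byte offset as its sole source, so consumers chase the producing instruction instead of pattern-matching a vec2. The pass name, descriptor stride, and heap uniform below are hypothetical and purely illustrative.

#include <assert.h>
#include "nir_builder.h"

/* Hypothetical producer: wrap a texture index into a bindless_image_agx
 * handle, with the heap base living in uniform 0 and a made-up 24-byte
 * descriptor stride (mirrors the lowering passes in this patch).
 */
static bool
lower_example_handle(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_texture_handle_agx)
      return false;

   b->cursor = nir_instr_remove(&intr->instr);
   nir_def *offs_B = nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), 24);
   nir_def_rewrite_uses(&intr->def,
                        nir_bindless_image_agx(b, offs_B, .desc_set = 0));
   return true;
}

/* Hypothetical consumer: recover the (heap uniform, byte offset) pair from a
 * handle source by chasing the bindless_image_agx instruction, in the same
 * spirit as agx_translate_bindless_handle above.
 */
static void
example_chase_handle(nir_src *handle, unsigned *heap_uniform, nir_def **offs_B)
{
   nir_intrinsic_instr *spec = nir_src_as_intrinsic(*handle);
   assert(spec->intrinsic == nir_intrinsic_bindless_image_agx);

   *heap_uniform = nir_intrinsic_desc_set(spec);
   *offs_B = spec->src[0].ssa;
}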