mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-15 05:10:18 +01:00
hk,agx: promote bindless samplers
via the bindless_sampler_agx intrinsic.

Totals from 29771 (55.11% of 54019) affected shaders:
MaxWaves: 28934080 -> 28938304 (+0.01%); split: +0.02%, -0.00%
Instrs: 16623874 -> 16369120 (-1.53%); split: -1.54%, +0.01%
CodeSize: 117532138 -> 115994992 (-1.31%); split: -1.32%, +0.01%
Spills: 12721 -> 12652 (-0.54%); split: -0.72%, +0.17%
Fills: 6733 -> 6636 (-1.44%); split: -1.96%, +0.52%
Scratch: 132994 -> 132712 (-0.21%); split: -0.22%, +0.01%
ALU: 13054253 -> 12803059 (-1.92%); split: -1.93%, +0.01%
FSCIB: 13054138 -> 12802912 (-1.92%); split: -1.94%, +0.01%
IC: 3916012 -> 3915588 (-0.01%); split: -0.01%, +0.00%
GPRs: 2290907 -> 2289519 (-0.06%); split: -0.07%, +0.01%
Uniforms: 6794773 -> 6696943 (-1.44%); split: -1.44%, +0.00%
Preamble instrs: 6953594 -> 7024455 (+1.02%); split: -0.37%, +1.39%

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36127>
This commit is contained in:
parent
49f042c5e8
commit
642c6c6f62
8 changed files with 130 additions and 37 deletions
|
|
@ -792,7 +792,8 @@ static agx_index
|
|||
agx_translate_bindless_handle(agx_builder *b, nir_src *handle, agx_index *base)
|
||||
{
|
||||
nir_intrinsic_instr *intr = nir_src_as_intrinsic(*handle);
|
||||
assert(intr->intrinsic == nir_intrinsic_bindless_image_agx);
|
||||
assert(intr->intrinsic == nir_intrinsic_bindless_image_agx ||
|
||||
intr->intrinsic == nir_intrinsic_bindless_sampler_agx);
|
||||
|
||||
*base = agx_uniform(nir_intrinsic_desc_set(intr), AGX_SIZE_64);
|
||||
return agx_src_index(&intr->src[0]);
|
||||
|
|
@ -822,10 +823,17 @@ agx_emit_store_preamble(agx_builder *b, nir_intrinsic_instr *instr)
|
|||
nir_preamble_class cls = nir_intrinsic_preamble_class(instr);
|
||||
unsigned base = nir_intrinsic_base(instr);
|
||||
|
||||
if (cls == nir_preamble_class_image) {
|
||||
if (cls != nir_preamble_class_general) {
|
||||
agx_index heap, offset;
|
||||
offset = agx_translate_bindless_handle(b, &instr->src[0], &heap);
|
||||
return agx_tex_state_store(b, heap, offset, base / 2);
|
||||
|
||||
/* base is 32-bit units for images but 16-bit for samplers, hence the
|
||||
* division difference to convert into texture/sampler state units.
|
||||
*/
|
||||
if (cls == nir_preamble_class_image)
|
||||
return agx_tex_state_store(b, heap, offset, base / 2);
|
||||
else
|
||||
return agx_sampler_state_store(b, heap, offset, base);
|
||||
}
|
||||
|
||||
agx_index vec = agx_src_index(&instr->src[0]);
|
||||
|
|
@ -1740,6 +1748,7 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
|
|||
return agx_emit_export(b, nir_intrinsic_base(instr), instr->src[0]);
|
||||
|
||||
case nir_intrinsic_bindless_image_agx:
|
||||
case nir_intrinsic_bindless_sampler_agx:
|
||||
/* These must always be chased */
|
||||
return NULL;
|
||||
|
||||
|
|
@ -2217,8 +2226,14 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
|
|||
{
|
||||
agx_index coords = agx_null(), bindless = agx_immediate(0),
|
||||
texture = agx_immediate(instr->texture_index),
|
||||
sampler = agx_immediate(0), lod = agx_immediate(0),
|
||||
compare = agx_null(), packed_offset = agx_null();
|
||||
sampler = agx_immediate(instr->sampler_index),
|
||||
lod = agx_immediate(0), compare = agx_null(),
|
||||
packed_offset = agx_null();
|
||||
|
||||
/* Default to the txf sampler at ss0 */
|
||||
if (!nir_tex_instr_need_sampler(instr)) {
|
||||
sampler = agx_immediate(0);
|
||||
}
|
||||
|
||||
bool lod_is_zero = true;
|
||||
|
||||
|
|
@ -3047,7 +3062,7 @@ optimize_bounds(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
|
||||
static void
|
||||
agx_optimize_nir(nir_shader *nir, bool soft_fault, uint16_t *preamble_size,
|
||||
uint8_t *ts_count)
|
||||
uint8_t *ts_count, uint8_t *ss_count)
|
||||
{
|
||||
/* This runs only once up front since other optimizations don't affect it */
|
||||
NIR_PASS(_, nir, nir_opt_shrink_stores, true);
|
||||
|
|
@ -3151,13 +3166,26 @@ agx_optimize_nir(nir_shader *nir, bool soft_fault, uint16_t *preamble_size,
|
|||
NIR_PASS(_, nir, agx_nir_lower_fminmax);
|
||||
|
||||
if (preamble_size && (!(agx_compiler_debug & AGX_DBG_NOPREAMBLE))) {
|
||||
unsigned temp = *preamble_size;
|
||||
unsigned temp_ts_count = ts_count ? *ts_count : 1000 /* large finite */;
|
||||
NIR_PASS(_, nir, agx_nir_opt_preamble, &temp, &temp_ts_count);
|
||||
*preamble_size = temp;
|
||||
unsigned sizes[] = {
|
||||
*preamble_size,
|
||||
ts_count ? *ts_count : 1000 /* large finite */,
|
||||
ss_count ? *ss_count : 1000 /* large finite */,
|
||||
};
|
||||
|
||||
/* Don't clobber txf sampler */
|
||||
if (sizes[2] == 0)
|
||||
sizes[2]++;
|
||||
|
||||
NIR_PASS(_, nir, agx_nir_opt_preamble, sizes);
|
||||
|
||||
*preamble_size = sizes[0];
|
||||
|
||||
if (ts_count)
|
||||
*ts_count = temp_ts_count;
|
||||
*ts_count = sizes[1];
|
||||
|
||||
/* if something other than the txf sampler is written... */
|
||||
if (ss_count && sizes[2] > 1)
|
||||
*ss_count = sizes[2];
|
||||
}
|
||||
|
||||
/* Forming preambles may dramatically reduce the instruction count
|
||||
|
|
@ -3907,7 +3935,9 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
|
|||
agx_optimize_nir(
|
||||
nir, key->dev.soft_fault, key->secondary ? NULL : &info->push_count,
|
||||
(key->secondary || !key->promote_textures) ? NULL
|
||||
: &info->texture_state_count);
|
||||
: &info->texture_state_count,
|
||||
(key->secondary || !key->promote_textures) ? NULL
|
||||
: &info->sampler_state_count);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
info->varyings.fs.nr_cf = key->fs.cf_base;
|
||||
|
|
|
|||
|
|
@ -287,9 +287,10 @@ struct agx_shader_key {
|
|||
*/
|
||||
bool promote_constants;
|
||||
|
||||
/* Similarly whether the driver supports promoting bindless textures.
|
||||
* Currently this works only if non-bindless textures are not used, but
|
||||
* none of our drivers mix bindless / non-bindless usage.
|
||||
/* Similarly whether the driver supports promoting bindless
|
||||
* textures/samplers. Currently this works only if non-bindless
|
||||
* textures/samplers are not used, but none of our drivers mix bindless /
|
||||
* non-bindless usage.
|
||||
*/
|
||||
bool promote_textures;
|
||||
|
||||
|
|
|
|||
|
|
@ -1080,8 +1080,7 @@ void agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies,
|
|||
void agx_compute_liveness(agx_context *ctx);
|
||||
void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);
|
||||
|
||||
bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size,
|
||||
unsigned *ts_count);
|
||||
bool agx_nir_opt_preamble(nir_shader *s, unsigned *sizes);
|
||||
bool agx_nir_lower_load_mask(nir_shader *shader);
|
||||
bool agx_nir_lower_ubo(nir_shader *shader);
|
||||
bool agx_nir_lower_shared_bitsize(nir_shader *shader);
|
||||
|
|
|
|||
|
|
@ -8,18 +8,28 @@
|
|||
#include "util/macros.h"
|
||||
#include "agx_compiler.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder_opcodes.h"
|
||||
#include "nir_intrinsics.h"
|
||||
#include "nir_intrinsics_indices.h"
|
||||
#include "nir_opcodes.h"
|
||||
|
||||
static bool
|
||||
is_promotable_texture_handle(nir_def *def)
|
||||
static nir_preamble_class
|
||||
preamble_class(nir_def *def)
|
||||
{
|
||||
nir_instr *instr = def->parent_instr;
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
return nir_preamble_class_general;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
return intr->intrinsic == nir_intrinsic_bindless_image_agx &&
|
||||
nir_intrinsic_desc_set(intr) < 32 /* encoding restriction */;
|
||||
if (nir_intrinsic_has_desc_set(intr) && nir_intrinsic_desc_set(intr) >= 32)
|
||||
return nir_preamble_class_general /* encoding restriction */;
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_bindless_image_agx)
|
||||
return nir_preamble_class_image;
|
||||
else if (intr->intrinsic == nir_intrinsic_bindless_sampler_agx)
|
||||
return nir_preamble_class_sampler;
|
||||
else
|
||||
return nir_preamble_class_general;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -30,8 +40,7 @@ def_size(nir_def *def, unsigned *size, unsigned *align,
|
|||
|
||||
*size = (bit_size * def->num_components) / 16;
|
||||
*align = bit_size / 16;
|
||||
*class = is_promotable_texture_handle(def) ? nir_preamble_class_image
|
||||
: nir_preamble_class_general;
|
||||
*class = preamble_class(def);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -238,6 +247,7 @@ instr_cost(nir_instr *instr, const void *data)
|
|||
case nir_intrinsic_ddy_coarse:
|
||||
return 1.0;
|
||||
case nir_intrinsic_bindless_image_agx:
|
||||
case nir_intrinsic_bindless_sampler_agx:
|
||||
/* It's worth promoting even with a constant source, but it doesn't
|
||||
* turn into instructions so should be less than any other normal
|
||||
* instruction... But just enough to get over the image rewrite_cost.
|
||||
|
|
@ -319,6 +329,9 @@ static const nir_opt_preamble_options preamble_options = {
|
|||
|
||||
/* We have at least 32 texture state registers. TODO: check for more? */
|
||||
.preamble_storage_size[nir_preamble_class_image] = 32,
|
||||
|
||||
/* We have at least 16 sampler state registers. TODO: check for more? */
|
||||
.preamble_storage_size[nir_preamble_class_sampler] = 16,
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -345,6 +358,31 @@ lower_store_preamble(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
static bool
|
||||
lower_preamble(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
if (intr->intrinsic == nir_intrinsic_bindless_sampler_agx) {
|
||||
/* Rematerialize bindless_sampler_agx before store_preamble with only the
|
||||
* byte offset (first source), not the sampler index.
|
||||
*/
|
||||
nir_foreach_use_safe(use, &intr->def) {
|
||||
nir_instr *parent = nir_src_parent_instr(use);
|
||||
if (parent->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
nir_intrinsic_instr *pintr = nir_instr_as_intrinsic(parent);
|
||||
if (pintr->intrinsic != nir_intrinsic_store_preamble ||
|
||||
nir_intrinsic_preamble_class(pintr) != nir_preamble_class_sampler)
|
||||
continue;
|
||||
|
||||
b->cursor = nir_before_src(use);
|
||||
nir_def *repl =
|
||||
nir_bindless_sampler_agx(b, intr->src[0].ssa, nir_undef(b, 1, 16),
|
||||
.desc_set = nir_intrinsic_desc_set(intr));
|
||||
nir_src_rewrite(use, repl);
|
||||
}
|
||||
|
||||
/* Replace other uses with just the sampler index. */
|
||||
nir_def_replace(&intr->def, intr->src[1].ssa);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (intr->intrinsic != nir_intrinsic_load_preamble)
|
||||
return false;
|
||||
|
||||
|
|
@ -354,6 +392,7 @@ lower_preamble(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
unsigned base = nir_intrinsic_base(intr);
|
||||
nir_def *new_ = NULL;
|
||||
bool ts = nir_intrinsic_preamble_class(intr) == nir_preamble_class_image;
|
||||
bool ss = nir_intrinsic_preamble_class(intr) == nir_preamble_class_sampler;
|
||||
if (!ts && heaps[base] >= 0) {
|
||||
new_ = nir_bindless_image_agx(b, &intr->def, .desc_set = heaps[base]);
|
||||
}
|
||||
|
|
@ -374,13 +413,14 @@ lower_preamble(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
} else if (parent->type == nir_instr_type_tex) {
|
||||
nir_tex_instr *tex = nir_instr_as_tex(parent);
|
||||
nir_tex_src *src = (nir_tex_src *)use;
|
||||
if (src->src_type != nir_tex_src_texture_handle)
|
||||
continue;
|
||||
|
||||
if (ts) {
|
||||
if (src->src_type == nir_tex_src_sampler_handle && ss) {
|
||||
nir_steal_tex_src(tex, nir_tex_src_sampler_handle);
|
||||
tex->sampler_index = base;
|
||||
} else if (src->src_type == nir_tex_src_texture_handle && ts) {
|
||||
nir_steal_tex_src(tex, nir_tex_src_texture_handle);
|
||||
tex->texture_index = base / 2;
|
||||
} else {
|
||||
} else if (src->src_type == nir_tex_src_texture_handle) {
|
||||
assert(new_ != NULL);
|
||||
nir_src_rewrite(use, new_);
|
||||
}
|
||||
|
|
@ -391,15 +431,10 @@ lower_preamble(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
}
|
||||
|
||||
bool
|
||||
agx_nir_opt_preamble(nir_shader *nir, unsigned *preamble_size,
|
||||
unsigned *ts_count)
|
||||
agx_nir_opt_preamble(nir_shader *nir, unsigned *sizes)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
unsigned sizes[] = {*preamble_size, *ts_count};
|
||||
NIR_PASS(progress, nir, nir_opt_preamble, &preamble_options, sizes);
|
||||
*preamble_size = sizes[0];
|
||||
*ts_count = sizes[1];
|
||||
|
||||
if (progress) {
|
||||
int16_t heap[512];
|
||||
|
|
|
|||
|
|
@ -114,6 +114,7 @@ write_sampled_image_view_desc(struct hk_descriptor_set *set,
|
|||
*/
|
||||
desc[plane].sampler_index =
|
||||
sampler->planes[sampler_plane].hw->index + 28;
|
||||
desc[plane].sampler = sampler->planes[sampler_plane].hw->key;
|
||||
desc[plane].lod_bias_fp16 = sampler->lod_bias_fp16;
|
||||
desc[plane].clamp_0_sampler_index_or_negative = -1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ struct hk_descriptor_set_layout;
|
|||
|
||||
struct hk_sampled_image_descriptor {
|
||||
struct agx_texture_packed tex;
|
||||
struct agx_sampler_packed sampler;
|
||||
uint16_t sampler_index;
|
||||
|
||||
/* Negative if there is no border colour, else the clamp=0 sampler index used
|
||||
|
|
@ -42,7 +43,7 @@ struct hk_sampled_image_descriptor {
|
|||
uint16_t pad;
|
||||
/* TODO: This should probably be a heap! */
|
||||
uint32_t border[4];
|
||||
uint8_t pad2[12];
|
||||
uint8_t pad2[4];
|
||||
};
|
||||
static_assert(sizeof(struct hk_sampled_image_descriptor) == 64,
|
||||
"hk_sampled_image_descriptor has no holes");
|
||||
|
|
|
|||
|
|
@ -671,8 +671,9 @@ lower_tex(nir_builder *b, nir_tex_instr *tex,
|
|||
if (sampler != NULL) {
|
||||
unsigned offs =
|
||||
offsetof(struct hk_sampled_image_descriptor, sampler_index);
|
||||
bool clamp_to_0 = tex->backend_flags & AGX_TEXTURE_FLAG_CLAMP_TO_0;
|
||||
|
||||
if (tex->backend_flags & AGX_TEXTURE_FLAG_CLAMP_TO_0) {
|
||||
if (clamp_to_0) {
|
||||
offs = offsetof(struct hk_sampled_image_descriptor,
|
||||
clamp_0_sampler_index_or_negative);
|
||||
}
|
||||
|
|
@ -681,6 +682,30 @@ lower_tex(nir_builder *b, nir_tex_instr *tex,
|
|||
b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)),
|
||||
plane_offset_B + offs, ctx);
|
||||
|
||||
if (!clamp_to_0) {
|
||||
uint32_t set, binding;
|
||||
nir_def *idx;
|
||||
get_resource_deref_binding(b,
|
||||
nir_src_as_deref(nir_src_for_ssa(sampler)),
|
||||
&set, &binding, &idx);
|
||||
|
||||
const struct hk_descriptor_set_binding_layout *binding_layout =
|
||||
get_binding_layout(set, binding, ctx);
|
||||
|
||||
if (ctx->clamp_desc_array_bounds)
|
||||
idx =
|
||||
nir_umin(b, idx, nir_imm_int(b, binding_layout->array_size - 1));
|
||||
|
||||
assert(binding_layout->stride > 0);
|
||||
nir_def *desc_offs_B = nir_iadd_imm(
|
||||
b, nir_imul_imm(b, idx, binding_layout->stride),
|
||||
binding_layout->offset + plane_offset_B +
|
||||
offsetof(struct hk_sampled_image_descriptor, sampler));
|
||||
|
||||
index =
|
||||
nir_bindless_sampler_agx(b, desc_offs_B, index, .desc_set = set);
|
||||
}
|
||||
|
||||
nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle, index);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -952,7 +952,8 @@ static bool
|
|||
lower_uniforms(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
struct fixed_uniforms *ctx = data;
|
||||
if (intr->intrinsic == nir_intrinsic_bindless_image_agx) {
|
||||
if (intr->intrinsic == nir_intrinsic_bindless_image_agx ||
|
||||
intr->intrinsic == nir_intrinsic_bindless_sampler_agx) {
|
||||
/* Change of units from sets to uniforms */
|
||||
nir_intrinsic_set_desc_set(
|
||||
intr, ctx->sets + (nir_intrinsic_desc_set(intr) * 4));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue