asahi: move more code out of agx_preprocess_nir

We need to gather texcoord masks / lower mediump I/O before lowering textures for
our detection to work. We also want driver-side I/O lowering soon for Marek's
work anyway. Do some code motion / pass reordering to make this doable.

in doing so, we get rid of agx_uncompiled_shader_info which is probably good.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28483>
This commit is contained in:
Alyssa Rosenzweig 2024-02-15 13:35:21 -04:00 committed by Marge Bot
parent d6800d5cc6
commit 0a8d0217c9
7 changed files with 77 additions and 104 deletions

View file

@ -505,14 +505,13 @@ main(int argc, char **argv)
nir_call(&b, nir_function_clone(b.shader, func));
UNUSED struct agx_uncompiled_shader_info info;
UNUSED struct agx_shader_info compiled_info;
struct agx_shader_key key = {
.libagx = nir,
.is_helper = true,
};
agx_preprocess_nir(b.shader, nir, false, &info);
agx_preprocess_nir(b.shader, nir);
agx_compile_shader_nir(b.shader, &key, NULL, &binary, &compiled_info);
/* Pad out */

View file

@ -2312,12 +2312,6 @@ agx_dump_stats(agx_context *ctx, unsigned size, char **out)
nr_threads, ctx->loop_count, ctx->spills, ctx->fills);
}
/* Type-size callback for nir_lower_io: returns the number of attribute slots
 * a GLSL type occupies. The bindless flag is ignored (false is passed down
 * unconditionally). */
static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}
static bool
agx_lower_sincos_filter(const nir_instr *instr, UNUSED const void *_)
{
@ -2613,7 +2607,7 @@ agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings,
* conformant not to, but every app gets this wrong.
*/
static bool
agx_gather_texcoords(nir_builder *b, nir_instr *instr, void *data)
gather_texcoords(nir_builder *b, nir_instr *instr, void *data)
{
uint64_t *mask = data;
@ -2648,15 +2642,10 @@ agx_gather_texcoords(nir_builder *b, nir_instr *instr, void *data)
return false;
}
/* Bit masks, indexed by I/O location, of flat and linear shaded varyings */
struct interp_masks {
/* Varyings that are flat shaded */
uint64_t flat;
/* Varyings that are linear (noperspective) shaded — TODO(review): confirm
 * "linear" here means noperspective interpolation */
uint64_t linear;
};
static bool
agx_gather_interp(nir_builder *b, nir_intrinsic_instr *intr, void *data)
gather_interp(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
struct interp_masks *masks = data;
struct agx_interp_info *masks = data;
if (intr->intrinsic == nir_intrinsic_load_input) {
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
@ -2673,16 +2662,15 @@ agx_gather_interp(nir_builder *b, nir_intrinsic_instr *intr, void *data)
/*
* Build a bit mask of varyings (by location) that are flatshaded and linear
* shaded. This information is needed by lower_mediump_io and
* agx_uncompiled_shader_info.
* shaded. This information is needed by the driver.
*/
static struct interp_masks
agx_interp_masks(nir_shader *nir)
struct agx_interp_info
agx_gather_interp_info(nir_shader *nir)
{
assert(nir->info.stage == MESA_SHADER_FRAGMENT);
struct interp_masks masks = {0};
nir_shader_intrinsics_pass(nir, agx_gather_interp, nir_metadata_all, &masks);
struct agx_interp_info masks = {0};
nir_shader_intrinsics_pass(nir, gather_interp, nir_metadata_all, &masks);
return masks;
}
@ -2690,14 +2678,13 @@ agx_interp_masks(nir_shader *nir)
* Build a bit mask of varyings (by location) that are used as texture
* coordinates. This information is needed by lower_mediump_io.
*/
static uint64_t
agx_texcoord_mask(nir_shader *nir)
uint64_t
agx_gather_texcoords(nir_shader *nir)
{
assert(nir->info.stage == MESA_SHADER_FRAGMENT);
uint64_t mask = 0;
nir_shader_instructions_pass(nir, agx_gather_texcoords, nir_metadata_all,
&mask);
nir_shader_instructions_pass(nir, gather_texcoords, nir_metadata_all, &mask);
return mask;
}
@ -2944,32 +2931,10 @@ link_libagx(nir_shader *nir, const nir_shader *libagx)
glsl_get_cl_type_size_align);
}
/*
* Preprocess NIR. In particular, this lowers I/O. Drivers should call this
* as soon as they don't need unlowered I/O.
*
* This also lowers as much as possible. After preprocessing NIR, the following
* NIR passes are called by the GL driver:
*
* - nir_lower_blend
* - nir_lower_texcoord_replace_late
* - agx_nir_lower_vbo
* - agx_nir_lower_tilebuffer
*
* Unless an instruction is constructed by one of the above passes, it should be
* lowered here to avoid duplicate work with shader variants.
*/
/* Preprocess NIR independent of shader state */
void
agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx,
bool allow_mediump, struct agx_uncompiled_shader_info *out)
agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx)
{
if (out) {
memset(out, 0, sizeof(*out));
out->nr_bindful_textures = BITSET_LAST_BIT(nir->info.textures_used);
out->nr_bindful_images = BITSET_LAST_BIT(nir->info.images_used);
}
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
/* Lower large arrays to scratch and small arrays to csel */
@ -2979,36 +2944,13 @@ agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx,
NIR_PASS(_, nir, nir_split_var_copies);
NIR_PASS(_, nir, nir_lower_global_vars_to_local);
NIR_PASS(_, nir, nir_lower_var_copies);
NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
glsl_type_size, nir_lower_io_lower_64bit_to_32);
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
struct interp_masks masks = agx_interp_masks(nir);
NIR_PASS(_, nir, agx_nir_lower_frag_sidefx);
/* Interpolate varyings at fp16 and write to the tilebuffer at fp16. As an
* exception, interpolate flat shaded at fp32. This works around a
* hardware limitation. The resulting code (with an extra f2f16 at the end
* if needed) matches what Metal produces.
*/
if (likely(allow_mediump)) {
uint64_t texcoord = agx_texcoord_mask(nir);
NIR_PASS(_, nir, nir_lower_mediump_io,
nir_var_shader_in | nir_var_shader_out,
~(masks.flat | texcoord), false);
}
if (out) {
out->inputs_flat_shaded = masks.flat;
out->inputs_linear_shaded = masks.linear;
}
} else if (nir->info.stage == MESA_SHADER_VERTEX ||
nir->info.stage == MESA_SHADER_TESS_EVAL) {
out->has_edgeflags = nir->info.outputs_written & VARYING_BIT_EDGE;
out->cull_distance_size = nir->info.cull_distance_array_size;
if (out->cull_distance_size)
if (nir->info.cull_distance_array_size)
NIR_PASS(_, nir, agx_nir_lower_cull_distance_vs);
}

View file

@ -98,14 +98,10 @@ union agx_varyings {
struct agx_varyings_fs fs;
};
struct agx_uncompiled_shader_info {
uint64_t inputs_flat_shaded;
uint64_t inputs_linear_shaded;
uint8_t cull_distance_size;
bool has_edgeflags;
/* Number of bindful textures, images used */
unsigned nr_bindful_textures, nr_bindful_images;
/* Interpolation information gathered by agx_gather_interp_info, consumed by
 * the driver (e.g. for nir_lower_mediump_io masking). */
struct agx_interp_info {
/* Bit masks indexed by I/O location of flat and linear varyings */
uint64_t flat;
uint64_t linear;
};
struct agx_shader_info {
@ -243,10 +239,10 @@ struct agx_shader_key {
};
};
void agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx,
bool allow_mediump,
struct agx_uncompiled_shader_info *out);
struct agx_interp_info agx_gather_interp_info(nir_shader *nir);
uint64_t agx_gather_texcoords(nir_shader *nir);
void agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx);
bool agx_nir_lower_discard_zs_emit(nir_shader *s);
bool agx_nir_lower_sample_mask(nir_shader *s);

View file

@ -34,7 +34,7 @@ agx_compile_meta_shader(struct agx_meta_cache *cache, nir_shader *shader,
util_dynarray_init(&binary, NULL);
agx_nir_lower_texture(shader);
agx_preprocess_nir(shader, cache->dev->libagx, false, NULL);
agx_preprocess_nir(shader, cache->dev->libagx);
if (tib) {
unsigned bindless_base = 0;
agx_nir_lower_tilebuffer(shader, tib, NULL, &bindless_base, NULL, true);

View file

@ -359,10 +359,7 @@ agx_nir_create_geometry_count_shader(nir_shader *gs, const nir_shader *libagx,
NIR_PASS(_, shader, nir_shader_intrinsics_pass, lower_id,
nir_metadata_block_index | nir_metadata_dominance, NULL);
/* Preprocess it */
UNUSED struct agx_uncompiled_shader_info info;
agx_preprocess_nir(shader, libagx, false, &info);
agx_preprocess_nir(shader, libagx);
return shader;
}
@ -549,10 +546,7 @@ agx_nir_create_gs_rast_shader(const nir_shader *gs, const nir_shader *libagx)
nir_opt_idiv_const(shader, 16);
/* Preprocess it */
UNUSED struct agx_uncompiled_shader_info info;
agx_preprocess_nir(shader, libagx, false, &info);
agx_preprocess_nir(shader, libagx);
return shader;
}
@ -988,10 +982,7 @@ agx_nir_create_pre_gs(struct lower_gs_state *state, const nir_shader *libagx,
nir_load_stat_query_address_agx(b, .base = PIPE_STAT_QUERY_C_INVOCATIONS),
emitted_prims);
/* Preprocess it */
UNUSED struct agx_uncompiled_shader_info info;
agx_preprocess_nir(b->shader, libagx, false, &info);
agx_preprocess_nir(b->shader, libagx);
return b->shader;
}

View file

@ -2142,6 +2142,12 @@ agx_get_shader_variant(struct agx_screen *screen, struct pipe_context *pctx,
return compiled;
}
/* Type-size callback for nir_lower_io: returns the number of attribute slots
 * a GLSL type occupies. The bindless flag is ignored (false is passed down
 * unconditionally). */
static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}
static void
agx_shader_initialize(struct agx_device *dev, struct agx_uncompiled_shader *so,
nir_shader *nir, bool support_lod_bias, bool robust)
@ -2187,11 +2193,41 @@ agx_shader_initialize(struct agx_device *dev, struct agx_uncompiled_shader *so,
NIR_PASS(_, nir, nir_lower_fragcolor, 8);
}
/* We need to do some I/O lowering before lowering textures */
so->info.nr_bindful_textures = BITSET_LAST_BIT(nir->info.textures_used);
so->info.nr_bindful_images = BITSET_LAST_BIT(nir->info.images_used);
NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
glsl_type_size, nir_lower_io_lower_64bit_to_32);
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
struct agx_interp_info interp = agx_gather_interp_info(nir);
/* Interpolate varyings at fp16 and write to the tilebuffer at fp16. As an
* exception, interpolate flat shaded at fp32. This works around a
* hardware limitation. The resulting code (with an extra f2f16 at the end
* if needed) matches what Metal produces.
*/
if (likely(!(dev->debug & AGX_DBG_NO16))) {
uint64_t texcoord = agx_gather_texcoords(nir);
NIR_PASS(_, nir, nir_lower_mediump_io,
nir_var_shader_in | nir_var_shader_out,
~(interp.flat | texcoord), false);
}
so->info.inputs_flat_shaded = interp.flat;
so->info.inputs_linear_shaded = interp.linear;
} else if (nir->info.stage == MESA_SHADER_VERTEX ||
nir->info.stage == MESA_SHADER_TESS_EVAL) {
so->info.has_edgeflags = nir->info.outputs_written & VARYING_BIT_EDGE;
so->info.cull_distance_size = nir->info.cull_distance_array_size;
}
NIR_PASS(_, nir, agx_nir_lower_texture);
NIR_PASS(_, nir, nir_lower_ssbo);
bool allow_mediump = !(dev->debug & AGX_DBG_NO16);
agx_preprocess_nir(nir, dev->libagx, allow_mediump, &so->info);
agx_preprocess_nir(nir, dev->libagx);
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
(nir->info.inputs_read & VARYING_BITS_TEX_ANY)) {
@ -2768,8 +2804,7 @@ agx_build_meta_shader(struct agx_context *ctx, meta_shader_builder_t builder,
builder(&b, data);
struct agx_device *dev = agx_device(ctx->base.screen);
UNUSED struct agx_uncompiled_shader_info info;
agx_preprocess_nir(b.shader, dev->libagx, false, &info);
agx_preprocess_nir(b.shader, dev->libagx);
struct agx_shader_key base_key = {0};
struct agx_compiled_shader *shader =

View file

@ -234,7 +234,17 @@ struct agx_uncompiled_shader {
struct blob early_serialized_nir;
struct blob serialized_nir;
uint8_t nir_sha1[20];
struct agx_uncompiled_shader_info info;
/* Shader info gathered at initialization time, before state-dependent
 * lowering (replaces the old agx_uncompiled_shader_info). */
struct {
/* Bit masks, by I/O location, of fragment inputs that are flat / linear
 * shaded (from agx_gather_interp_info) */
uint64_t inputs_flat_shaded;
uint64_t inputs_linear_shaded;
/* Size of the cull distance array written (VS/TES) */
uint8_t cull_distance_size;
/* Whether edge flags are written (outputs_written & VARYING_BIT_EDGE) */
bool has_edgeflags;
/* Number of bindful textures, images used */
unsigned nr_bindful_textures, nr_bindful_images;
} info;
struct hash_table *variants;
struct agx_uncompiled_shader *passthrough_progs[MESA_PRIM_COUNT][3][2];
struct agx_uncompiled_shader *passthrough_tcs[32];