asahi: make clip_halfz dynamic

We could move this to the linker instead, but this is good enough for now.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27616>
This commit is contained in:
Alyssa Rosenzweig 2024-02-04 14:24:18 -04:00 committed by Marge Bot
parent beb7ddba2b
commit cb0b027c59
5 changed files with 30 additions and 24 deletions

View file

@ -1741,6 +1741,12 @@ intrinsic("load_fb_layers_v3d", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
# Equivalent to popcount(ballot(true) & ((1 << subgroup_invocation) - 1))
system_value("active_subgroup_invocation_agx", 1)
# With [0, 1] clipping, no transform is needed on the output z' = z. But with [-1,
# 1] clipping, we need to transform z' = (z + w) / 2. We express both cases as a
# lerp between z and w, where this is the lerp coefficient: 0 for [0, 1] and 0.5
# for [-1, 1].
system_value("clip_z_coeff_agx", 1)
# mesa_prim for the input topology (in a geometry shader)
system_value("input_topology_agx", 1)

View file

@ -179,6 +179,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
return load_sysval_root(b, 1, 32, &u->fixed_point_size);
case nir_intrinsic_load_tex_sprite_mask_agx:
return load_sysval_root(b, 1, 16, &u->sprite_mask);
case nir_intrinsic_load_clip_z_coeff_agx:
return nir_f2f32(b, load_sysval_root(b, 1, 16, &u->clip_z_coeff));
case nir_intrinsic_load_polygon_stipple_agx: {
nir_def *base = load_sysval_root(b, 1, 64, &u->polygon_stipple);
nir_def *row = intr->src[0].ssa;

View file

@ -1675,7 +1675,7 @@ agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs,
return ptr.gpu;
}
/* nir_lower_clip_halfz analogue for lowered I/O */
/* Dynamic lowered I/O version of nir_lower_clip_halfz */
static bool
agx_nir_lower_clip_m1_1(nir_builder *b, nir_intrinsic_instr *intr,
UNUSED void *data)
@ -1691,8 +1691,10 @@ agx_nir_lower_clip_m1_1(nir_builder *b, nir_intrinsic_instr *intr,
nir_def *pos = intr->src[0].ssa;
nir_def *z = nir_channel(b, pos, 2);
nir_def *w = nir_channel(b, pos, 3);
nir_def *c = nir_load_clip_z_coeff_agx(b);
nir_def *new_z = nir_fmul_imm(b, nir_fadd(b, z, w), 0.5f);
/* Lerp. If c = 0, reduces to z. If c = 1/2, reduces to (z + w)/2 */
nir_def *new_z = nir_ffma(b, nir_fneg(b, z), c, nir_ffma(b, w, c, z));
nir_src_rewrite(&intr->src[0], nir_vector_insert_imm(b, pos, new_z, 2));
return true;
}
@ -1893,12 +1895,8 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
if (key->next_stage == ASAHI_VS_FS) {
NIR_PASS(_, nir, agx_nir_lower_point_size,
key->next.fs.fixed_point_size);
if (!key->next.fs.clip_halfz) {
NIR_PASS(_, nir, nir_shader_intrinsics_pass,
agx_nir_lower_clip_m1_1,
nir_metadata_block_index | nir_metadata_dominance, NULL);
}
NIR_PASS(_, nir, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1,
nir_metadata_block_index | nir_metadata_dominance, NULL);
} else if (key->next_stage == ASAHI_VS_GS) {
NIR_PASS(_, nir, agx_nir_lower_sysvals, PIPE_SHADER_VERTEX, false);
NIR_PASS(_, nir, agx_nir_lower_vs_before_gs, dev->libagx,
@ -2080,12 +2078,8 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
/* TODO: deduplicate */
NIR_PASS(_, gs_copy, agx_nir_lower_point_size, key->fixed_point_size);
if (!key->clip_halfz) {
NIR_PASS(_, gs_copy, nir_shader_intrinsics_pass,
agx_nir_lower_clip_m1_1,
nir_metadata_block_index | nir_metadata_dominance, NULL);
}
NIR_PASS(_, gs_copy, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1,
nir_metadata_block_index | nir_metadata_dominance, NULL);
base_key.vs.outputs_flat_shaded = key->outputs_flat_shaded;
base_key.vs.outputs_linear_shaded = key->outputs_linear_shaded;
@ -2434,7 +2428,7 @@ agx_update_vs(struct agx_context *ctx, unsigned index_size_B)
/* Only proceed if the shader or anything the key depends on changes
*
* vb_mask, attributes, vertex_buffers: VERTEX
* clip_halfz: RS
* point_size_per_vertex: RS
* outputs_{flat,linear}_shaded: FS_PROG
*/
if (!((ctx->dirty & (AGX_DIRTY_VS_PROG | AGX_DIRTY_VERTEX | AGX_DIRTY_XFB |
@ -2456,8 +2450,6 @@ agx_update_vs(struct agx_context *ctx, unsigned index_size_B)
};
if (key.next_stage == ASAHI_VS_FS) {
key.next.fs.clip_halfz = ctx->rast->base.clip_halfz;
/* If we are not rasterizing points, don't set fixed_point_size to
* eliminate the useless point size write.
*/
@ -2533,7 +2525,6 @@ agx_update_gs(struct agx_context *ctx, const struct pipe_draw_info *info,
.rasterizer_discard = ctx->rast->base.rasterizer_discard,
/* TODO: Deduplicate */
.clip_halfz = ctx->rast->base.clip_halfz,
.fixed_point_size = !ctx->rast->base.point_size_per_vertex &&
rasterized_prim == MESA_PRIM_POINTS,
.outputs_flat_shaded =

View file

@ -150,6 +150,9 @@ struct PACKED agx_draw_uniforms {
/* Nonzero for indexed draws, zero otherwise */
uint16_t is_indexed_draw;
/* Clip-space Z lerp coefficient, stored as a half-float bit pattern:
 * 0x0000 (0.0) for [0, 1] clipping, 0x3800 (0.5) for [-1, 1] clipping.
 */
uint16_t clip_z_coeff;
};
struct PACKED agx_stage_uniforms {
@ -457,7 +460,6 @@ struct asahi_vs_shader_key {
} gs;
struct {
bool clip_halfz;
bool fixed_point_size;
uint64_t outputs_flat_shaded;
uint64_t outputs_linear_shaded;
@ -507,12 +509,11 @@ struct asahi_gs_shader_key {
/* Rasterizer shader key */
uint64_t outputs_flat_shaded;
uint64_t outputs_linear_shaded;
bool clip_halfz;
bool fixed_point_size;
/* If true, this GS is run only for its side effects (including XFB) */
bool rasterizer_discard;
bool padding[5];
bool padding[6];
};
static_assert(sizeof(struct asahi_gs_shader_key) == 24, "no holes");

View file

@ -6,6 +6,7 @@
#include "asahi/lib/agx_pack.h"
#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/half_float.h"
#include "util/macros.h"
#include "agx_state.h"
#include "pool.h"
@ -102,9 +103,14 @@ agx_upload_uniforms(struct agx_batch *batch)
batch->uniforms.tables[AGX_SYSVAL_TABLE_ROOT] = root_ptr.gpu;
batch->uniforms.sample_mask = ctx->sample_mask;
batch->uniforms.sprite_mask = (batch->reduced_prim == MESA_PRIM_POINTS)
? ctx->rast->base.sprite_coord_enable
: 0;
assert(_mesa_float_to_half(0.5) == 0x3800);
batch->uniforms.clip_z_coeff =
(ctx->rast && !ctx->rast->base.clip_halfz) ? 0x3800 : 0x0;
batch->uniforms.sprite_mask =
(batch->reduced_prim == MESA_PRIM_POINTS && ctx->rast)
? ctx->rast->base.sprite_coord_enable
: 0;
memcpy(root_ptr.cpu, &batch->uniforms, sizeof(batch->uniforms));
}