diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 7767187eea2..a15e57c1c97 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1741,6 +1741,12 @@ intrinsic("load_fb_layers_v3d", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER]) # Equivalent to popcount(ballot(true) & ((1 << subgroup_invocation) - 1)) system_value("active_subgroup_invocation_agx", 1) +# With [0, 1] clipping, no transform is needed on the output z' = z. But with [-1, +# 1] clipping, we need to transform z' = (z + w) / 2. We express both cases as a +# lerp between z and w, where this is the lerp coefficient: 0 for [0, 1] and 0.5 +# for [-1, 1]. +system_value("clip_z_coeff_agx", 1) + # mesa_prim for the input topology (in a geometry shader) system_value("input_topology_agx", 1) diff --git a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c index ae07488c443..be646a269e4 100644 --- a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c +++ b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c @@ -179,6 +179,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, return load_sysval_root(b, 1, 32, &u->fixed_point_size); case nir_intrinsic_load_tex_sprite_mask_agx: return load_sysval_root(b, 1, 16, &u->sprite_mask); + case nir_intrinsic_load_clip_z_coeff_agx: + return nir_f2f32(b, load_sysval_root(b, 1, 16, &u->clip_z_coeff)); case nir_intrinsic_load_polygon_stipple_agx: { nir_def *base = load_sysval_root(b, 1, 64, &u->polygon_stipple); nir_def *row = intr->src[0].ssa; diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 3ddda8832cf..4a41c1c5904 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -1675,7 +1675,7 @@ agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs, return ptr.gpu; } -/* nir_lower_clip_halfz analogue for lowered I/O */ +/* Dynamic lowered I/O 
version of nir_lower_clip_halfz */ static bool agx_nir_lower_clip_m1_1(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data) @@ -1691,8 +1691,10 @@ agx_nir_lower_clip_m1_1(nir_builder *b, nir_intrinsic_instr *intr, nir_def *pos = intr->src[0].ssa; nir_def *z = nir_channel(b, pos, 2); nir_def *w = nir_channel(b, pos, 3); + nir_def *c = nir_load_clip_z_coeff_agx(b); - nir_def *new_z = nir_fmul_imm(b, nir_fadd(b, z, w), 0.5f); + /* Lerp. If c = 0, reduces to z. If c = 1/2, reduces to (z + w)/2 */ + nir_def *new_z = nir_ffma(b, nir_fneg(b, z), c, nir_ffma(b, w, c, z)); nir_src_rewrite(&intr->src[0], nir_vector_insert_imm(b, pos, new_z, 2)); return true; } @@ -1893,12 +1895,8 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, if (key->next_stage == ASAHI_VS_FS) { NIR_PASS(_, nir, agx_nir_lower_point_size, key->next.fs.fixed_point_size); - - if (!key->next.fs.clip_halfz) { - NIR_PASS(_, nir, nir_shader_intrinsics_pass, - agx_nir_lower_clip_m1_1, - nir_metadata_block_index | nir_metadata_dominance, NULL); - } + NIR_PASS(_, nir, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1, + nir_metadata_block_index | nir_metadata_dominance, NULL); } else if (key->next_stage == ASAHI_VS_GS) { NIR_PASS(_, nir, agx_nir_lower_sysvals, PIPE_SHADER_VERTEX, false); NIR_PASS(_, nir, agx_nir_lower_vs_before_gs, dev->libagx, @@ -2080,12 +2078,8 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, /* TODO: deduplicate */ NIR_PASS(_, gs_copy, agx_nir_lower_point_size, key->fixed_point_size); - - if (!key->clip_halfz) { - NIR_PASS(_, gs_copy, nir_shader_intrinsics_pass, - agx_nir_lower_clip_m1_1, - nir_metadata_block_index | nir_metadata_dominance, NULL); - } + NIR_PASS(_, gs_copy, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1, + nir_metadata_block_index | nir_metadata_dominance, NULL); base_key.vs.outputs_flat_shaded = key->outputs_flat_shaded; base_key.vs.outputs_linear_shaded = key->outputs_linear_shaded; @@ -2434,7 +2428,7 @@ 
agx_update_vs(struct agx_context *ctx, unsigned index_size_B) /* Only proceed if the shader or anything the key depends on changes * * vb_mask, attributes, vertex_buffers: VERTEX - * clip_halfz: RS + * point_size_per_vertex: RS * outputs_{flat,linear}_shaded: FS_PROG */ if (!((ctx->dirty & (AGX_DIRTY_VS_PROG | AGX_DIRTY_VERTEX | AGX_DIRTY_XFB | @@ -2456,8 +2450,6 @@ agx_update_vs(struct agx_context *ctx, unsigned index_size_B) }; if (key.next_stage == ASAHI_VS_FS) { - key.next.fs.clip_halfz = ctx->rast->base.clip_halfz; - /* If we are not rasterizing points, don't set fixed_point_size to * eliminate the useless point size write. */ @@ -2533,7 +2525,6 @@ agx_update_gs(struct agx_context *ctx, const struct pipe_draw_info *info, .rasterizer_discard = ctx->rast->base.rasterizer_discard, /* TODO: Deduplicate */ - .clip_halfz = ctx->rast->base.clip_halfz, .fixed_point_size = !ctx->rast->base.point_size_per_vertex && rasterized_prim == MESA_PRIM_POINTS, .outputs_flat_shaded = diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 7d88bd98261..4b8bbad13c8 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -150,6 +150,9 @@ struct PACKED agx_draw_uniforms { /* Nonzero for indexed draws, zero otherwise */ uint16_t is_indexed_draw; + + /* Half-float (fp16) bit pattern: zero for [0, 1] clipping, 0.5 (0x3800) for [-1, 1] clipping. 
*/ + uint16_t clip_z_coeff; }; struct PACKED agx_stage_uniforms { @@ -457,7 +460,6 @@ struct asahi_vs_shader_key { } gs; struct { - bool clip_halfz; bool fixed_point_size; uint64_t outputs_flat_shaded; uint64_t outputs_linear_shaded; @@ -507,12 +509,11 @@ struct asahi_gs_shader_key { /* Rasterizer shader key */ uint64_t outputs_flat_shaded; uint64_t outputs_linear_shaded; - bool clip_halfz; bool fixed_point_size; /* If true, this GS is run only for its side effects (including XFB) */ bool rasterizer_discard; - bool padding[5]; + bool padding[6]; }; static_assert(sizeof(struct asahi_gs_shader_key) == 24, "no holes"); diff --git a/src/gallium/drivers/asahi/agx_uniforms.c b/src/gallium/drivers/asahi/agx_uniforms.c index 94b5a08202e..5d8b137d6c3 100644 --- a/src/gallium/drivers/asahi/agx_uniforms.c +++ b/src/gallium/drivers/asahi/agx_uniforms.c @@ -6,6 +6,7 @@ #include "asahi/lib/agx_pack.h" #include "pipe/p_state.h" #include "util/format/u_format.h" +#include "util/half_float.h" #include "util/macros.h" #include "agx_state.h" #include "pool.h" @@ -102,9 +103,14 @@ agx_upload_uniforms(struct agx_batch *batch) batch->uniforms.tables[AGX_SYSVAL_TABLE_ROOT] = root_ptr.gpu; batch->uniforms.sample_mask = ctx->sample_mask; - batch->uniforms.sprite_mask = (batch->reduced_prim == MESA_PRIM_POINTS) - ? ctx->rast->base.sprite_coord_enable - : 0; + assert(_mesa_float_to_half(0.5) == 0x3800); + batch->uniforms.clip_z_coeff = + (ctx->rast && !ctx->rast->base.clip_halfz) ? 0x3800 : 0x0; + + batch->uniforms.sprite_mask = + (batch->reduced_prim == MESA_PRIM_POINTS && ctx->rast) + ? ctx->rast->base.sprite_coord_enable + : 0; memcpy(root_ptr.cpu, &batch->uniforms, sizeof(batch->uniforms)); }