asahi: rewrite pointsize handling

In the wise words of Mike Blumenkrantz, "I hate gl_PointSize and so can you". The mesa/st lowering won't mesh well with vertex shader epilogues, and it falls over in various circumstances. I am too tired to go against the grain, so let's just pretend to be a normal gallium driver and trust in the rasterizer CSO, lowering point size internally. This properly handles transform feedback without any hacks, both GL and GLES behaviours, etc. Fixes: KHR-GL31.transform_feedback.capture_vertex_separate_test gl-2.0-large-point-fs Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26614>
2026-05-08 02:38:04 +02:00 · 2023-11-27 14:39:56 -04:00 · 2023-11-27 14:39:56 -04:00 · c43c90a5fa
commit c43c90a5fa
parent aa1849a541
8 changed files with 109 additions and 35 deletions
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@ -2906,10 +2906,6 @@ agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx,

   NIR_PASS_V(nir, nir_lower_vars_to_ssa);

-   if (nir->info.stage == MESA_SHADER_VERTEX) {
-      NIR_PASS_V(nir, nir_lower_point_size, 1.0, 0.0);
-   }
-
   /* Lower large arrays to scratch and small arrays to csel */
   NIR_PASS_V(nir, nir_lower_vars_to_scratch, nir_var_function_temp, 16,
              glsl_get_natural_size_align_bytes);
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@ -1842,6 +1842,9 @@ system_value("api_sample_mask_agx", 1, bit_sizes=[16])
 # Loads the sample position array as fixed point packed into a 32-bit word
 system_value("sample_positions_agx", 1, bit_sizes=[32])

+# Loads the fixed-function glPointSize() value
+system_value("fixed_point_size_agx", 1, bit_sizes=[32])
+
 # Image loads go through the texture cache, which is not coherent with the PBE
 # or memory access, so fencing is necessary for writes to become visible.

--- a/src/gallium/drivers/asahi/agx_nir_lower_point_size.c
+++ b/src/gallium/drivers/asahi/agx_nir_lower_point_size.c
@ -0,0 +1,58 @@
+/*
+ * Copyright 2023 Valve Corporation
+ * SPDX-License-Identifier: MIT
+ */
+#include "agx_state.h"
+#include "nir_builder.h"
+
+/*
+ * gl_PointSize lowering. This runs late on a vertex shader (epilogue). By this
+ * time, I/O has been lowered, and transform feedback has been written. Point
+ * size will thus only get consumed by the rasterizer, so we can clamp/replace.
+ * We do this instead of the mesa/st lowerings for better behaviour with lowered
+ * I/O and vertex epilogues.
+ */
+
+static bool
+pass(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   bool *program_point_size = data;
+   b->cursor = nir_before_instr(&intr->instr);
+
+   if ((intr->intrinsic != nir_intrinsic_store_output) ||
+       (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PSIZ))
+      return false;
+
+   if (*program_point_size) {
+      /* We want to use this point size. Clamp it. */
+      nir_src_rewrite(&intr->src[0],
+                      nir_fmax(b, intr->src[0].ssa, nir_imm_float(b, 1.0f)));
+   } else {
+      /* We want to override point size. Remove this store. */
+      nir_instr_remove(&intr->instr);
+   }
+
+   return true;
+}
+
+void
+agx_nir_lower_point_size(nir_shader *nir, bool program_point_size)
+{
+   /* Handle existing point size write */
+   nir_shader_intrinsics_pass(nir, pass,
+                              nir_metadata_block_index | nir_metadata_dominance,
+                              &program_point_size);
+
+   /* Write the fixed-function point size if we have one */
+   if (!program_point_size) {
+      nir_builder b =
+         nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(nir)));
+
+      nir_store_output(
+         &b, nir_load_fixed_point_size_agx(&b), nir_imm_int(&b, 0),
+         .io_semantics.location = VARYING_SLOT_PSIZ,
+         .io_semantics.num_slots = 1, .write_mask = nir_component_mask(1));
+
+      nir->info.outputs_written |= VARYING_BIT_PSIZ;
+   }
+}
--- a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
+++ b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
@ -158,6 +158,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
      return load_sysval_root(b, 1, 64, &u->input_assembly);
   case nir_intrinsic_load_geometry_param_buffer_agx:
      return load_sysval_root(b, 1, 64, &u->geometry_params);
+   case nir_intrinsic_load_fixed_point_size_agx:
+      return load_sysval_root(b, 1, 32, &u->fixed_point_size);
   default:
      break;
   }
--- a/src/gallium/drivers/asahi/agx_pipe.c
+++ b/src/gallium/drivers/asahi/agx_pipe.c
@ -1697,13 +1697,6 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
   case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY:
      return 1;

-   /* We run nir_lower_point_size so we need the GLSL linker to copy
-    * the original gl_PointSize when captured by transform feedback. We could
-    * also copy it ourselves but it's easier to set the CAP.
-    */
-   case PIPE_CAP_PSIZ_CLAMPED:
-      return 1;
-
   case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
      return 16384;
   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
@ -1758,7 +1751,6 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
   case PIPE_CAP_FLATSHADE:
   case PIPE_CAP_TWO_SIDED_COLOR:
   case PIPE_CAP_ALPHA_TEST:
-   case PIPE_CAP_POINT_SIZE_FIXED:
   case PIPE_CAP_CLIP_PLANES:
   case PIPE_CAP_NIR_IMAGES_AS_DEREF:
      return 0;
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@ -50,6 +50,7 @@
 #include "agx_nir_lower_gs.h"
 #include "agx_tilebuffer.h"
 #include "nir_builder.h"
+#include "nir_builder_opcodes.h"
 #include "nir_intrinsics.h"
 #include "nir_intrinsics_indices.h"
 #include "pool.h"
@ -1697,6 +1698,7 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
      struct asahi_vs_shader_key *key = &key_->vs;

      NIR_PASS_V(nir, agx_nir_lower_vbo, &key->vbuf);
+      NIR_PASS_V(nir, agx_nir_lower_point_size, key->program_point_size);

      if (should_lower_clip_m1_1(dev, key->clip_halfz)) {
         NIR_PASS_V(nir, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1,
@ -2126,6 +2128,19 @@ agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out,
   return true;
 }

+static enum mesa_prim
+rast_prim(enum mesa_prim mode, unsigned fill_mode)
+{
+   if (u_reduced_prim(mode) == MESA_PRIM_TRIANGLES) {
+      if (fill_mode == PIPE_POLYGON_MODE_POINT)
+         return MESA_PRIM_POINTS;
+      else if (fill_mode == PIPE_POLYGON_MODE_LINE)
+         return MESA_PRIM_LINES;
+   }
+
+   return mode;
+}
+
 static bool
 agx_update_vs(struct agx_context *ctx)
 {
@ -2136,12 +2151,22 @@ agx_update_vs(struct agx_context *ctx)
    * outputs_{flat,linear}_shaded: FS_PROG
    */
   if (!(ctx->dirty & (AGX_DIRTY_VS_PROG | AGX_DIRTY_VERTEX | AGX_DIRTY_XFB |
-                       AGX_DIRTY_FS_PROG | AGX_DIRTY_RS)))
+                       AGX_DIRTY_FS_PROG | AGX_DIRTY_RS | AGX_DIRTY_PRIM)))
      return false;

+   enum mesa_prim rasterized_prim =
+      rast_prim(ctx->batch->reduced_prim, ctx->rast->base.fill_front);
+
   struct asahi_vs_shader_key key = {
      .vbuf.count = util_last_bit(ctx->vb_mask),
      .clip_halfz = ctx->rast->base.clip_halfz,
+
+      /* If we are not rasterizing points, set program_point_size to eliminate
+       * the useless point size write.
+       */
+      .program_point_size = ctx->rast->base.point_size_per_vertex ||
+                            rasterized_prim != MESA_PRIM_POINTS,
+
      .outputs_flat_shaded =
         ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_flat_shaded,
      .outputs_linear_shaded =
@ -3824,19 +3849,6 @@ agx_needs_passthrough_gs(struct agx_context *ctx,
   return false;
 }

-static enum mesa_prim
-rast_prim(enum mesa_prim mode, unsigned fill_mode)
-{
-   if (u_reduced_prim(mode) == MESA_PRIM_TRIANGLES) {
-      if (fill_mode == PIPE_POLYGON_MODE_POINT)
-         return MESA_PRIM_POINTS;
-      else if (fill_mode == PIPE_POLYGON_MODE_LINE)
-         return MESA_PRIM_LINES;
-   }
-
-   return mode;
-}
-
 static struct agx_uncompiled_shader *
 agx_get_passthrough_gs(struct agx_context *ctx,
                       struct agx_uncompiled_shader *prev_cso,
@ -4031,6 +4043,14 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,

   agx_batch_init_state(batch);

+   /* Dirty track the reduced prim: lines vs points vs triangles. Happens before
+    * agx_update_vs/agx_update_fs, which specialize based on primitive.
+    */
+   enum mesa_prim reduced_prim = u_reduced_prim(info->mode);
+   if (reduced_prim != batch->reduced_prim)
+      ctx->dirty |= AGX_DIRTY_PRIM;
+   batch->reduced_prim = reduced_prim;
+
   /* Update shaders first so we can use them after */
   if (agx_update_vs(ctx)) {
      ctx->dirty |= AGX_DIRTY_VS | AGX_DIRTY_VS_PROG;
@ -4060,14 +4080,6 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
      ctx->dirty |= AGX_DIRTY_VS;
   }

-   /* Dirty track the reduced prim: lines vs points vs triangles. Happens before
-    * agx_update_fs, which specializes based on primitive.
-    */
-   enum mesa_prim reduced_prim = u_reduced_prim(info->mode);
-   if (reduced_prim != batch->reduced_prim)
-      ctx->dirty |= AGX_DIRTY_PRIM;
-   batch->reduced_prim = reduced_prim;
-
   if (agx_update_fs(batch)) {
      ctx->dirty |= AGX_DIRTY_FS | AGX_DIRTY_FS_PROG;
      ctx->stage[PIPE_SHADER_FRAGMENT].dirty = ~0;
@ -4132,8 +4144,12 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
             sizeof(ctx->blend_color));
   }

+   if (IS_DIRTY(RS)) {
+      batch->uniforms.fixed_point_size = ctx->rast->base.point_size;
+   }
+
   if (IS_DIRTY(VS) || IS_DIRTY(FS) || ctx->gs || IS_DIRTY(VERTEX) ||
-       IS_DIRTY(BLEND_COLOR)) {
+       IS_DIRTY(BLEND_COLOR) || IS_DIRTY(RS)) {

      agx_upload_uniforms(batch);
   }
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@ -108,6 +108,9 @@ struct PACKED agx_draw_uniforms {
   /* Blend constant if any */
   float blend_constant[4];

+   /* glPointSize value */
+   float fixed_point_size;
+
   /* Value of the multisample control register, containing sample positions in
    * each byte (x in low nibble, y in high nibble).
    */
@ -364,6 +367,7 @@ struct agx_blend {
 struct asahi_vs_shader_key {
   struct agx_vbufs vbuf;
   bool clip_halfz;
+   bool program_point_size;
   uint64_t outputs_flat_shaded;
   uint64_t outputs_linear_shaded;
 };
@ -796,6 +800,8 @@ void agx_upload_uniforms(struct agx_batch *batch);
 uint64_t agx_upload_stage_uniforms(struct agx_batch *batch, uint64_t textures,
                                   enum pipe_shader_type stage);

+void agx_nir_lower_point_size(nir_shader *nir, bool program_point_size);
+
 bool agx_nir_lower_sysvals(nir_shader *shader, bool lower_draw_params);

 bool agx_nir_layout_uniforms(nir_shader *shader,
--- a/src/gallium/drivers/asahi/meson.build
+++ b/src/gallium/drivers/asahi/meson.build
@ -10,6 +10,7 @@ files_asahi = files(
  'agx_pipe.c',
  'agx_nir_lower_sysvals.c',
  'agx_nir_lower_bindings.c',
+  'agx_nir_lower_point_size.c',
  'agx_query.c',
  'agx_state.c',
  'agx_streamout.c',