From c43c90a5faf37f48e245165c5c3df837c5d08db0 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 27 Nov 2023 14:39:56 -0400 Subject: [PATCH] asahi: rewrite pointsize handling In the wise words of Mike Blumenkrantz, "I hate gl_PointSize and so can you". The mesa/st lowering won't mesh well with vertex shader epilogues, and it falls over in various circumstances. I am too tired to go against the grain, so let's just pretend to be a normal gallium driver and trust in the rasterizer CSO, lowering point size internally. This properly handles transform feedback without any hacks, both GL and GLES behaviours, etc. Fixes: KHR-GL31.transform_feedback.capture_vertex_separate_test gl-2.0-large-point-fs Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/compiler/agx_compile.c | 4 -- src/compiler/nir/nir_intrinsics.py | 3 + .../drivers/asahi/agx_nir_lower_point_size.c | 58 +++++++++++++++++ .../drivers/asahi/agx_nir_lower_sysvals.c | 2 + src/gallium/drivers/asahi/agx_pipe.c | 8 --- src/gallium/drivers/asahi/agx_state.c | 62 ++++++++++++------- src/gallium/drivers/asahi/agx_state.h | 6 ++ src/gallium/drivers/asahi/meson.build | 1 + 8 files changed, 109 insertions(+), 35 deletions(-) create mode 100644 src/gallium/drivers/asahi/agx_nir_lower_point_size.c diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 6c008fabc5b..b3a073cb83d 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -2906,10 +2906,6 @@ agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx, NIR_PASS_V(nir, nir_lower_vars_to_ssa); - if (nir->info.stage == MESA_SHADER_VERTEX) { - NIR_PASS_V(nir, nir_lower_point_size, 1.0, 0.0); - } - /* Lower large arrays to scratch and small arrays to csel */ NIR_PASS_V(nir, nir_lower_vars_to_scratch, nir_var_function_temp, 16, glsl_get_natural_size_align_bytes); diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 48c7a28c527..feab2cd4974 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1842,6 +1842,9 @@ system_value("api_sample_mask_agx", 1, bit_sizes=[16]) # Loads the sample position array as fixed point packed into a 32-bit word system_value("sample_positions_agx", 1, bit_sizes=[32]) +# Loads the fixed-function glPointSize() value +system_value("fixed_point_size_agx", 1, bit_sizes=[32]) + # Image loads go through the texture cache, which is not coherent with the PBE # or memory access, so fencing is necessary for writes to become visible. diff --git a/src/gallium/drivers/asahi/agx_nir_lower_point_size.c b/src/gallium/drivers/asahi/agx_nir_lower_point_size.c new file mode 100644 index 00000000000..98216f4c0f8 --- /dev/null +++ b/src/gallium/drivers/asahi/agx_nir_lower_point_size.c @@ -0,0 +1,58 @@ +/* + * Copyright 2023 Valve Corporation + * SPDX-License-Identifier: MIT + */ +#include "agx_state.h" +#include "nir_builder.h" + +/* + * gl_PointSize lowering. This runs late on a vertex shader (epilogue). By this + * time, I/O has been lowered, and transform feedback has been written. Point + * size will thus only get consumed by the rasterizer, so we can clamp/replace. + * We do this instead of the mesa/st lowerings for better behaviour with lowered + * I/O and vertex epilogues. + */ + +static bool +pass(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + bool *program_point_size = data; + b->cursor = nir_before_instr(&intr->instr); + + if ((intr->intrinsic != nir_intrinsic_store_output) || + (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PSIZ)) + return false; + + if (*program_point_size) { + /* We want to use this point size. Clamp it. */ + nir_src_rewrite(&intr->src[0], + nir_fmax(b, intr->src[0].ssa, nir_imm_float(b, 1.0f))); + } else { + /* We want to override point size. Remove this store. */ + nir_instr_remove(&intr->instr); + } + + return true; +} + +void +agx_nir_lower_point_size(nir_shader *nir, bool program_point_size) +{ + /* Handle existing point size write */ + nir_shader_intrinsics_pass(nir, pass, + nir_metadata_block_index | nir_metadata_dominance, + &program_point_size); + + /* Write the fixed-function point size if we have one */ + if (!program_point_size) { + nir_builder b = + nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(nir))); + + nir_store_output( + &b, nir_load_fixed_point_size_agx(&b), nir_imm_int(&b, 0), + .io_semantics.location = VARYING_SLOT_PSIZ, + .io_semantics.num_slots = 1, .write_mask = nir_component_mask(1)); + + nir->info.outputs_written |= VARYING_BIT_PSIZ; + } +} diff --git a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c index 3ca7b895780..b99f8465890 100644 --- a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c +++ b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c @@ -158,6 +158,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, return load_sysval_root(b, 1, 64, &u->input_assembly); case nir_intrinsic_load_geometry_param_buffer_agx: return load_sysval_root(b, 1, 64, &u->geometry_params); + case nir_intrinsic_load_fixed_point_size_agx: + return load_sysval_root(b, 1, 32, &u->fixed_point_size); default: break; } diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index 501d66bc908..409cbc810d9 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -1697,13 +1697,6 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY: return 1; - /* We run nir_lower_point_size so we need the GLSL linker to copy - * the original gl_PointSize when captured by transform feedback. We could - * also copy it ourselves but it's easier to set the CAP. - */ - case PIPE_CAP_PSIZ_CLAMPED: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_SIZE: return 16384; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: @@ -1758,7 +1751,6 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FLATSHADE: case PIPE_CAP_TWO_SIDED_COLOR: case PIPE_CAP_ALPHA_TEST: - case PIPE_CAP_POINT_SIZE_FIXED: case PIPE_CAP_CLIP_PLANES: case PIPE_CAP_NIR_IMAGES_AS_DEREF: return 0; diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index c73a007a8dc..6d4268b018e 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -50,6 +50,7 @@ #include "agx_nir_lower_gs.h" #include "agx_tilebuffer.h" #include "nir_builder.h" +#include "nir_builder_opcodes.h" #include "nir_intrinsics.h" #include "nir_intrinsics_indices.h" #include "pool.h" @@ -1697,6 +1698,7 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, struct asahi_vs_shader_key *key = &key_->vs; NIR_PASS_V(nir, agx_nir_lower_vbo, &key->vbuf); + NIR_PASS_V(nir, agx_nir_lower_point_size, key->program_point_size); if (should_lower_clip_m1_1(dev, key->clip_halfz)) { NIR_PASS_V(nir, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1, @@ -2126,6 +2128,19 @@ agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out, return true; } +static enum mesa_prim +rast_prim(enum mesa_prim mode, unsigned fill_mode) +{ + if (u_reduced_prim(mode) == MESA_PRIM_TRIANGLES) { + if (fill_mode == PIPE_POLYGON_MODE_POINT) + return MESA_PRIM_POINTS; + else if (fill_mode == PIPE_POLYGON_MODE_LINE) + return MESA_PRIM_LINES; + } + + return mode; +} + static bool agx_update_vs(struct agx_context *ctx) { @@ -2136,12 +2151,22 @@ agx_update_vs(struct agx_context *ctx) * outputs_{flat,linear}_shaded: FS_PROG */ if (!(ctx->dirty & (AGX_DIRTY_VS_PROG | AGX_DIRTY_VERTEX | AGX_DIRTY_XFB | - AGX_DIRTY_FS_PROG | AGX_DIRTY_RS))) + AGX_DIRTY_FS_PROG | AGX_DIRTY_RS | AGX_DIRTY_PRIM))) return false; + enum mesa_prim rasterized_prim = + rast_prim(ctx->batch->reduced_prim, ctx->rast->base.fill_front); + struct asahi_vs_shader_key key = { .vbuf.count = util_last_bit(ctx->vb_mask), .clip_halfz = ctx->rast->base.clip_halfz, + + /* If we are not rasterizing points, set program_point_size to eliminate + * the useless point size write. + */ + .program_point_size = ctx->rast->base.point_size_per_vertex || + rasterized_prim != MESA_PRIM_POINTS, + .outputs_flat_shaded = ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_flat_shaded, .outputs_linear_shaded = @@ -3824,19 +3849,6 @@ agx_needs_passthrough_gs(struct agx_context *ctx, return false; } -static enum mesa_prim -rast_prim(enum mesa_prim mode, unsigned fill_mode) -{ - if (u_reduced_prim(mode) == MESA_PRIM_TRIANGLES) { - if (fill_mode == PIPE_POLYGON_MODE_POINT) - return MESA_PRIM_POINTS; - else if (fill_mode == PIPE_POLYGON_MODE_LINE) - return MESA_PRIM_LINES; - } - - return mode; -} - static struct agx_uncompiled_shader * agx_get_passthrough_gs(struct agx_context *ctx, struct agx_uncompiled_shader *prev_cso, @@ -4031,6 +4043,14 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, agx_batch_init_state(batch); + /* Dirty track the reduced prim: lines vs points vs triangles. Happens before + * agx_update_vs/agx_update_fs, which specialize based on primitive. + */ + enum mesa_prim reduced_prim = u_reduced_prim(info->mode); + if (reduced_prim != batch->reduced_prim) + ctx->dirty |= AGX_DIRTY_PRIM; + batch->reduced_prim = reduced_prim; + /* Update shaders first so we can use them after */ if (agx_update_vs(ctx)) { ctx->dirty |= AGX_DIRTY_VS | AGX_DIRTY_VS_PROG; @@ -4060,14 +4080,6 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, ctx->dirty |= AGX_DIRTY_VS; } - /* Dirty track the reduced prim: lines vs points vs triangles. Happens before - * agx_update_fs, which specializes based on primitive. - */ - enum mesa_prim reduced_prim = u_reduced_prim(info->mode); - if (reduced_prim != batch->reduced_prim) - ctx->dirty |= AGX_DIRTY_PRIM; - batch->reduced_prim = reduced_prim; - if (agx_update_fs(batch)) { ctx->dirty |= AGX_DIRTY_FS | AGX_DIRTY_FS_PROG; ctx->stage[PIPE_SHADER_FRAGMENT].dirty = ~0; @@ -4132,8 +4144,12 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, sizeof(ctx->blend_color)); } + if (IS_DIRTY(RS)) { + batch->uniforms.fixed_point_size = ctx->rast->base.point_size; + } + if (IS_DIRTY(VS) || IS_DIRTY(FS) || ctx->gs || IS_DIRTY(VERTEX) || - IS_DIRTY(BLEND_COLOR)) { + IS_DIRTY(BLEND_COLOR) || IS_DIRTY(RS)) { agx_upload_uniforms(batch); } diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 25b76e8dd5b..29225ce2f91 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -108,6 +108,9 @@ struct PACKED agx_draw_uniforms { /* Blend constant if any */ float blend_constant[4]; + /* glPointSize value */ + float fixed_point_size; + /* Value of the multisample control register, containing sample positions in * each byte (x in low nibble, y in high nibble). */ @@ -364,6 +367,7 @@ struct agx_blend { struct asahi_vs_shader_key { struct agx_vbufs vbuf; bool clip_halfz; + bool program_point_size; uint64_t outputs_flat_shaded; uint64_t outputs_linear_shaded; }; @@ -796,6 +800,8 @@ void agx_upload_uniforms(struct agx_batch *batch); uint64_t agx_upload_stage_uniforms(struct agx_batch *batch, uint64_t textures, enum pipe_shader_type stage); +void agx_nir_lower_point_size(nir_shader *nir, bool program_point_size); + bool agx_nir_lower_sysvals(nir_shader *shader, bool lower_draw_params); bool agx_nir_layout_uniforms(nir_shader *shader, diff --git a/src/gallium/drivers/asahi/meson.build b/src/gallium/drivers/asahi/meson.build index 17bfabf2d6d..140024e3f9b 100644 --- a/src/gallium/drivers/asahi/meson.build +++ b/src/gallium/drivers/asahi/meson.build @@ -10,6 +10,7 @@ files_asahi = files( 'agx_pipe.c', 'agx_nir_lower_sysvals.c', 'agx_nir_lower_bindings.c', + 'agx_nir_lower_point_size.c', 'agx_query.c', 'agx_state.c', 'agx_streamout.c',