diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 2492aa6d9aa..2955e88882b 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -1863,6 +1863,11 @@ intrinsic("sample_mask_agx", src_comp=[1, 1])
 # sample_mask_agx also triggers depth/stencil testing.
 intrinsic("discard_agx", src_comp=[1])
 
+# For a given row of the polygon stipple given as an integer source in [0, 31],
+# load the 32-bit stipple pattern for that row.
+intrinsic("load_polygon_stipple_agx", src_comp=[1], dest_comp=1, bit_sizes=[32],
+          flags=[CAN_ELIMINATE, CAN_REORDER])
+
 # The fixed-function sample mask specified in the API (e.g. glSampleMask)
 system_value("api_sample_mask_agx", 1, bit_sizes=[16])
 
diff --git a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
index 999cc146d81..0fcaf870ddb 100644
--- a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
+++ b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
@@ -165,6 +165,14 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
       return load_sysval_root(b, 1, 32, &u->fixed_point_size);
    case nir_intrinsic_load_tex_sprite_mask_agx:
       return load_sysval_root(b, 1, 16, &u->sprite_mask);
+   case nir_intrinsic_load_polygon_stipple_agx: {
+      nir_def *base = load_sysval_root(b, 1, 64, &u->polygon_stipple);
+      nir_def *row = intr->src[0].ssa;
+      nir_def *addr = nir_iadd(b, base, nir_u2u64(b, nir_imul_imm(b, row, 4)));
+
+      return nir_load_global_constant(b, addr, 4, 1, 32);
+   }
+
    default:
       break;
    }
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index 14f45596a28..95003d44b33 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -918,9 +918,13 @@ agx_set_clip_state(struct pipe_context *ctx,
 }
 
 static void
-agx_set_polygon_stipple(struct pipe_context *ctx,
+agx_set_polygon_stipple(struct pipe_context *pctx,
                         const struct pipe_poly_stipple *state)
 {
+   struct agx_context *ctx = agx_context(pctx);
+
+   memcpy(ctx->poly_stipple, state->stipple, sizeof(ctx->poly_stipple));
+   ctx->dirty |= AGX_DIRTY_POLY_STIPPLE;
 }
 
 static void
@@ -1758,6 +1762,46 @@ agx_compile_nir(struct agx_device *dev, nir_shader *nir,
    return compiled;
 }
 
+/*
+ * Insert code into a fragment shader to lower polygon stipple. The stipple is
+ * passed in a sideband, rather than requiring a texture binding. This is
+ * simpler for drivers to integrate and might be more efficient.
+ */
+static bool
+agx_nir_lower_poly_stipple(nir_shader *s)
+{
+   assert(s->info.stage == MESA_SHADER_FRAGMENT);
+
+   /* Insert at the beginning for performance. */
+   nir_builder b_ =
+      nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
+   nir_builder *b = &b_;
+
+   /* The stipple coordinate is defined as the window coordinate mod 32. It's
+    * reversed along the X-axis to simplify the driver, hence the NOT.
+    */
+   nir_def *raw = nir_u2u32(b, nir_load_pixel_coord(b));
+   nir_def *coord = nir_umod_imm(
+      b,
+      nir_vec2(b, nir_inot(b, nir_channel(b, raw, 0)), nir_channel(b, raw, 1)),
+      32);
+
+   /* Load the stipple pattern for the row */
+   nir_def *pattern = nir_load_polygon_stipple_agx(b, nir_channel(b, coord, 1));
+
+   /* Extract the column from the packed bitfield */
+   nir_def *bit = nir_ubitfield_extract(b, pattern, nir_channel(b, coord, 0),
+                                        nir_imm_int(b, 1));
+
+   /* Discard fragments where the pattern is 0 */
+   nir_discard_if(b, nir_ieq_imm(b, bit, 0));
+   s->info.fs.uses_discard = true;
+
+   nir_metadata_preserve(b->impl,
+                         nir_metadata_dominance | nir_metadata_block_index);
+   return true;
+}
+
 /* Does not take ownership of key. Clones if necessary. */
 static struct agx_compiled_shader *
 agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
@@ -1891,6 +1935,11 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
                   key->cull_distance_size);
       }
 
+      /* Similarly for polygon stipple */
+      if (key->polygon_stipple) {
+         NIR_PASS_V(nir, agx_nir_lower_poly_stipple);
+      }
+
       /* Discards must be lowering before lowering MSAA to handle discards */
       NIR_PASS(_, nir, agx_nir_lower_discard_zs_emit);
 
@@ -2377,6 +2426,12 @@ agx_update_fs(struct agx_batch *batch)
       .cull_distance_size =
          ctx->stage[MESA_SHADER_VERTEX].shader->info.cull_distance_size,
       .clip_plane_enable = ctx->rast->base.clip_plane_enable,
+
+      .polygon_stipple =
+         ctx->rast->base.poly_stipple_enable &&
+         rast_prim(batch->reduced_prim, ctx->rast->base.fill_front) ==
+            MESA_PRIM_TRIANGLES,
+
       .nr_samples = nr_samples,
 
       /* Only lower sample mask if at least one sample is masked out */
@@ -4328,6 +4383,13 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
       batch->uniforms.fixed_point_size = ctx->rast->base.point_size;
    }
 
+   if (IS_DIRTY(POLY_STIPPLE)) {
+      STATIC_ASSERT(sizeof(ctx->poly_stipple) == 32 * 4);
+
+      batch->uniforms.polygon_stipple = agx_pool_upload_aligned(
+         &batch->pool, ctx->poly_stipple, sizeof(ctx->poly_stipple), 4);
+   }
+
    if (IS_DIRTY(VS) || IS_DIRTY(FS) || ctx->gs || IS_DIRTY(VERTEX) ||
        IS_DIRTY(BLEND_COLOR) || IS_DIRTY(RS) || IS_DIRTY(PRIM)) {
 
diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h
index 45267e40712..6f2ded18bb2 100644
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@@ -109,6 +109,9 @@ struct PACKED agx_draw_uniforms {
    /* Address of geometry param buffer if geometry shaders are used, else 0 */
    uint64_t geometry_params;
 
+   /* Address of polygon stipple mask if used */
+   uint64_t polygon_stipple;
+
    /* Blend constant if any */
    float blend_constant[4];
 
@@ -396,6 +399,7 @@ struct asahi_fs_shader_key {
     * don't want to emit lowering code for it, since it would disable early-Z.
     */
    bool api_sample_mask;
+   bool polygon_stipple;
 
    uint8_t cull_distance_size;
    uint8_t clip_plane_enable;
@@ -449,6 +453,7 @@ enum agx_dirty {
    AGX_DIRTY_XFB = BITFIELD_BIT(14),
    AGX_DIRTY_SAMPLE_MASK = BITFIELD_BIT(15),
    AGX_DIRTY_BLEND_COLOR = BITFIELD_BIT(16),
+   AGX_DIRTY_POLY_STIPPLE = BITFIELD_BIT(17),
 };
 
 /* Maximum number of in-progress + under-construction GPU batches.
@@ -535,6 +540,8 @@ struct agx_context {
    uint16_t sample_mask;
    struct pipe_framebuffer_state framebuffer;
 
+   uint32_t poly_stipple[32];
+
    struct pipe_query *cond_query;
    bool cond_cond;
    enum pipe_render_cond_flag cond_mode;