diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 5c4598a7d80..c3742bc2a22 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -100,12 +100,13 @@ agx_get_cf(agx_context *ctx, bool smooth, bool perspective, * Y alone. */ bool is_pntc = (slot == VARYING_SLOT_PNTC); + bool is_tex = slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7; unsigned cf_offset = 0; - if (is_pntc) { + if (is_pntc || is_tex) { cf_offset = offset; offset = 0; - count = MAX2(2, count + offset); + count = is_tex ? 4 : MAX2(2, count + offset); } /* First, search for an appropriate binding. This is O(n) to the number of diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index f1303dabf54..fcbb11ca182 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1849,6 +1849,9 @@ system_value("sample_positions_agx", 1, bit_sizes=[32]) # Loads the fixed-function glPointSize() value system_value("fixed_point_size_agx", 1, bit_sizes=[32]) +# Bit mask of TEX locations that are replaced with point sprites +system_value("tex_sprite_mask_agx", 1, bit_sizes=[16]) + # Image loads go through the texture cache, which is not coherent with the PBE # or memory access, so fencing is necessary for writes to become visible. 
diff --git a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
index b99f8465890..d9848b62e78 100644
--- a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
+++ b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
@@ -160,6 +160,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
       return load_sysval_root(b, 1, 64, &u->geometry_params);
    case nir_intrinsic_load_fixed_point_size_agx:
       return load_sysval_root(b, 1, 32, &u->fixed_point_size);
+   case nir_intrinsic_load_tex_sprite_mask_agx:
+      return load_sysval_root(b, 1, 16, &u->sprite_mask);
    default:
       break;
    }
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index d42a0be7c75..6c8cb1a5605 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -1580,6 +1580,7 @@ agx_num_general_outputs(struct agx_varyings_vs *vs)
 static uint32_t
 agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs,
                         struct agx_varyings_fs *fs, bool first_provoking_vertex,
+                        uint8_t sprite_coord_enable,
                         bool *generate_primitive_id)
 {
    *generate_primitive_id = false;
@@ -1609,7 +1610,8 @@ agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs,
          cfg.shade_model =
             agx_translate_shade_model(fs, i, first_provoking_vertex);
 
-         if (fs->bindings[i].slot == VARYING_SLOT_PNTC) {
+         if (util_varying_is_point_coord(fs->bindings[i].slot,
+                                         sprite_coord_enable)) {
             assert(fs->bindings[i].offset == 0);
             cfg.source = AGX_COEFFICIENT_SOURCE_POINT_COORD;
          } else if (fs->bindings[i].slot == VARYING_SLOT_PRIMITIVE_ID &&
@@ -1674,6 +1676,56 @@ agx_nir_lower_clip_m1_1(nir_builder *b, nir_intrinsic_instr *intr,
    return true;
 }
 
+/* Return the requested channel of def, or an undef if it is out of range. */
+static nir_def *
+nir_channel_or_undef(nir_builder *b, nir_def *def, signed int channel)
+{
+   if (channel >= 0 && channel < def->num_components)
+      return nir_channel(b, def, channel);
+   return nir_undef(b, def->bit_size, 1);
+}
+
+/*
+ * To implement point sprites, we'll replace TEX0...7 with point coordinate
+ * reads as required. However, the .zw needs to read back 0.0/1.0. This pass
+ * fixes up TEX loads of Z and W (at the load's own bit size) according to a
+ * uniform passed in a sideband, eliminating shader variants.
+ */
+static bool
+agx_nir_lower_point_sprite_zw(nir_builder *b, nir_intrinsic_instr *intr,
+                              UNUSED void *data)
+{
+   if (intr->intrinsic != nir_intrinsic_load_input &&
+       intr->intrinsic != nir_intrinsic_load_interpolated_input)
+      return false;
+
+   gl_varying_slot loc = nir_intrinsic_io_semantics(intr).location;
+   if (!(loc >= VARYING_SLOT_TEX0 && loc <= VARYING_SLOT_TEX7))
+      return false;
+
+   b->cursor = nir_after_instr(&intr->instr);
+   unsigned component = nir_intrinsic_component(intr);
+
+   nir_def *mask = nir_load_tex_sprite_mask_agx(b);
+   nir_def *location = nir_iadd_imm(b, nir_get_io_offset_src(intr)->ssa,
+                                    loc - VARYING_SLOT_TEX0);
+   nir_def *bit = nir_ishl(b, nir_imm_intN_t(b, 1, 16), location);
+   nir_def *replace = nir_i2b(b, nir_iand(b, mask, bit));
+
+   nir_def *vec = nir_pad_vec4(b, &intr->def);
+   nir_def *chans[4] = {NULL, NULL, nir_imm_floatN_t(b, 0.0, vec->bit_size),
+                        nir_imm_floatN_t(b, 1.0, vec->bit_size)};
+
+   for (unsigned i = 0; i < 4; ++i) {
+      nir_def *chan = nir_channel_or_undef(b, vec, (int)i - (int)component);
+      chans[i] = chans[i] ? nir_bcsel(b, replace, chans[i], chan) : chan;
+   }
+
+   nir_def *new_vec = nir_vec(b, &chans[component], intr->def.num_components);
+   nir_def_rewrite_uses_after(&intr->def, new_vec, new_vec->parent_instr);
+   return true;
+}
+
 /*
  * Compile a NIR shader. The only lowering left at this point is sysvals. The
  * shader key should have already been applied. agx_compile_variant may call
agx_compile_variant may call @@ -1871,12 +1923,6 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, .api_sample_mask = key->api_sample_mask, }); - if (key->sprite_coord_enable) { - NIR_PASS_V(nir, nir_lower_texcoord_replace_late, - key->sprite_coord_enable, - false /* point coord is sysval */); - } - NIR_PASS_V(nir, agx_nir_predicate_layer_id); } @@ -2004,6 +2050,13 @@ agx_shader_initialize(struct agx_device *dev, struct agx_uncompiled_shader *so, bool allow_mediump = !(dev->debug & AGX_DBG_NO16); agx_preprocess_nir(nir, dev->libagx, allow_mediump, &so->info); + if (nir->info.stage == MESA_SHADER_FRAGMENT && + (nir->info.inputs_read & VARYING_BITS_TEX_ANY)) { + + NIR_PASS_V(nir, nir_shader_intrinsics_pass, agx_nir_lower_point_sprite_zw, + nir_metadata_block_index | nir_metadata_dominance, NULL); + } + blob_init(&so->serialized_nir); nir_serialize(&so->serialized_nir, nir, true); _mesa_sha1_compute(so->serialized_nir.data, so->serialized_nir.size, @@ -2327,9 +2380,6 @@ agx_update_fs(struct agx_batch *batch) msaa && (~ctx->sample_mask & BITFIELD_MASK(nr_samples)), }; - if (batch->reduced_prim == MESA_PRIM_POINTS) - key.sprite_coord_enable = ctx->rast->base.sprite_coord_enable; - for (unsigned i = 0; i < key.nr_cbufs; ++i) { struct pipe_surface *surf = batch->key.cbufs[i]; @@ -3135,10 +3185,14 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out, bool is_lines, bool varyings_dirty = false; - if (IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG) || IS_DIRTY(RS)) { + if (IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG) || IS_DIRTY(RS) || + IS_DIRTY(PRIM)) { batch->varyings = agx_link_varyings_vs_fs( &batch->pipeline_pool, &vs->info.varyings.vs, &ctx->fs->info.varyings.fs, ctx->rast->base.flatshade_first, + (batch->reduced_prim == MESA_PRIM_POINTS) + ? 
ctx->rast->base.sprite_coord_enable + : 0, &batch->generate_primitive_id); varyings_dirty = true; @@ -4247,7 +4301,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, } if (IS_DIRTY(VS) || IS_DIRTY(FS) || ctx->gs || IS_DIRTY(VERTEX) || - IS_DIRTY(BLEND_COLOR) || IS_DIRTY(RS)) { + IS_DIRTY(BLEND_COLOR) || IS_DIRTY(RS) || IS_DIRTY(PRIM)) { agx_upload_uniforms(batch); } diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 59bc2a7876f..28697be05f3 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -120,6 +120,9 @@ struct PACKED agx_draw_uniforms { /* gl_DrawID for a direct multidraw */ uint32_t draw_id; + /* Sprite coord replacement mask */ + uint16_t sprite_mask; + /* glSampleMask */ uint16_t sample_mask; @@ -382,9 +385,6 @@ struct asahi_fs_shader_key { struct agx_blend blend; unsigned nr_cbufs; - /* From rasterizer state, to lower point sprites */ - uint16_t sprite_coord_enable; - /* Set if glSampleMask() is used with a mask other than all-1s. If not, we * don't want to emit lowering code for it, since it would disable early-Z. */ diff --git a/src/gallium/drivers/asahi/agx_uniforms.c b/src/gallium/drivers/asahi/agx_uniforms.c index cff48359f41..97e8c01ab7b 100644 --- a/src/gallium/drivers/asahi/agx_uniforms.c +++ b/src/gallium/drivers/asahi/agx_uniforms.c @@ -69,6 +69,10 @@ agx_upload_uniforms(struct agx_batch *batch) batch->uniforms.tables[AGX_SYSVAL_TABLE_ROOT] = root_ptr.gpu; batch->uniforms.sample_mask = ctx->sample_mask; + batch->uniforms.sprite_mask = (batch->reduced_prim == MESA_PRIM_POINTS) + ? ctx->rast->base.sprite_coord_enable + : 0; + memcpy(root_ptr.cpu, &batch->uniforms, sizeof(batch->uniforms)); }