From bd3ed09bb7f03fcd42a03aa5befca97a8511b22f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 13 Sep 2024 19:25:13 +0200 Subject: [PATCH] panfrost: Emit an SSBO table on v9+ If we want to be able to replace the SW-based (SSBO index, offset) -> global address logic by something that uses LEA_PKA to do the bounds check, we need to emit the SSBO table and lower SSBO indices like we do for other resources. This should stay unused until we toggle the native SSBO switch. Signed-off-by: Boris Brezillon Reviewed-by: Mary Guillemard Reviewed-by: Eric R. Smith Part-of: --- src/gallium/drivers/panfrost/pan_cmdstream.c | 38 +++++++++++++++++++ src/gallium/drivers/panfrost/pan_cmdstream.h | 3 ++ src/gallium/drivers/panfrost/pan_context.h | 1 + src/gallium/drivers/panfrost/pan_job.h | 3 ++ .../panfrost/pan_nir_lower_res_indices.c | 18 ++++++++- 5 files changed, 62 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 0715841da31..28d9ae29ab7 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1354,6 +1354,41 @@ panfrost_emit_ubo(void *base, unsigned index, mali_ptr address, size_t size) #endif } +#if PAN_ARCH >= 9 +static mali_ptr +panfrost_emit_ssbos(struct panfrost_batch *batch, enum pipe_shader_type st) +{ + struct panfrost_context *ctx = batch->ctx; + unsigned ssbo_count = util_last_bit(ctx->ssbo_mask[st]); + + if (!ssbo_count) + return 0; + + struct panfrost_ptr ssbos = + pan_pool_alloc_desc_array(&batch->pool.base, ssbo_count, BUFFER); + struct mali_buffer_packed *bufs = ssbos.cpu; + + memset(bufs, 0, sizeof(bufs[0]) * ssbo_count); + + u_foreach_bit(ssbo_id, ctx->ssbo_mask[st]) { + struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id]; + struct panfrost_resource *rsrc = pan_resource(sb.buffer); + struct panfrost_bo *bo = rsrc->bo; + + panfrost_batch_write_rsrc(batch, rsrc, st); + + util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 
sb.buffer_offset, + sb.buffer_size); + pan_pack(&bufs[ssbo_id], BUFFER, cfg) { + cfg.size = sb.buffer_size; + cfg.address = bo->ptr.gpu + sb.buffer_offset; + } + } + + return ssbos.gpu; +} +#endif + static mali_ptr panfrost_emit_const_buf(struct panfrost_batch *batch, enum pipe_shader_type stage, unsigned *buffer_count, @@ -2715,6 +2750,9 @@ panfrost_update_shader_state(struct panfrost_batch *batch, batch->images[st] = ctx->image_mask[st] ? panfrost_emit_images(batch, st) : 0; } + + if (dirty & PAN_DIRTY_STAGE_SSBO) + batch->ssbos[st] = panfrost_emit_ssbos(batch, st); #endif if ((dirty & ss->dirty_shader) || (dirty_3d & ss->dirty_3d)) { diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.h b/src/gallium/drivers/panfrost/pan_cmdstream.h index 1ccc82f52bb..ca8420b7ac0 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.h +++ b/src/gallium/drivers/panfrost/pan_cmdstream.h @@ -306,6 +306,9 @@ panfrost_emit_resources(struct panfrost_batch *batch, util_last_bit(ctx->vb_mask)); } + panfrost_make_resource_table(T, PAN_TABLE_SSBO, batch->ssbos[stage], + util_last_bit(ctx->ssbo_mask[stage])); + return T.gpu | nr_tables; } #endif /* PAN_ARCH >= 9 */ diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 4ede60b2bde..13096ac458b 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -322,6 +322,7 @@ enum panfrost_resource_table { PAN_TABLE_SAMPLER, PAN_TABLE_TEXTURE, PAN_TABLE_IMAGE, + PAN_TABLE_SSBO, PAN_NUM_RESOURCE_TABLES }; diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h index 5be52e60a15..5c6a917960f 100644 --- a/src/gallium/drivers/panfrost/pan_job.h +++ b/src/gallium/drivers/panfrost/pan_job.h @@ -201,6 +201,9 @@ struct panfrost_batch { */ mali_ptr images[PIPE_SHADER_TYPES]; + /* SSBOs. */ + mali_ptr ssbos[PIPE_SHADER_TYPES]; + /* On Valhall, these are properties of the batch. On Bifrost, they are * per draw. 
*/ diff --git a/src/gallium/drivers/panfrost/pan_nir_lower_res_indices.c b/src/gallium/drivers/panfrost/pan_nir_lower_res_indices.c index b05798032fc..1d7ccf83565 100644 --- a/src/gallium/drivers/panfrost/pan_nir_lower_res_indices.c +++ b/src/gallium/drivers/panfrost/pan_nir_lower_res_indices.c @@ -108,6 +108,19 @@ lower_load_ubo_intrin(nir_builder *b, nir_intrinsic_instr *intrin) return true; } +static bool +lower_ssbo_intrin(nir_builder *b, nir_intrinsic_instr *intrin) +{ + b->cursor = nir_before_instr(&intrin->instr); + + nir_def *new_offset = nir_ior_imm(b, intrin->src[0].ssa, + pan_res_handle(PAN_TABLE_SSBO, 0)); + + nir_src_rewrite(&intrin->src[0], new_offset); + + return true; +} + static bool lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, const struct panfrost_compile_inputs *inputs) @@ -121,6 +134,9 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, return lower_input_intrin(b, intrin, inputs); case nir_intrinsic_load_ubo: return lower_load_ubo_intrin(b, intrin); + case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_ssbo_address: + return lower_ssbo_intrin(b, intrin); default: return false; } @@ -155,4 +171,4 @@ panfrost_nir_lower_res_indices(nir_shader *shader, return nir_shader_instructions_pass( shader, lower_instr, nir_metadata_control_flow, inputs); -} \ No newline at end of file +}