freedreno/ir3: add SSBO get_buffer_size() support

Somehow I overlooked this when adding initial SSBO support.

Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
Rob Clark 2017-10-30 13:23:37 -04:00
parent b267a08404
commit 33f5f63b8f
6 changed files with 122 additions and 11 deletions

View file

@ -379,14 +379,8 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
CP_LOAD_STATE4_1_EXT_SRC_ADDR(0));
OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
for (unsigned i = 0; i < count; i++) {
struct pipe_shader_buffer *buf = &so->sb[i];
if (buf->buffer) {
struct fd_resource *rsc = fd_resource(buf->buffer);
OUT_RELOCW(ring, rsc->bo, 0, 0, 0);
} else {
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
}
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
}
@ -401,10 +395,13 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0));
for (unsigned i = 0; i < count; i++) {
struct pipe_shader_buffer *buf = &so->sb[i];
unsigned sz = buf->buffer_size;
// TODO maybe offset encoded somewhere here??
OUT_RING(ring, (buf->buffer_size << 16));
OUT_RING(ring, 0x00000000);
/* width is in dwords, overflows into height: */
sz /= 4;
OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz));
OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
}
OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count));

View file

@ -217,6 +217,8 @@ compile_init(struct ir3_compiler *compiler,
nir_print_shader(ctx->s, stdout);
}
ir3_nir_scan_driver_consts(ctx->s, &so->const_layout);
so->num_uniforms = ctx->s->num_uniforms;
so->num_ubos = ctx->s->info.num_ubos;
@ -225,6 +227,7 @@ compile_init(struct ir3_compiler *compiler,
*
* user consts
* UBO addresses
* SSBO sizes
* if (vertex shader) {
* driver params (IR3_DP_*)
* if (stream_output.num_outputs > 0)
@ -245,6 +248,12 @@ compile_init(struct ir3_compiler *compiler,
constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4;
}
if (so->const_layout.ssbo_size.count > 0) {
unsigned cnt = so->const_layout.ssbo_size.count;
so->constbase.ssbo_sizes = constoff;
constoff += align(cnt, 4) / 4;
}
unsigned num_driver_params = 0;
if (so->type == SHADER_VERTEX) {
num_driver_params = IR3_DP_VS_COUNT;
@ -1302,6 +1311,21 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
array_insert(b, b->keeps, stgb);
}
/*
 * Emit code for the get_buffer_size intrinsic.
 *
 * src[] = { block_index }
 *
 * The size is not computed in the shader; it is read from a driver-supplied
 * const.  The const for a given SSBO lives at constbase.ssbo_sizes plus the
 * per-SSBO offset recorded in const_layout.ssbo_size.off[], and dst[0] is
 * set to a uniform load of that slot.
 */
static void
emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
		struct ir3_instruction **dst)
{
	/* NOTE(review): the block index is dereferenced as a constant without
	 * a NULL check -- presumably indirect SSBO indexing is lowered or
	 * rejected before we get here; confirm.
	 */
	unsigned blk_idx = nir_src_as_const_value(intr->src[0])->u32[0];

	/* Check that a slot was allocated for this SSBO *before* using off[].
	 * Unsigned shift so that block index 31 does not shift into the sign
	 * bit of an int (undefined behaviour):
	 */
	debug_assert(ctx->so->const_layout.ssbo_size.mask & (1u << blk_idx));

	/* SSBO size stored as a const starting at ssbo_sizes: */
	unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) +
		ctx->so->const_layout.ssbo_size.off[blk_idx];

	dst[0] = create_uniform(ctx, idx);
}
static struct ir3_instruction *
emit_intrinsic_atomic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
@ -1483,6 +1507,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
case nir_intrinsic_store_ssbo:
emit_intrinsic_store_ssbo(ctx, intr);
break;
case nir_intrinsic_get_buffer_size:
emit_intrinsic_ssbo_size(ctx, intr, dst);
break;
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_ssbo_atomic_umin:

View file

@ -209,3 +209,38 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
return s;
}
/*
 * Scan the shader for intrinsics that need driver-supplied consts and
 * record what is needed in 'layout'.
 *
 * Currently only get_buffer_size is handled: the first time it is seen
 * for a given SSBO, that SSBO's bit is set in ssbo_size.mask, the next
 * free slot is recorded in ssbo_size.off[], and ssbo_size.count is
 * incremented.  Repeated uses for the same SSBO share the slot.
 *
 * NOTE(review): the block index is read via nir_src_as_const_value()
 * without a NULL check -- presumably the index is always constant at
 * this point; confirm.
 */
void
ir3_nir_scan_driver_consts(nir_shader *shader,
		struct ir3_driver_const_layout *layout)
{
	/* an SSBO index must fit both the off[] table and the bitmask: */
	const unsigned max_off =
		sizeof(layout->ssbo_size.off) / sizeof(layout->ssbo_size.off[0]);
	const unsigned max_bits = sizeof(layout->ssbo_size.mask) * 8;

	nir_foreach_function(function, shader) {
		if (!function->impl)
			continue;

		nir_foreach_block(block, function->impl) {
			nir_foreach_instr(instr, block) {
				if (instr->type != nir_instr_type_intrinsic)
					continue;

				nir_intrinsic_instr *intr =
					nir_instr_as_intrinsic(instr);
				unsigned idx;

				switch (intr->intrinsic) {
				case nir_intrinsic_get_buffer_size:
					idx = nir_src_as_const_value(intr->src[0])->u32[0];
					/* out-of-range index would write past off[]
					 * and shift by >= the mask width:
					 */
					if (idx >= max_off || idx >= max_bits)
						break;
					/* already have a slot for this SSBO? */
					if (layout->ssbo_size.mask & (1u << idx))
						break;
					layout->ssbo_size.mask |= (1u << idx);
					layout->ssbo_size.off[idx] =
						layout->ssbo_size.count;
					layout->ssbo_size.count += 1; /* one const per */
					break;
				default:
					break;
				}
			}
		}
	}
}

View file

@ -34,6 +34,8 @@
#include "ir3_shader.h"
void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout);
bool ir3_nir_lower_if_else(nir_shader *shader);
bool ir3_nir_apply_trig_workarounds(nir_shader *shader);

View file

@ -606,6 +606,27 @@ emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
}
}
/*
 * Upload the SSBO-size consts for shader variant 'v'.
 *
 * For every SSBO flagged in const_layout.ssbo_size.mask, the buffer_size
 * of the matching entry in 'sb' is written to the slot recorded in
 * const_layout.ssbo_size.off[], and the resulting table is emitted at
 * constbase.ssbo_sizes.  Nothing is emitted if the shader uses no
 * get_buffer_size, or if the section lies beyond the variant's constlen.
 */
static void
emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v,
		struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb)
{
	uint32_t offset = v->constbase.ssbo_sizes;
	uint32_t count = v->const_layout.ssbo_size.count;

	/* No slots allocated: avoid a zero-length VLA (undefined behaviour)
	 * and a pointless empty const upload.
	 */
	if (count == 0)
		return;

	if (v->constlen > offset) {
		/* table is padded out to a multiple of four entries: */
		uint32_t sizes[align(count, 4)];
		unsigned mask = v->const_layout.ssbo_size.mask;

		/* Zero everything first so the padding entries are not
		 * uploaded as uninitialised stack contents (a VLA cannot
		 * take an initializer):
		 */
		for (unsigned i = 0; i < ARRAY_SIZE(sizes); i++)
			sizes[i] = 0;

		while (mask) {
			unsigned index = u_bit_scan(&mask);
			unsigned off = v->const_layout.ssbo_size.off[index];
			sizes[off] = sb->sb[index].buffer_size;
		}

		fd_wfi(ctx->batch, ring);
		/* offset is scaled by four for emit_const -- presumably
		 * converting vec4 units to a scalar register id; confirm
		 * against the emit_const implementations.
		 */
		ctx->emit_const(ring, v->type, offset * 4,
			0, ARRAY_SIZE(sizes), sizes, NULL);
	}
}
static void
emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
@ -726,6 +747,11 @@ emit_common_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
if (shader_dirty)
emit_immediates(ctx, v, ring);
}
if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_SSBO)) {
struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[t];
emit_ssbo_sizes(ctx, v, ring, sb);
}
}
void

View file

@ -56,6 +56,26 @@ enum ir3_driver_param {
IR3_DP_VS_COUNT = 36 /* must be aligned to vec4 */
};
/**
 * For consts needed to pass internal values to shader which may or may not
 * be required, rather than allocating worst-case const space, we scan the
 * shader and allocate consts as-needed:
 *
 *   + SSBO sizes: only needed if shader has a get_buffer_size intrinsic
 *     for a given SSBO
 *
 * The layout is filled in by ir3_nir_scan_driver_consts().
 */
struct ir3_driver_const_layout {
struct {
uint32_t mask; /* bitmask of SSBOs that have get_buffer_size */
uint32_t count; /* number of consts allocated (one per bit set in mask) */
/* one const allocated per SSBO which has get_buffer_size,
 * ssbo_size.off[ssbo_id] is offset from start of ssbo_sizes
 * consts.  Only entries whose bit is set in mask are valid:
 */
uint32_t off[PIPE_MAX_SHADER_BUFFERS];
} ssbo_size;
};
/* Configuration key used to identify a shader variant.. different
* shader variants can be used to implement features not supported
* in hw (two sided color), binning-pass vertex shader, etc.
@ -173,6 +193,7 @@ struct ir3_shader_variant {
struct ir3_shader_key key;
struct ir3_driver_const_layout const_layout;
struct ir3_info info;
struct ir3 *ir;
@ -191,6 +212,7 @@ struct ir3_shader_variant {
* constants, etc.
*/
unsigned num_uniforms;
unsigned num_ubos;
/* About Linkage:
@ -271,6 +293,8 @@ struct ir3_shader_variant {
struct {
/* user const start at zero */
unsigned ubo;
/* NOTE that a3xx might need a section for SSBO addresses too */
unsigned ssbo_sizes;
unsigned driver_param;
unsigned tfbo;
unsigned immediate;