mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
freedreno/ir3: compute shader support
Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
parent
39c5a46a7a
commit
2ce449fa7d
4 changed files with 154 additions and 19 deletions
|
|
@ -31,6 +31,7 @@ enum shader_t {
|
|||
SHADER_GEOM,
|
||||
SHADER_FRAGMENT,
|
||||
SHADER_COMPUTE,
|
||||
SHADER_MAX,
|
||||
};
|
||||
|
||||
/* bitmask of debug flags */
|
||||
|
|
|
|||
|
|
@ -71,6 +71,9 @@ struct ir3_compile {
|
|||
/* For vertex shaders, keep track of the system values sources */
|
||||
struct ir3_instruction *vertex_id, *basevertex, *instance_id;
|
||||
|
||||
/* Compute shader inputs: */
|
||||
struct ir3_instruction *local_invocation_id, *work_group_id;
|
||||
|
||||
/* For SSBO's and atomics, we need to preserve order, such
|
||||
* that reads don't overtake writes, and the order of writes
|
||||
* is preserved. Atomics are considered as a write.
|
||||
|
|
@ -228,15 +231,21 @@ compile_init(struct ir3_compiler *compiler,
|
|||
constoff += align(ctx->s->info->num_ubos * ptrsz, 4) / 4;
|
||||
}
|
||||
|
||||
unsigned num_driver_params = 0;
|
||||
if (so->type == SHADER_VERTEX) {
|
||||
so->constbase.driver_param = constoff;
|
||||
constoff += align(IR3_DP_COUNT, 4) / 4;
|
||||
num_driver_params = IR3_DP_VS_COUNT;
|
||||
} else if (so->type == SHADER_COMPUTE) {
|
||||
num_driver_params = IR3_DP_CS_COUNT;
|
||||
}
|
||||
|
||||
if ((compiler->gpu_id < 500) &&
|
||||
so->shader->stream_output.num_outputs > 0) {
|
||||
so->constbase.tfbo = constoff;
|
||||
constoff += align(PIPE_MAX_SO_BUFFERS * ptrsz, 4) / 4;
|
||||
}
|
||||
so->constbase.driver_param = constoff;
|
||||
constoff += align(num_driver_params, 4) / 4;
|
||||
|
||||
if ((so->type == SHADER_VERTEX) &&
|
||||
(compiler->gpu_id < 500) &&
|
||||
so->shader->stream_output.num_outputs > 0) {
|
||||
so->constbase.tfbo = constoff;
|
||||
constoff += align(PIPE_MAX_SO_BUFFERS * ptrsz, 4) / 4;
|
||||
}
|
||||
|
||||
so->constbase.immediate = constoff;
|
||||
|
|
@ -538,7 +547,7 @@ create_var_store(struct ir3_compile *ctx, struct ir3_array *arr, int n,
|
|||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
create_input(struct ir3_block *block, unsigned n)
|
||||
create_input_compmask(struct ir3_block *block, unsigned n, unsigned compmask)
|
||||
{
|
||||
struct ir3_instruction *in;
|
||||
|
||||
|
|
@ -546,9 +555,17 @@ create_input(struct ir3_block *block, unsigned n)
|
|||
in->inout.block = block;
|
||||
ir3_reg_create(in, n, 0);
|
||||
|
||||
in->regs[0]->wrmask = compmask;
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
/* Create a scalar (single component, .x only) shader input at
 * register n.  Thin convenience wrapper around
 * create_input_compmask().
 */
static struct ir3_instruction *
create_input(struct ir3_block *block, unsigned n)
{
	struct ir3_instruction *input;

	input = create_input_compmask(block, n, 0x1);

	return input;
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
create_frag_input(struct ir3_compile *ctx, bool use_ldlv)
|
||||
{
|
||||
|
|
@ -1309,7 +1326,8 @@ emit_intrinsic_atomic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
array_insert(b, b->keeps, atomic);
|
||||
}
|
||||
|
||||
static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
|
||||
static void add_sysval_input_compmask(struct ir3_compile *ctx,
|
||||
gl_system_value slot, unsigned compmask,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
struct ir3_shader_variant *so = ctx->so;
|
||||
|
|
@ -1318,7 +1336,7 @@ static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
|
|||
|
||||
so->inputs[n].sysval = true;
|
||||
so->inputs[n].slot = slot;
|
||||
so->inputs[n].compmask = 1;
|
||||
so->inputs[n].compmask = compmask;
|
||||
so->inputs[n].regid = r;
|
||||
so->inputs[n].interpolate = INTERP_MODE_FLAT;
|
||||
so->total_in++;
|
||||
|
|
@ -1327,6 +1345,12 @@ static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
|
|||
ctx->ir->inputs[r] = instr;
|
||||
}
|
||||
|
||||
/* Record a scalar (single component) system-value input on the shader
 * variant.  Forwards to add_sysval_input_compmask() with a component
 * mask of just .x (0x1).
 */
static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
		struct ir3_instruction *instr)
{
	add_sysval_input_compmask(ctx, slot, 0x1, instr);
}
|
||||
|
||||
static void
|
||||
emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
|
|
@ -1476,6 +1500,28 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
dst[0] = ir3_COV(b, ctx->frag_face, TYPE_S16, TYPE_S32);
|
||||
dst[0] = ir3_ADD_S(b, dst[0], 0, create_immed(b, 1), 0);
|
||||
break;
|
||||
case nir_intrinsic_load_local_invocation_id:
|
||||
if (!ctx->local_invocation_id) {
|
||||
ctx->local_invocation_id = create_input_compmask(b, 0, 0x7);
|
||||
add_sysval_input_compmask(ctx, SYSTEM_VALUE_LOCAL_INVOCATION_ID,
|
||||
0x7, ctx->local_invocation_id);
|
||||
}
|
||||
split_dest(b, dst, ctx->local_invocation_id, 0, 3);
|
||||
break;
|
||||
case nir_intrinsic_load_work_group_id:
|
||||
if (!ctx->work_group_id) {
|
||||
ctx->work_group_id = create_input_compmask(b, 0, 0x7);
|
||||
add_sysval_input_compmask(ctx, SYSTEM_VALUE_WORK_GROUP_ID,
|
||||
0x7, ctx->work_group_id);
|
||||
ctx->work_group_id->regs[0]->flags |= IR3_REG_HIGH;
|
||||
}
|
||||
split_dest(b, dst, ctx->work_group_id, 0, 3);
|
||||
break;
|
||||
case nir_intrinsic_load_num_work_groups:
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_NUM_WORK_GROUPS_X + i);
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_discard_if:
|
||||
case nir_intrinsic_discard: {
|
||||
struct ir3_instruction *cond, *kill;
|
||||
|
|
@ -2381,6 +2427,11 @@ max_drvloc(struct exec_list *vars)
|
|||
return drvloc;
|
||||
}
|
||||
|
||||
/* Per-stage count of extra input slots reserved up-front for system
 * value inputs (see emit_instructions(): added to ninputs before
 * ir3_create() and subtracted again afterwards, so sysvals emitted
 * during compilation have room).  Stages not listed reserve zero.
 */
static const unsigned max_sysvals[SHADER_MAX] = {
	[SHADER_VERTEX] = 16,
	[SHADER_COMPUTE] = 16, // TODO how many do we actually need?
};
|
||||
|
||||
static void
|
||||
emit_instructions(struct ir3_compile *ctx)
|
||||
{
|
||||
|
|
@ -2390,11 +2441,9 @@ emit_instructions(struct ir3_compile *ctx)
|
|||
ninputs = (max_drvloc(&ctx->s->inputs) + 1) * 4;
|
||||
noutputs = (max_drvloc(&ctx->s->outputs) + 1) * 4;
|
||||
|
||||
/* or vtx shaders, we need to leave room for sysvals:
|
||||
/* we need to leave room for sysvals:
|
||||
*/
|
||||
if (ctx->so->type == SHADER_VERTEX) {
|
||||
ninputs += 16;
|
||||
}
|
||||
ninputs += max_sysvals[ctx->so->type];
|
||||
|
||||
ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs);
|
||||
|
||||
|
|
@ -2403,9 +2452,7 @@ emit_instructions(struct ir3_compile *ctx)
|
|||
ctx->in_block = ctx->block;
|
||||
list_addtail(&ctx->block->node, &ctx->ir->block_list);
|
||||
|
||||
if (ctx->so->type == SHADER_VERTEX) {
|
||||
ctx->ir->ninputs -= 16;
|
||||
}
|
||||
ninputs -= max_sysvals[ctx->so->type];
|
||||
|
||||
/* for fragment shader, we have a single input register (usually
|
||||
* r0.xy) which is used as the base for bary.f varying fetch instrs:
|
||||
|
|
|
|||
|
|
@ -291,6 +291,7 @@ ir3_shader_create(struct ir3_compiler *compiler,
|
|||
/* we take ownership of the reference: */
|
||||
nir = cso->ir.nir;
|
||||
} else {
|
||||
debug_assert(cso->type == PIPE_SHADER_IR_TGSI);
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
DBG("dump tgsi: type=%d", shader->type);
|
||||
tgsi_dump(cso->tokens, 0);
|
||||
|
|
@ -317,6 +318,43 @@ ir3_shader_create(struct ir3_compiler *compiler,
|
|||
return shader;
|
||||
}
|
||||
|
||||
/* a bit annoying that compute-shader and normal shader state objects
|
||||
* aren't a bit more aligned.
|
||||
*/
|
||||
struct ir3_shader *
|
||||
ir3_shader_create_compute(struct ir3_compiler *compiler,
|
||||
const struct pipe_compute_state *cso,
|
||||
struct pipe_debug_callback *debug)
|
||||
{
|
||||
struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader);
|
||||
|
||||
shader->compiler = compiler;
|
||||
shader->id = ++shader->compiler->shader_count;
|
||||
shader->type = SHADER_COMPUTE;
|
||||
|
||||
nir_shader *nir;
|
||||
if (cso->ir_type == PIPE_SHADER_IR_NIR) {
|
||||
/* we take ownership of the reference: */
|
||||
nir = (nir_shader *)cso->prog;
|
||||
} else {
|
||||
debug_assert(cso->ir_type == PIPE_SHADER_IR_TGSI);
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
DBG("dump tgsi: type=%d", shader->type);
|
||||
tgsi_dump(cso->prog, 0);
|
||||
}
|
||||
nir = ir3_tgsi_to_nir(cso->prog);
|
||||
}
|
||||
|
||||
/* do first pass optimization, ignoring the key: */
|
||||
shader->nir = ir3_optimize_nir(shader, nir, NULL);
|
||||
if (fd_mesa_debug & FD_DBG_DISASM) {
|
||||
DBG("dump nir%d: type=%d", shader->id, shader->type);
|
||||
nir_print_shader(shader->nir, stdout);
|
||||
}
|
||||
|
||||
return shader;
|
||||
}
|
||||
|
||||
static void dump_reg(const char *name, uint32_t r)
|
||||
{
|
||||
if (r != regid(63,0))
|
||||
|
|
@ -684,7 +722,7 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
|
|||
if (info) {
|
||||
uint32_t offset = v->constbase.driver_param;
|
||||
if (v->constlen > offset) {
|
||||
uint32_t vertex_params[IR3_DP_COUNT] = {
|
||||
uint32_t vertex_params[IR3_DP_VS_COUNT] = {
|
||||
[IR3_DP_VTXID_BASE] = info->indexed ?
|
||||
info->index_bias : info->start,
|
||||
[IR3_DP_VTXCNT_MAX] = max_tf_vtx(ctx, v),
|
||||
|
|
@ -739,3 +777,39 @@ ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
|
|||
emit_immediates(ctx, v, ring);
|
||||
}
|
||||
}
|
||||
|
||||
/* emit compute-shader consts: user consts, UBO pointers, immediates,
 * then the compute driver-params (grid dimensions).  NOTE the emission
 * order here mirrors the const-register layout chosen in compile_init()
 * (constbase.driver_param etc.) and must not be reordered.
 */
void
ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
		struct fd_context *ctx, const struct pipe_grid_info *info)
{
	enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE];

	if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) {
		struct fd_constbuf_stateobj *constbuf;
		bool shader_dirty;

		constbuf = &ctx->constbuf[PIPE_SHADER_COMPUTE];
		/* immediates only need re-emit when the program itself changed: */
		shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG);

		emit_user_consts(ctx, v, ring, constbuf);
		emit_ubos(ctx, v, ring, constbuf);
		if (shader_dirty)
			emit_immediates(ctx, v, ring);
	}

	/* emit compute-shader driver-params: */
	/* only emit if the variant actually reserved const space for them
	 * (constlen extends past the driver_param base):
	 */
	uint32_t offset = v->constbase.driver_param;
	if (v->constlen > offset) {
		uint32_t compute_params[IR3_DP_CS_COUNT] = {
			[IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
			[IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
			[IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2],
			/* do we need work-group-size? */
		};

		/* NOTE(review): presumably a wait-for-idle so the GPU is not
		 * still reading consts while we overwrite them — confirm:
		 */
		fd_wfi(ctx->batch, ring);
		/* offset is in vec4 units, emit_const takes dwords: */
		ctx->emit_const(ring, SHADER_COMPUTE, offset * 4, 0,
				ARRAY_SIZE(compute_params), compute_params, NULL);
	}
}
|
||||
|
|
|
|||
|
|
@ -38,13 +38,20 @@
|
|||
|
||||
/* driver param indices:
 *
 * NOTE: compute and vertex indices deliberately overlap (both start at
 * 0) — a given shader stage only ever uses its own group, and each
 * stage gets its own constbase.driver_param slot (see compile_init()).
 * The diff residue left both the old IR3_DP_COUNT and the new
 * IR3_DP_VS_COUNT enumerators in place (and dropped the comma between
 * them); only the renamed IR3_DP_VS_COUNT survives here.
 */
enum ir3_driver_param {
	/* compute shader driver params: */
	IR3_DP_NUM_WORK_GROUPS_X = 0,
	IR3_DP_NUM_WORK_GROUPS_Y = 1,
	IR3_DP_NUM_WORK_GROUPS_Z = 2,
	IR3_DP_CS_COUNT   = 4,   /* must be aligned to vec4 */

	/* vertex shader driver params: */
	IR3_DP_VTXID_BASE = 0,
	IR3_DP_VTXCNT_MAX = 1,
	/* user-clip-plane components, up to 8x vec4's: */
	IR3_DP_UCP0_X     = 4,
	/* .... */
	IR3_DP_UCP7_W     = 35,
	IR3_DP_VS_COUNT   = 36   /* must be aligned to vec4 */
};
|
||||
|
||||
/* Configuration key used to identify a shader variant.. different
|
||||
|
|
@ -313,6 +320,10 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
|
|||
struct ir3_shader * ir3_shader_create(struct ir3_compiler *compiler,
|
||||
const struct pipe_shader_state *cso, enum shader_t type,
|
||||
struct pipe_debug_callback *debug);
|
||||
struct ir3_shader *
|
||||
ir3_shader_create_compute(struct ir3_compiler *compiler,
|
||||
const struct pipe_compute_state *cso,
|
||||
struct pipe_debug_callback *debug);
|
||||
void ir3_shader_destroy(struct ir3_shader *shader);
|
||||
struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
|
||||
struct ir3_shader_key key, struct pipe_debug_callback *debug);
|
||||
|
|
@ -325,6 +336,8 @@ void ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer
|
|||
struct fd_context *ctx, const struct pipe_draw_info *info);
|
||||
void ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
|
||||
struct fd_context *ctx);
|
||||
void ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
|
||||
struct fd_context *ctx, const struct pipe_grid_info *info);
|
||||
|
||||
static inline const char *
|
||||
ir3_shader_stage(struct ir3_shader *shader)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue