mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-01 01:38:06 +02:00
freedreno/a6xx: Add support to load driver-params via UBO
In this case, we can't use CP_LOAD_STATE to push the consts inline in the cmdstream, but instead need to setup a UBO. Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31534>
This commit is contained in:
parent
f193c61c6b
commit
3a0b022136
7 changed files with 268 additions and 61 deletions
|
|
@ -152,7 +152,7 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
|||
cs->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
|
||||
cs_program_emit<CHIP>(ctx, cs->stateobj, cs->v);
|
||||
|
||||
cs->user_consts_cmdstream_size = fd6_user_consts_cmdstream_size(cs->v);
|
||||
cs->user_consts_cmdstream_size = fd6_user_consts_cmdstream_size<CHIP>(cs->v);
|
||||
}
|
||||
|
||||
trace_start_compute(&ctx->batch->trace, ring, !!info->indirect, info->work_dim,
|
||||
|
|
@ -190,10 +190,10 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
|||
fd6_emit_cs_state<CHIP>(ctx, ring, cs);
|
||||
|
||||
if (ctx->gen_dirty & BIT(FD6_GROUP_CONST))
|
||||
fd6_emit_cs_user_consts(ctx, ring, cs);
|
||||
fd6_emit_cs_user_consts<CHIP>(ctx, ring, cs);
|
||||
|
||||
if (cs->v->need_driver_params || info->input)
|
||||
fd6_emit_cs_driver_params(ctx, ring, cs, info);
|
||||
fd6_emit_cs_driver_params<CHIP>(ctx, ring, cs, info);
|
||||
|
||||
OUT_PKT7(ring, CP_SET_MARKER, 1);
|
||||
OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_COMPUTE));
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "fd6_barrier.h"
|
||||
#include "fd6_const.h"
|
||||
#include "fd6_compute.h"
|
||||
#include "fd6_pack.h"
|
||||
|
|
@ -36,6 +37,40 @@ fd6_emit_driver_ubo(struct fd_ringbuffer *ring, const struct ir3_shader_variant
|
|||
((uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32), 0);
|
||||
}
|
||||
|
||||
/* A helper to upload driver-params to a UBO, for the case where constants are
|
||||
* loaded by shader preamble rather than ST6_CONSTANTS
|
||||
*/
|
||||
static void
|
||||
fd6_upload_emit_driver_ubo(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
const struct ir3_shader_variant *v, int base,
|
||||
uint32_t sizedwords, const void *dwords)
|
||||
{
|
||||
struct pipe_context *pctx = &ctx->base;
|
||||
|
||||
assert(ctx->screen->info->chip >= 7 && ctx->screen->info->a7xx.load_shader_consts_via_preamble);
|
||||
|
||||
if (!sizedwords || (base < 0))
|
||||
return;
|
||||
|
||||
unsigned buffer_offset;
|
||||
struct pipe_resource *buffer = NULL;
|
||||
u_upload_data(pctx->const_uploader, 0, sizedwords * sizeof(uint32_t),
|
||||
16, dwords, &buffer_offset, &buffer);
|
||||
if (!buffer)
|
||||
return; /* nothing good will come of this.. */
|
||||
|
||||
/* The backing BO may otherwise not be tracked by the resource, as
|
||||
* this allocation happens outside of the context of batch resource
|
||||
* tracking.
|
||||
*/
|
||||
fd_ringbuffer_attach_bo(ring, fd_resource(buffer)->bo);
|
||||
|
||||
fd6_emit_driver_ubo(ring, v, base, sizedwords, buffer_offset,
|
||||
fd_resource(buffer)->bo);
|
||||
|
||||
pipe_resource_reference(&buffer, NULL);
|
||||
}
|
||||
|
||||
/* regid: base const register
|
||||
* prsc or dwords: buffer containing constant values
|
||||
* sizedwords: size of const value buffer
|
||||
|
|
@ -71,6 +106,7 @@ fd6_emit_const_user(struct fd_ringbuffer *ring,
|
|||
CP_LOAD_STATE6_2());
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fd6_emit_const_bo(struct fd_ringbuffer *ring,
|
||||
const struct ir3_shader_variant *v, uint32_t regid,
|
||||
|
|
@ -115,16 +151,31 @@ emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
|
|||
}
|
||||
|
||||
static void
|
||||
emit_stage_tess_consts(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
|
||||
uint32_t *params, int num_params)
|
||||
wait_mem_writes(struct fd_context *ctx)
|
||||
{
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
const unsigned regid = const_state->offsets.primitive_param;
|
||||
int size = MIN2(1 + regid, v->constlen) - regid;
|
||||
if (size > 0)
|
||||
fd6_emit_const_user(ring, v, regid * 4, num_params, params);
|
||||
ctx->batch->barrier |= FD6_WAIT_MEM_WRITES | FD6_INVALIDATE_CACHE | FD6_WAIT_FOR_IDLE;
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_stage_tess_consts(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
|
||||
struct fd_context *ctx, uint32_t *params, int num_params)
|
||||
{
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
|
||||
if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
|
||||
int base = const_state->primitive_param_ubo.idx;
|
||||
|
||||
fd6_upload_emit_driver_ubo(ctx, ring, v, base, num_params, params);
|
||||
} else {
|
||||
const unsigned regid = const_state->offsets.primitive_param;
|
||||
int size = MIN2(1 + regid, v->constlen) - regid;
|
||||
if (size > 0)
|
||||
fd6_emit_const_user(ring, v, regid * 4, num_params, params);
|
||||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
struct fd_ringbuffer *
|
||||
fd6_build_tess_consts(struct fd6_emit *emit)
|
||||
{
|
||||
|
|
@ -144,7 +195,7 @@ fd6_build_tess_consts(struct fd6_emit *emit)
|
|||
emit->vs->output_size * 4, /* vs vertex stride */
|
||||
0, 0};
|
||||
|
||||
emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params));
|
||||
emit_stage_tess_consts<CHIP>(constobj, emit->vs, emit->ctx, vs_params, ARRAY_SIZE(vs_params));
|
||||
|
||||
if (emit->hs) {
|
||||
struct fd_bo *tess_bo = ctx->screen->tess_bo;
|
||||
|
|
@ -164,8 +215,8 @@ fd6_build_tess_consts(struct fd6_emit *emit)
|
|||
tess_factor_iova >> 32,
|
||||
};
|
||||
|
||||
emit_stage_tess_consts(constobj, emit->hs, hs_params,
|
||||
ARRAY_SIZE(hs_params));
|
||||
emit_stage_tess_consts<CHIP>(constobj, emit->hs, emit->ctx,
|
||||
hs_params, ARRAY_SIZE(hs_params));
|
||||
|
||||
if (emit->gs)
|
||||
num_vertices = emit->gs->gs.vertices_in;
|
||||
|
|
@ -181,8 +232,8 @@ fd6_build_tess_consts(struct fd6_emit *emit)
|
|||
tess_factor_iova >> 32,
|
||||
};
|
||||
|
||||
emit_stage_tess_consts(constobj, emit->ds, ds_params,
|
||||
ARRAY_SIZE(ds_params));
|
||||
emit_stage_tess_consts<CHIP>(constobj, emit->ds, emit->ctx,
|
||||
ds_params, ARRAY_SIZE(ds_params));
|
||||
}
|
||||
|
||||
if (emit->gs) {
|
||||
|
|
@ -200,12 +251,13 @@ fd6_build_tess_consts(struct fd6_emit *emit)
|
|||
};
|
||||
|
||||
num_vertices = emit->gs->gs.vertices_in;
|
||||
emit_stage_tess_consts(constobj, emit->gs, gs_params,
|
||||
ARRAY_SIZE(gs_params));
|
||||
emit_stage_tess_consts<CHIP>(constobj, emit->gs, emit->ctx,
|
||||
gs_params, ARRAY_SIZE(gs_params));
|
||||
}
|
||||
|
||||
return constobj;
|
||||
}
|
||||
FD_GENX(fd6_build_tess_consts);
|
||||
|
||||
static void
|
||||
fd6_emit_ubos(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
|
||||
|
|
@ -240,6 +292,7 @@ fd6_emit_ubos(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
|
|||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
unsigned
|
||||
fd6_user_consts_cmdstream_size(const struct ir3_shader_variant *v)
|
||||
{
|
||||
|
|
@ -250,8 +303,13 @@ fd6_user_consts_cmdstream_size(const struct ir3_shader_variant *v)
|
|||
const struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;
|
||||
unsigned packets, size;
|
||||
|
||||
/* pre-calculate size required for userconst stateobj: */
|
||||
ir3_user_consts_size(ubo_state, &packets, &size);
|
||||
if (CHIP == A7XX && v->compiler->load_shader_consts_via_preamble) {
|
||||
packets = 0;
|
||||
size = 0;
|
||||
} else {
|
||||
/* pre-calculate size required for userconst stateobj: */
|
||||
ir3_user_consts_size(ubo_state, &packets, &size);
|
||||
}
|
||||
|
||||
/* also account for UBO addresses: */
|
||||
packets += 1;
|
||||
|
|
@ -260,17 +318,23 @@ fd6_user_consts_cmdstream_size(const struct ir3_shader_variant *v)
|
|||
unsigned sizedwords = (4 * packets) + size;
|
||||
return sizedwords * 4;
|
||||
}
|
||||
FD_GENX(fd6_user_consts_cmdstream_size);
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_user_consts(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring,
|
||||
struct fd_constbuf_stateobj *constbuf)
|
||||
{
|
||||
ir3_emit_user_consts(v, ring, constbuf);
|
||||
fd6_emit_ubos(v, ring, constbuf);
|
||||
|
||||
if (CHIP == A7XX && v->compiler->load_shader_consts_via_preamble)
|
||||
return;
|
||||
|
||||
ir3_emit_user_consts(v, ring, constbuf);
|
||||
}
|
||||
|
||||
template <fd6_pipeline_type PIPELINE>
|
||||
template <chip CHIP, fd6_pipeline_type PIPELINE>
|
||||
struct fd_ringbuffer *
|
||||
fd6_build_user_consts(struct fd6_emit *emit)
|
||||
{
|
||||
|
|
@ -280,33 +344,82 @@ fd6_build_user_consts(struct fd6_emit *emit)
|
|||
struct fd_ringbuffer *constobj =
|
||||
fd_submit_new_ringbuffer(ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);
|
||||
|
||||
emit_user_consts(emit->vs, constobj, &ctx->constbuf[PIPE_SHADER_VERTEX]);
|
||||
emit_user_consts<CHIP>(emit->vs, constobj, &ctx->constbuf[PIPE_SHADER_VERTEX]);
|
||||
|
||||
if (PIPELINE == HAS_TESS_GS) {
|
||||
if (emit->hs) {
|
||||
emit_user_consts(emit->hs, constobj, &ctx->constbuf[PIPE_SHADER_TESS_CTRL]);
|
||||
emit_user_consts(emit->ds, constobj, &ctx->constbuf[PIPE_SHADER_TESS_EVAL]);
|
||||
emit_user_consts<CHIP>(emit->hs, constobj, &ctx->constbuf[PIPE_SHADER_TESS_CTRL]);
|
||||
emit_user_consts<CHIP>(emit->ds, constobj, &ctx->constbuf[PIPE_SHADER_TESS_EVAL]);
|
||||
}
|
||||
if (emit->gs) {
|
||||
emit_user_consts(emit->gs, constobj, &ctx->constbuf[PIPE_SHADER_GEOMETRY]);
|
||||
emit_user_consts<CHIP>(emit->gs, constobj, &ctx->constbuf[PIPE_SHADER_GEOMETRY]);
|
||||
}
|
||||
}
|
||||
emit_user_consts(emit->fs, constobj, &ctx->constbuf[PIPE_SHADER_FRAGMENT]);
|
||||
emit_user_consts<CHIP>(emit->fs, constobj, &ctx->constbuf[PIPE_SHADER_FRAGMENT]);
|
||||
|
||||
return constobj;
|
||||
}
|
||||
template struct fd_ringbuffer * fd6_build_user_consts<A6XX, HAS_TESS_GS>(struct fd6_emit *emit);
|
||||
template struct fd_ringbuffer * fd6_build_user_consts<A7XX, HAS_TESS_GS>(struct fd6_emit *emit);
|
||||
template struct fd_ringbuffer * fd6_build_user_consts<A6XX, NO_TESS_GS>(struct fd6_emit *emit);
|
||||
template struct fd_ringbuffer * fd6_build_user_consts<A7XX, NO_TESS_GS>(struct fd6_emit *emit);
|
||||
|
||||
template struct fd_ringbuffer * fd6_build_user_consts<HAS_TESS_GS>(struct fd6_emit *emit);
|
||||
template struct fd_ringbuffer * fd6_build_user_consts<NO_TESS_GS>(struct fd6_emit *emit);
|
||||
template <chip CHIP>
|
||||
static inline void
|
||||
emit_driver_params(const struct ir3_shader_variant *v, struct fd_ringbuffer *dpconstobj,
|
||||
struct fd_context *ctx, const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct ir3_driver_params_vs *vertex_params)
|
||||
{
|
||||
if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
int base = const_state->driver_params_ubo.idx;
|
||||
|
||||
template <fd6_pipeline_type PIPELINE>
|
||||
fd6_upload_emit_driver_ubo(ctx, dpconstobj, v, base,
|
||||
dword_sizeof(*vertex_params),
|
||||
vertex_params);
|
||||
} else {
|
||||
ir3_emit_driver_params(v, dpconstobj, ctx, info, indirect, vertex_params);
|
||||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static inline void
|
||||
emit_hs_driver_params(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *dpconstobj,
|
||||
struct fd_context *ctx)
|
||||
{
|
||||
if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
struct ir3_driver_params_tcs hs_params = ir3_build_driver_params_tcs(ctx);
|
||||
int base = const_state->driver_params_ubo.idx;
|
||||
|
||||
fd6_upload_emit_driver_ubo(ctx, dpconstobj, v, base,
|
||||
dword_sizeof(hs_params),
|
||||
&hs_params);
|
||||
} else {
|
||||
ir3_emit_hs_driver_params(v, dpconstobj, ctx);
|
||||
}
|
||||
}
|
||||
|
||||
template <chip CHIP, fd6_pipeline_type PIPELINE>
|
||||
struct fd_ringbuffer *
|
||||
fd6_build_driver_params(struct fd6_emit *emit)
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
struct fd6_context *fd6_ctx = fd6_context(ctx);
|
||||
unsigned num_dp = emit->prog->num_driver_params;
|
||||
unsigned num_ubo_dp;
|
||||
|
||||
if (!num_dp) {
|
||||
if (CHIP == A6XX) {
|
||||
assert(!emit->prog->num_ubo_driver_params);
|
||||
/* Make it easier for compiler to see that this path isn't used on a6xx: */
|
||||
num_ubo_dp = 0;
|
||||
} else {
|
||||
num_ubo_dp = emit->prog->num_ubo_driver_params;
|
||||
}
|
||||
|
||||
if (!num_dp && !num_ubo_dp) {
|
||||
fd6_ctx->has_dp_state = false;
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -323,53 +436,104 @@ fd6_build_driver_params(struct fd6_emit *emit)
|
|||
ir3_build_driver_params_vs(ctx, emit->info, emit->draw, emit->draw_id, needs_ucp);
|
||||
|
||||
unsigned size_dwords =
|
||||
num_dp * (4 + dword_sizeof(p)); /* 4dw PKT7 header */
|
||||
num_dp * (4 + dword_sizeof(p)) + /* 4dw PKT7 header */
|
||||
num_ubo_dp * 6; /* 6dw per UBO descriptor */
|
||||
|
||||
struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, size_dwords * 4, FD_RINGBUFFER_STREAMING);
|
||||
|
||||
/* VS still works the old way*/
|
||||
if (emit->vs->need_driver_params) {
|
||||
ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info, emit->indirect, &p);
|
||||
}
|
||||
|
||||
if (PIPELINE == HAS_TESS_GS) {
|
||||
if (emit->gs && emit->gs->need_driver_params) {
|
||||
ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info, emit->indirect, &p);
|
||||
emit_driver_params<CHIP>(emit->gs, dpconstobj, ctx, emit->info, emit->indirect, &p);
|
||||
}
|
||||
|
||||
if (emit->hs && emit->hs->need_driver_params) {
|
||||
ir3_emit_hs_driver_params(emit->hs, dpconstobj, ctx);
|
||||
emit_hs_driver_params<CHIP>(emit->hs, dpconstobj, ctx);
|
||||
}
|
||||
|
||||
if (emit->ds && emit->ds->need_driver_params) {
|
||||
ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info, emit->indirect, &p);
|
||||
emit_driver_params<CHIP>(emit->ds, dpconstobj, ctx, emit->info, emit->indirect, &p);
|
||||
}
|
||||
}
|
||||
|
||||
if (emit->indirect)
|
||||
wait_mem_writes(ctx);
|
||||
|
||||
fd6_ctx->has_dp_state = true;
|
||||
|
||||
return dpconstobj;
|
||||
}
|
||||
|
||||
template struct fd_ringbuffer * fd6_build_driver_params<HAS_TESS_GS>(struct fd6_emit *emit);
|
||||
template struct fd_ringbuffer * fd6_build_driver_params<NO_TESS_GS>(struct fd6_emit *emit);
|
||||
template struct fd_ringbuffer * fd6_build_driver_params<A6XX, HAS_TESS_GS>(struct fd6_emit *emit);
|
||||
template struct fd_ringbuffer * fd6_build_driver_params<A7XX, HAS_TESS_GS>(struct fd6_emit *emit);
|
||||
template struct fd_ringbuffer * fd6_build_driver_params<A6XX, NO_TESS_GS>(struct fd6_emit *emit);
|
||||
template struct fd_ringbuffer * fd6_build_driver_params<A7XX, NO_TESS_GS>(struct fd6_emit *emit);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_cs_driver_params(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
struct fd6_compute_state *cs,
|
||||
const struct pipe_grid_info *info)
|
||||
{
|
||||
ir3_emit_cs_driver_params(cs->v, ring, ctx, info);
|
||||
}
|
||||
/* info->input not handled in the UBO path. I believe this was only
|
||||
* ever used by clover
|
||||
*/
|
||||
assert(!info->input);
|
||||
|
||||
if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
|
||||
const struct ir3_const_state *const_state = ir3_const_state(cs->v);
|
||||
struct ir3_driver_params_cs compute_params =
|
||||
ir3_build_driver_params_cs(cs->v, info);
|
||||
int base = const_state->driver_params_ubo.idx;
|
||||
|
||||
if (base < 0)
|
||||
return;
|
||||
|
||||
struct pipe_resource *buffer = NULL;
|
||||
unsigned buffer_offset;
|
||||
|
||||
u_upload_data(ctx->base.const_uploader, 0, sizeof(compute_params),
|
||||
16, &compute_params, &buffer_offset, &buffer);
|
||||
|
||||
if (info->indirect) {
|
||||
/* Copy indirect params into UBO: */
|
||||
ctx->screen->mem_to_mem(ring, buffer, buffer_offset, info->indirect,
|
||||
info->indirect_offset, 3);
|
||||
|
||||
wait_mem_writes(ctx);
|
||||
} else {
|
||||
fd_ringbuffer_attach_bo(ring, fd_resource(buffer)->bo);
|
||||
}
|
||||
|
||||
fd6_emit_driver_ubo(ring, cs->v, base, dword_sizeof(compute_params),
|
||||
buffer_offset, fd_resource(buffer)->bo);
|
||||
|
||||
pipe_resource_reference(&buffer, NULL);
|
||||
} else {
|
||||
ir3_emit_cs_driver_params(cs->v, ring, ctx, info);
|
||||
if (info->indirect)
|
||||
wait_mem_writes(ctx);
|
||||
}
|
||||
}
|
||||
FD_GENX(fd6_emit_cs_driver_params);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_cs_user_consts(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
struct fd6_compute_state *cs)
|
||||
{
|
||||
emit_user_consts(cs->v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
|
||||
emit_user_consts<CHIP>(cs->v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
|
||||
}
|
||||
FD_GENX(fd6_emit_cs_user_consts);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_immediates(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring)
|
||||
|
|
@ -383,13 +547,28 @@ fd6_emit_immediates(const struct ir3_shader_variant *v,
|
|||
v->info.constant_data_offset, v->bo);
|
||||
}
|
||||
|
||||
if (CHIP == A7XX && v->compiler->load_inline_uniforms_via_preamble_ldgk)
|
||||
return;
|
||||
|
||||
ir3_emit_immediates(v, ring);
|
||||
}
|
||||
FD_GENX(fd6_emit_immediates);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_link_map(const struct ir3_shader_variant *producer,
|
||||
fd6_emit_link_map(struct fd_context *ctx,
|
||||
const struct ir3_shader_variant *producer,
|
||||
const struct ir3_shader_variant *consumer,
|
||||
struct fd_ringbuffer *ring)
|
||||
{
|
||||
ir3_emit_link_map(producer, consumer, ring);
|
||||
if (CHIP == A7XX && producer->compiler->load_shader_consts_via_preamble) {
|
||||
const struct ir3_const_state *const_state = ir3_const_state(consumer);
|
||||
int base = const_state->primitive_map_ubo.idx;
|
||||
uint32_t size = ALIGN(consumer->input_size, 4);
|
||||
|
||||
fd6_upload_emit_driver_ubo(ctx, ring, consumer, base, size, producer->output_loc);
|
||||
} else {
|
||||
ir3_emit_link_map(producer, consumer, ring);
|
||||
}
|
||||
}
|
||||
FD_GENX(fd6_emit_link_map);
|
||||
|
|
|
|||
|
|
@ -9,26 +9,33 @@
|
|||
|
||||
#include "fd6_emit.h"
|
||||
|
||||
template <chip CHIP>
|
||||
struct fd_ringbuffer *fd6_build_tess_consts(struct fd6_emit *emit) assert_dt;
|
||||
template <chip CHIP>
|
||||
unsigned fd6_user_consts_cmdstream_size(const struct ir3_shader_variant *v);
|
||||
|
||||
template <fd6_pipeline_type PIPELINE>
|
||||
template <chip CHIP, fd6_pipeline_type PIPELINE>
|
||||
struct fd_ringbuffer *fd6_build_user_consts(struct fd6_emit *emit) assert_dt;
|
||||
|
||||
template <fd6_pipeline_type PIPELINE>
|
||||
template <chip CHIP, fd6_pipeline_type PIPELINE>
|
||||
struct fd_ringbuffer *
|
||||
fd6_build_driver_params(struct fd6_emit *emit) assert_dt;
|
||||
|
||||
template <chip CHIP>
|
||||
void fd6_emit_cs_driver_params(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
struct fd6_compute_state *cs,
|
||||
const struct pipe_grid_info *info) assert_dt;
|
||||
template <chip CHIP>
|
||||
void fd6_emit_cs_user_consts(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
struct fd6_compute_state *cs) assert_dt;
|
||||
template <chip CHIP>
|
||||
void fd6_emit_immediates(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring) assert_dt;
|
||||
void fd6_emit_link_map(const struct ir3_shader_variant *producer,
|
||||
template <chip CHIP>
|
||||
void fd6_emit_link_map(struct fd_context *ctx,
|
||||
const struct ir3_shader_variant *producer,
|
||||
const struct ir3_shader_variant *consumer,
|
||||
struct fd_ringbuffer *ring) assert_dt;
|
||||
|
||||
|
|
|
|||
|
|
@ -679,16 +679,16 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
|||
fd6_state_take_group(&emit->state, state, FD6_GROUP_FS_BINDLESS);
|
||||
break;
|
||||
case FD6_GROUP_CONST:
|
||||
state = fd6_build_user_consts<PIPELINE>(emit);
|
||||
state = fd6_build_user_consts<CHIP, PIPELINE>(emit);
|
||||
fd6_state_take_group(&emit->state, state, FD6_GROUP_CONST);
|
||||
break;
|
||||
case FD6_GROUP_DRIVER_PARAMS:
|
||||
state = fd6_build_driver_params<PIPELINE>(emit);
|
||||
state = fd6_build_driver_params<CHIP, PIPELINE>(emit);
|
||||
fd6_state_take_group(&emit->state, state, FD6_GROUP_DRIVER_PARAMS);
|
||||
break;
|
||||
case FD6_GROUP_PRIMITIVE_PARAMS:
|
||||
if (PIPELINE == HAS_TESS_GS) {
|
||||
state = fd6_build_tess_consts(emit);
|
||||
state = fd6_build_tess_consts<CHIP>(emit);
|
||||
fd6_state_take_group(&emit->state, state, FD6_GROUP_PRIMITIVE_PARAMS);
|
||||
}
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -236,7 +236,7 @@ fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
OUT_RELOC(ring, so->bo, 0, 0, 0);
|
||||
}
|
||||
|
||||
fd6_emit_immediates(so, ring);
|
||||
fd6_emit_immediates<CHIP>(so, ring);
|
||||
}
|
||||
FD_GENX(fd6_emit_shader);
|
||||
|
||||
|
|
@ -839,8 +839,8 @@ emit_vpc(struct fd_ringbuffer *ring, const struct program_builder *b)
|
|||
OUT_PKT4(ring, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
|
||||
OUT_RING(ring, b->hs->tess.tcs_vertices_out);
|
||||
|
||||
fd6_emit_link_map(b->vs, b->hs, ring);
|
||||
fd6_emit_link_map(b->hs, b->ds, ring);
|
||||
fd6_emit_link_map<CHIP>(b->ctx, b->vs, b->hs, ring);
|
||||
fd6_emit_link_map<CHIP>(b->ctx, b->hs, b->ds, ring);
|
||||
}
|
||||
|
||||
if (b->gs) {
|
||||
|
|
@ -849,10 +849,11 @@ emit_vpc(struct fd_ringbuffer *ring, const struct program_builder *b)
|
|||
b->ds ? b->ds->output_size : b->vs->output_size;
|
||||
|
||||
if (b->hs) {
|
||||
fd6_emit_link_map(b->ds, b->gs, ring);
|
||||
fd6_emit_link_map<CHIP>(b->ctx, b->ds, b->gs, ring);
|
||||
} else {
|
||||
fd6_emit_link_map(b->vs, b->gs, ring);
|
||||
fd6_emit_link_map<CHIP>(b->ctx, b->vs, b->gs, ring);
|
||||
}
|
||||
|
||||
vertices_out = MAX2(1, b->gs->gs.vertices_out) - 1;
|
||||
enum a6xx_tess_output output =
|
||||
primitive_to_tess((enum mesa_prim)b->gs->gs.output_primitive);
|
||||
|
|
@ -1451,23 +1452,33 @@ fd6_program_create(void *data, const struct ir3_shader_variant *bs,
|
|||
|
||||
/* Note that binning pass uses same const state as draw pass: */
|
||||
state->user_consts_cmdstream_size =
|
||||
fd6_user_consts_cmdstream_size(state->vs) +
|
||||
fd6_user_consts_cmdstream_size(state->hs) +
|
||||
fd6_user_consts_cmdstream_size(state->ds) +
|
||||
fd6_user_consts_cmdstream_size(state->gs) +
|
||||
fd6_user_consts_cmdstream_size(state->fs);
|
||||
fd6_user_consts_cmdstream_size<CHIP>(state->vs) +
|
||||
fd6_user_consts_cmdstream_size<CHIP>(state->hs) +
|
||||
fd6_user_consts_cmdstream_size<CHIP>(state->ds) +
|
||||
fd6_user_consts_cmdstream_size<CHIP>(state->gs) +
|
||||
fd6_user_consts_cmdstream_size<CHIP>(state->fs);
|
||||
|
||||
unsigned num_dp = 0;
|
||||
unsigned num_ubo_dp = 0;
|
||||
|
||||
if (vs->need_driver_params)
|
||||
num_dp++;
|
||||
|
||||
if (gs && gs->need_driver_params)
|
||||
num_dp++;
|
||||
num_ubo_dp++;
|
||||
if (hs && hs->need_driver_params)
|
||||
num_dp++;
|
||||
num_ubo_dp++;
|
||||
if (ds && ds->need_driver_params)
|
||||
num_dp++;
|
||||
num_ubo_dp++;
|
||||
|
||||
if (!(CHIP == A7XX && vs->compiler->load_inline_uniforms_via_preamble_ldgk)) {
|
||||
/* On a6xx all shader stages use driver params pushed in cmdstream: */
|
||||
num_dp += num_ubo_dp;
|
||||
num_ubo_dp = 0;
|
||||
}
|
||||
|
||||
state->num_driver_params = num_dp;
|
||||
state->num_ubo_driver_params = num_ubo_dp;
|
||||
|
||||
/* dual source blending has an extra fs output in the 2nd slot */
|
||||
if (fs->fs.color_is_dual_source) {
|
||||
|
|
|
|||
|
|
@ -38,13 +38,18 @@ struct fd6_program_state {
|
|||
* Whether multiple viewports are used is determined by whether
|
||||
* the last shader stage writes viewport id
|
||||
*/
|
||||
uint16_t num_viewports;
|
||||
uint8_t num_viewports;
|
||||
|
||||
/**
|
||||
* The # of shader stages that need driver params.
|
||||
*/
|
||||
uint8_t num_driver_params;
|
||||
|
||||
/**
|
||||
* The # of shader stages that need ubo driver params
|
||||
*/
|
||||
uint8_t num_ubo_driver_params;
|
||||
|
||||
/**
|
||||
* Output components from frag shader. It is possible to have
|
||||
* a fragment shader that only writes a subset of the bound
|
||||
|
|
|
|||
|
|
@ -574,6 +574,11 @@ ir3_screen_init(struct pipe_screen *pscreen)
|
|||
if (screen->gen >= 6) {
|
||||
options.lower_base_vertex = true;
|
||||
}
|
||||
|
||||
if (screen->gen >= 7) {
|
||||
options.push_ubo_with_preamble = true;
|
||||
}
|
||||
|
||||
screen->compiler =
|
||||
ir3_compiler_create(screen->dev, screen->dev_id, screen->info, &options);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue