freedreno/a6xx: Add support to load driver-params via UBO

In this case, we can't use CP_LOAD_STATE to push the consts inline in
the cmdstream, but instead need to setup a UBO.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31534>
This commit is contained in:
Rob Clark 2024-09-30 15:03:56 -07:00 committed by Marge Bot
parent f193c61c6b
commit 3a0b022136
7 changed files with 268 additions and 61 deletions

View file

@@ -152,7 +152,7 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
cs->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
cs_program_emit<CHIP>(ctx, cs->stateobj, cs->v);
cs->user_consts_cmdstream_size = fd6_user_consts_cmdstream_size(cs->v);
cs->user_consts_cmdstream_size = fd6_user_consts_cmdstream_size<CHIP>(cs->v);
}
trace_start_compute(&ctx->batch->trace, ring, !!info->indirect, info->work_dim,
@@ -190,10 +190,10 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
fd6_emit_cs_state<CHIP>(ctx, ring, cs);
if (ctx->gen_dirty & BIT(FD6_GROUP_CONST))
fd6_emit_cs_user_consts(ctx, ring, cs);
fd6_emit_cs_user_consts<CHIP>(ctx, ring, cs);
if (cs->v->need_driver_params || info->input)
fd6_emit_cs_driver_params(ctx, ring, cs, info);
fd6_emit_cs_driver_params<CHIP>(ctx, ring, cs, info);
OUT_PKT7(ring, CP_SET_MARKER, 1);
OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_COMPUTE));

View file

@@ -6,6 +6,7 @@
#define FD_BO_NO_HARDPIN 1
#include "fd6_barrier.h"
#include "fd6_const.h"
#include "fd6_compute.h"
#include "fd6_pack.h"
@@ -36,6 +37,40 @@ fd6_emit_driver_ubo(struct fd_ringbuffer *ring, const struct ir3_shader_variant
((uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32), 0);
}
/* A helper to upload driver-params to a UBO, for the case where constants are
 * loaded by the shader preamble rather than pushed inline via ST6_CONSTANTS.
 *
 * ctx:        context whose const_uploader provides the backing buffer
 * ring:       ringbuffer the UBO descriptor (and BO attachment) is emitted to
 * v:          shader variant the UBO descriptor is emitted for
 * base:       UBO slot index; negative means the shader has no such UBO
 *             (presumably unset by the compiler — nothing is emitted then)
 * sizedwords: size of the payload in dwords; zero is a silent no-op
 * dwords:     CPU pointer to the payload to upload
 */
static void
fd6_upload_emit_driver_ubo(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct ir3_shader_variant *v, int base,
uint32_t sizedwords, const void *dwords)
{
struct pipe_context *pctx = &ctx->base;
/* This path only exists on a7xx+ with preamble-loaded consts: */
assert(ctx->screen->info->chip >= 7 && ctx->screen->info->a7xx.load_shader_consts_via_preamble);
if (!sizedwords || (base < 0))
return;
unsigned buffer_offset;
struct pipe_resource *buffer = NULL;
u_upload_data(pctx->const_uploader, 0, sizedwords * sizeof(uint32_t),
16, dwords, &buffer_offset, &buffer);
if (!buffer)
return; /* nothing good will come of this.. */
/* The backing BO may otherwise not be tracked by the resource, as
* this allocation happens outside of the context of batch resource
* tracking.
*/
fd_ringbuffer_attach_bo(ring, fd_resource(buffer)->bo);
fd6_emit_driver_ubo(ring, v, base, sizedwords, buffer_offset,
fd_resource(buffer)->bo);
/* Drop our reference; the ring holds the BO alive via the attach above. */
pipe_resource_reference(&buffer, NULL);
}
/* regid: base const register
* prsc or dwords: buffer containing constant values
* sizedwords: size of const value buffer
@@ -71,6 +106,7 @@ fd6_emit_const_user(struct fd_ringbuffer *ring,
CP_LOAD_STATE6_2());
}
}
void
fd6_emit_const_bo(struct fd_ringbuffer *ring,
const struct ir3_shader_variant *v, uint32_t regid,
@@ -115,16 +151,31 @@ emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
}
static void
emit_stage_tess_consts(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
uint32_t *params, int num_params)
wait_mem_writes(struct fd_context *ctx)
{
const struct ir3_const_state *const_state = ir3_const_state(v);
const unsigned regid = const_state->offsets.primitive_param;
int size = MIN2(1 + regid, v->constlen) - regid;
if (size > 0)
fd6_emit_const_user(ring, v, regid * 4, num_params, params);
ctx->batch->barrier |= FD6_WAIT_MEM_WRITES | FD6_INVALIDATE_CACHE | FD6_WAIT_FOR_IDLE;
}
/* Emit per-stage tess/geom primitive-param constants. On a7xx with
 * preamble-loaded consts they go into the primitive_param UBO; otherwise
 * they are pushed inline as user consts at the stage's primitive_param
 * const offset.
 */
template <chip CHIP>
static void
emit_stage_tess_consts(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
struct fd_context *ctx, uint32_t *params, int num_params)
{
const struct ir3_const_state *const_state = ir3_const_state(v);
if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
int base = const_state->primitive_param_ubo.idx;
/* Helper handles base < 0 (UBO unused) by emitting nothing: */
fd6_upload_emit_driver_ubo(ctx, ring, v, base, num_params, params);
} else {
const unsigned regid = const_state->offsets.primitive_param;
/* Skip the emit if the params lie entirely above constlen: */
int size = MIN2(1 + regid, v->constlen) - regid;
if (size > 0)
fd6_emit_const_user(ring, v, regid * 4, num_params, params);
}
}
template <chip CHIP>
struct fd_ringbuffer *
fd6_build_tess_consts(struct fd6_emit *emit)
{
@@ -144,7 +195,7 @@ fd6_build_tess_consts(struct fd6_emit *emit)
emit->vs->output_size * 4, /* vs vertex stride */
0, 0};
emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params));
emit_stage_tess_consts<CHIP>(constobj, emit->vs, emit->ctx, vs_params, ARRAY_SIZE(vs_params));
if (emit->hs) {
struct fd_bo *tess_bo = ctx->screen->tess_bo;
@@ -164,8 +215,8 @@ fd6_build_tess_consts(struct fd6_emit *emit)
tess_factor_iova >> 32,
};
emit_stage_tess_consts(constobj, emit->hs, hs_params,
ARRAY_SIZE(hs_params));
emit_stage_tess_consts<CHIP>(constobj, emit->hs, emit->ctx,
hs_params, ARRAY_SIZE(hs_params));
if (emit->gs)
num_vertices = emit->gs->gs.vertices_in;
@@ -181,8 +232,8 @@ fd6_build_tess_consts(struct fd6_emit *emit)
tess_factor_iova >> 32,
};
emit_stage_tess_consts(constobj, emit->ds, ds_params,
ARRAY_SIZE(ds_params));
emit_stage_tess_consts<CHIP>(constobj, emit->ds, emit->ctx,
ds_params, ARRAY_SIZE(ds_params));
}
if (emit->gs) {
@@ -200,12 +251,13 @@ fd6_build_tess_consts(struct fd6_emit *emit)
};
num_vertices = emit->gs->gs.vertices_in;
emit_stage_tess_consts(constobj, emit->gs, gs_params,
ARRAY_SIZE(gs_params));
emit_stage_tess_consts<CHIP>(constobj, emit->gs, emit->ctx,
gs_params, ARRAY_SIZE(gs_params));
}
return constobj;
}
FD_GENX(fd6_build_tess_consts);
static void
fd6_emit_ubos(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
@@ -240,6 +292,7 @@ fd6_emit_ubos(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
}
}
template <chip CHIP>
unsigned
fd6_user_consts_cmdstream_size(const struct ir3_shader_variant *v)
{
@@ -250,8 +303,13 @@ fd6_user_consts_cmdstream_size(const struct ir3_shader_variant *v)
const struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;
unsigned packets, size;
/* pre-calculate size required for userconst stateobj: */
ir3_user_consts_size(ubo_state, &packets, &size);
if (CHIP == A7XX && v->compiler->load_shader_consts_via_preamble) {
packets = 0;
size = 0;
} else {
/* pre-calculate size required for userconst stateobj: */
ir3_user_consts_size(ubo_state, &packets, &size);
}
/* also account for UBO addresses: */
packets += 1;
@@ -260,17 +318,23 @@ fd6_user_consts_cmdstream_size(const struct ir3_shader_variant *v)
unsigned sizedwords = (4 * packets) + size;
return sizedwords * 4;
}
FD_GENX(fd6_user_consts_cmdstream_size);
template <chip CHIP>
static void
emit_user_consts(const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring,
struct fd_constbuf_stateobj *constbuf)
{
ir3_emit_user_consts(v, ring, constbuf);
fd6_emit_ubos(v, ring, constbuf);
if (CHIP == A7XX && v->compiler->load_shader_consts_via_preamble)
return;
ir3_emit_user_consts(v, ring, constbuf);
}
template <fd6_pipeline_type PIPELINE>
template <chip CHIP, fd6_pipeline_type PIPELINE>
struct fd_ringbuffer *
fd6_build_user_consts(struct fd6_emit *emit)
{
@@ -280,33 +344,82 @@ fd6_build_user_consts(struct fd6_emit *emit)
struct fd_ringbuffer *constobj =
fd_submit_new_ringbuffer(ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);
emit_user_consts(emit->vs, constobj, &ctx->constbuf[PIPE_SHADER_VERTEX]);
emit_user_consts<CHIP>(emit->vs, constobj, &ctx->constbuf[PIPE_SHADER_VERTEX]);
if (PIPELINE == HAS_TESS_GS) {
if (emit->hs) {
emit_user_consts(emit->hs, constobj, &ctx->constbuf[PIPE_SHADER_TESS_CTRL]);
emit_user_consts(emit->ds, constobj, &ctx->constbuf[PIPE_SHADER_TESS_EVAL]);
emit_user_consts<CHIP>(emit->hs, constobj, &ctx->constbuf[PIPE_SHADER_TESS_CTRL]);
emit_user_consts<CHIP>(emit->ds, constobj, &ctx->constbuf[PIPE_SHADER_TESS_EVAL]);
}
if (emit->gs) {
emit_user_consts(emit->gs, constobj, &ctx->constbuf[PIPE_SHADER_GEOMETRY]);
emit_user_consts<CHIP>(emit->gs, constobj, &ctx->constbuf[PIPE_SHADER_GEOMETRY]);
}
}
emit_user_consts(emit->fs, constobj, &ctx->constbuf[PIPE_SHADER_FRAGMENT]);
emit_user_consts<CHIP>(emit->fs, constobj, &ctx->constbuf[PIPE_SHADER_FRAGMENT]);
return constobj;
}
template struct fd_ringbuffer * fd6_build_user_consts<A6XX, HAS_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_user_consts<A7XX, HAS_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_user_consts<A6XX, NO_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_user_consts<A7XX, NO_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_user_consts<HAS_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_user_consts<NO_TESS_GS>(struct fd6_emit *emit);
template <chip CHIP>
static inline void
emit_driver_params(const struct ir3_shader_variant *v, struct fd_ringbuffer *dpconstobj,
struct fd_context *ctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
const struct ir3_driver_params_vs *vertex_params)
{
if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
const struct ir3_const_state *const_state = ir3_const_state(v);
int base = const_state->driver_params_ubo.idx;
template <fd6_pipeline_type PIPELINE>
fd6_upload_emit_driver_ubo(ctx, dpconstobj, v, base,
dword_sizeof(*vertex_params),
vertex_params);
} else {
ir3_emit_driver_params(v, dpconstobj, ctx, info, indirect, vertex_params);
}
}
/* Emit driver-params for the tess-control (HS) stage. On a7xx with
 * preamble-loaded consts the TCS params are uploaded into the stage's
 * driver_params UBO; otherwise they are emitted inline via
 * ir3_emit_hs_driver_params().
 */
template <chip CHIP>
static inline void
emit_hs_driver_params(const struct ir3_shader_variant *v,
struct fd_ringbuffer *dpconstobj,
struct fd_context *ctx)
{
if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
const struct ir3_const_state *const_state = ir3_const_state(v);
struct ir3_driver_params_tcs hs_params = ir3_build_driver_params_tcs(ctx);
/* Helper handles base < 0 (UBO unused) by emitting nothing: */
int base = const_state->driver_params_ubo.idx;
fd6_upload_emit_driver_ubo(ctx, dpconstobj, v, base,
dword_sizeof(hs_params),
&hs_params);
} else {
ir3_emit_hs_driver_params(v, dpconstobj, ctx);
}
}
template <chip CHIP, fd6_pipeline_type PIPELINE>
struct fd_ringbuffer *
fd6_build_driver_params(struct fd6_emit *emit)
{
struct fd_context *ctx = emit->ctx;
struct fd6_context *fd6_ctx = fd6_context(ctx);
unsigned num_dp = emit->prog->num_driver_params;
unsigned num_ubo_dp;
if (!num_dp) {
if (CHIP == A6XX) {
assert(!emit->prog->num_ubo_driver_params);
/* Make it easier for compiler to see that this path isn't used on a6xx: */
num_ubo_dp = 0;
} else {
num_ubo_dp = emit->prog->num_ubo_driver_params;
}
if (!num_dp && !num_ubo_dp) {
fd6_ctx->has_dp_state = false;
return NULL;
}
@@ -323,53 +436,104 @@ fd6_build_driver_params(struct fd6_emit *emit)
ir3_build_driver_params_vs(ctx, emit->info, emit->draw, emit->draw_id, needs_ucp);
unsigned size_dwords =
num_dp * (4 + dword_sizeof(p)); /* 4dw PKT7 header */
num_dp * (4 + dword_sizeof(p)) + /* 4dw PKT7 header */
num_ubo_dp * 6; /* 6dw per UBO descriptor */
struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
ctx->batch->submit, size_dwords * 4, FD_RINGBUFFER_STREAMING);
/* VS still works the old way*/
if (emit->vs->need_driver_params) {
ir3_emit_driver_params(emit->vs, dpconstobj, ctx, emit->info, emit->indirect, &p);
}
if (PIPELINE == HAS_TESS_GS) {
if (emit->gs && emit->gs->need_driver_params) {
ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info, emit->indirect, &p);
emit_driver_params<CHIP>(emit->gs, dpconstobj, ctx, emit->info, emit->indirect, &p);
}
if (emit->hs && emit->hs->need_driver_params) {
ir3_emit_hs_driver_params(emit->hs, dpconstobj, ctx);
emit_hs_driver_params<CHIP>(emit->hs, dpconstobj, ctx);
}
if (emit->ds && emit->ds->need_driver_params) {
ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info, emit->indirect, &p);
emit_driver_params<CHIP>(emit->ds, dpconstobj, ctx, emit->info, emit->indirect, &p);
}
}
if (emit->indirect)
wait_mem_writes(ctx);
fd6_ctx->has_dp_state = true;
return dpconstobj;
}
template struct fd_ringbuffer * fd6_build_driver_params<HAS_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_driver_params<NO_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_driver_params<A6XX, HAS_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_driver_params<A7XX, HAS_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_driver_params<A6XX, NO_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_driver_params<A7XX, NO_TESS_GS>(struct fd6_emit *emit);
template <chip CHIP>
void
fd6_emit_cs_driver_params(struct fd_context *ctx,
struct fd_ringbuffer *ring,
struct fd6_compute_state *cs,
const struct pipe_grid_info *info)
{
ir3_emit_cs_driver_params(cs->v, ring, ctx, info);
}
/* info->input not handled in the UBO path. I believe this was only
* ever used by clover
*/
assert(!info->input);
if (CHIP == A7XX && ctx->screen->info->a7xx.load_shader_consts_via_preamble) {
const struct ir3_const_state *const_state = ir3_const_state(cs->v);
struct ir3_driver_params_cs compute_params =
ir3_build_driver_params_cs(cs->v, info);
int base = const_state->driver_params_ubo.idx;
if (base < 0)
return;
struct pipe_resource *buffer = NULL;
unsigned buffer_offset;
u_upload_data(ctx->base.const_uploader, 0, sizeof(compute_params),
16, &compute_params, &buffer_offset, &buffer);
if (info->indirect) {
/* Copy indirect params into UBO: */
ctx->screen->mem_to_mem(ring, buffer, buffer_offset, info->indirect,
info->indirect_offset, 3);
wait_mem_writes(ctx);
} else {
fd_ringbuffer_attach_bo(ring, fd_resource(buffer)->bo);
}
fd6_emit_driver_ubo(ring, cs->v, base, dword_sizeof(compute_params),
buffer_offset, fd_resource(buffer)->bo);
pipe_resource_reference(&buffer, NULL);
} else {
ir3_emit_cs_driver_params(cs->v, ring, ctx, info);
if (info->indirect)
wait_mem_writes(ctx);
}
}
FD_GENX(fd6_emit_cs_driver_params);
template <chip CHIP>
void
fd6_emit_cs_user_consts(struct fd_context *ctx,
struct fd_ringbuffer *ring,
struct fd6_compute_state *cs)
{
emit_user_consts(cs->v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
emit_user_consts<CHIP>(cs->v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
}
FD_GENX(fd6_emit_cs_user_consts);
template <chip CHIP>
void
fd6_emit_immediates(const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
@@ -383,13 +547,28 @@ fd6_emit_immediates(const struct ir3_shader_variant *v,
v->info.constant_data_offset, v->bo);
}
if (CHIP == A7XX && v->compiler->load_inline_uniforms_via_preamble_ldgk)
return;
ir3_emit_immediates(v, ring);
}
FD_GENX(fd6_emit_immediates);
template <chip CHIP>
void
fd6_emit_link_map(const struct ir3_shader_variant *producer,
fd6_emit_link_map(struct fd_context *ctx,
const struct ir3_shader_variant *producer,
const struct ir3_shader_variant *consumer,
struct fd_ringbuffer *ring)
{
ir3_emit_link_map(producer, consumer, ring);
if (CHIP == A7XX && producer->compiler->load_shader_consts_via_preamble) {
const struct ir3_const_state *const_state = ir3_const_state(consumer);
int base = const_state->primitive_map_ubo.idx;
uint32_t size = ALIGN(consumer->input_size, 4);
fd6_upload_emit_driver_ubo(ctx, ring, consumer, base, size, producer->output_loc);
} else {
ir3_emit_link_map(producer, consumer, ring);
}
}
FD_GENX(fd6_emit_link_map);

View file

@@ -9,26 +9,33 @@
#include "fd6_emit.h"
template <chip CHIP>
struct fd_ringbuffer *fd6_build_tess_consts(struct fd6_emit *emit) assert_dt;
template <chip CHIP>
unsigned fd6_user_consts_cmdstream_size(const struct ir3_shader_variant *v);
template <fd6_pipeline_type PIPELINE>
template <chip CHIP, fd6_pipeline_type PIPELINE>
struct fd_ringbuffer *fd6_build_user_consts(struct fd6_emit *emit) assert_dt;
template <fd6_pipeline_type PIPELINE>
template <chip CHIP, fd6_pipeline_type PIPELINE>
struct fd_ringbuffer *
fd6_build_driver_params(struct fd6_emit *emit) assert_dt;
template <chip CHIP>
void fd6_emit_cs_driver_params(struct fd_context *ctx,
struct fd_ringbuffer *ring,
struct fd6_compute_state *cs,
const struct pipe_grid_info *info) assert_dt;
template <chip CHIP>
void fd6_emit_cs_user_consts(struct fd_context *ctx,
struct fd_ringbuffer *ring,
struct fd6_compute_state *cs) assert_dt;
template <chip CHIP>
void fd6_emit_immediates(const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring) assert_dt;
void fd6_emit_link_map(const struct ir3_shader_variant *producer,
template <chip CHIP>
void fd6_emit_link_map(struct fd_context *ctx,
const struct ir3_shader_variant *producer,
const struct ir3_shader_variant *consumer,
struct fd_ringbuffer *ring) assert_dt;

View file

@@ -679,16 +679,16 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
fd6_state_take_group(&emit->state, state, FD6_GROUP_FS_BINDLESS);
break;
case FD6_GROUP_CONST:
state = fd6_build_user_consts<PIPELINE>(emit);
state = fd6_build_user_consts<CHIP, PIPELINE>(emit);
fd6_state_take_group(&emit->state, state, FD6_GROUP_CONST);
break;
case FD6_GROUP_DRIVER_PARAMS:
state = fd6_build_driver_params<PIPELINE>(emit);
state = fd6_build_driver_params<CHIP, PIPELINE>(emit);
fd6_state_take_group(&emit->state, state, FD6_GROUP_DRIVER_PARAMS);
break;
case FD6_GROUP_PRIMITIVE_PARAMS:
if (PIPELINE == HAS_TESS_GS) {
state = fd6_build_tess_consts(emit);
state = fd6_build_tess_consts<CHIP>(emit);
fd6_state_take_group(&emit->state, state, FD6_GROUP_PRIMITIVE_PARAMS);
}
break;

View file

@@ -236,7 +236,7 @@ fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RELOC(ring, so->bo, 0, 0, 0);
}
fd6_emit_immediates(so, ring);
fd6_emit_immediates<CHIP>(so, ring);
}
FD_GENX(fd6_emit_shader);
@@ -839,8 +839,8 @@ emit_vpc(struct fd_ringbuffer *ring, const struct program_builder *b)
OUT_PKT4(ring, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
OUT_RING(ring, b->hs->tess.tcs_vertices_out);
fd6_emit_link_map(b->vs, b->hs, ring);
fd6_emit_link_map(b->hs, b->ds, ring);
fd6_emit_link_map<CHIP>(b->ctx, b->vs, b->hs, ring);
fd6_emit_link_map<CHIP>(b->ctx, b->hs, b->ds, ring);
}
if (b->gs) {
@@ -849,10 +849,11 @@ emit_vpc(struct fd_ringbuffer *ring, const struct program_builder *b)
b->ds ? b->ds->output_size : b->vs->output_size;
if (b->hs) {
fd6_emit_link_map(b->ds, b->gs, ring);
fd6_emit_link_map<CHIP>(b->ctx, b->ds, b->gs, ring);
} else {
fd6_emit_link_map(b->vs, b->gs, ring);
fd6_emit_link_map<CHIP>(b->ctx, b->vs, b->gs, ring);
}
vertices_out = MAX2(1, b->gs->gs.vertices_out) - 1;
enum a6xx_tess_output output =
primitive_to_tess((enum mesa_prim)b->gs->gs.output_primitive);
@@ -1451,23 +1452,33 @@ fd6_program_create(void *data, const struct ir3_shader_variant *bs,
/* Note that binning pass uses same const state as draw pass: */
state->user_consts_cmdstream_size =
fd6_user_consts_cmdstream_size(state->vs) +
fd6_user_consts_cmdstream_size(state->hs) +
fd6_user_consts_cmdstream_size(state->ds) +
fd6_user_consts_cmdstream_size(state->gs) +
fd6_user_consts_cmdstream_size(state->fs);
fd6_user_consts_cmdstream_size<CHIP>(state->vs) +
fd6_user_consts_cmdstream_size<CHIP>(state->hs) +
fd6_user_consts_cmdstream_size<CHIP>(state->ds) +
fd6_user_consts_cmdstream_size<CHIP>(state->gs) +
fd6_user_consts_cmdstream_size<CHIP>(state->fs);
unsigned num_dp = 0;
unsigned num_ubo_dp = 0;
if (vs->need_driver_params)
num_dp++;
if (gs && gs->need_driver_params)
num_dp++;
num_ubo_dp++;
if (hs && hs->need_driver_params)
num_dp++;
num_ubo_dp++;
if (ds && ds->need_driver_params)
num_dp++;
num_ubo_dp++;
if (!(CHIP == A7XX && vs->compiler->load_inline_uniforms_via_preamble_ldgk)) {
/* On a6xx all shader stages use driver params pushed in cmdstream: */
num_dp += num_ubo_dp;
num_ubo_dp = 0;
}
state->num_driver_params = num_dp;
state->num_ubo_driver_params = num_ubo_dp;
/* dual source blending has an extra fs output in the 2nd slot */
if (fs->fs.color_is_dual_source) {

View file

@@ -38,13 +38,18 @@ struct fd6_program_state {
* Whether multiple viewports are used is determined by whether
* the last shader stage writes viewport id
*/
uint16_t num_viewports;
uint8_t num_viewports;
/**
* The # of shader stages that need driver params.
*/
uint8_t num_driver_params;
/**
* The # of shader stages that need ubo driver params
*/
uint8_t num_ubo_driver_params;
/**
* Output components from frag shader. It is possible to have
* a fragment shader that only writes a subset of the bound

View file

@@ -574,6 +574,11 @@ ir3_screen_init(struct pipe_screen *pscreen)
if (screen->gen >= 6) {
options.lower_base_vertex = true;
}
if (screen->gen >= 7) {
options.push_ubo_with_preamble = true;
}
screen->compiler =
ir3_compiler_create(screen->dev, screen->dev_id, screen->info, &options);