freedreno/ir3+a6xx: same VBO state for draw/binning

Worth ~+20% on gl_driver2

Signed-off-by: Rob Clark <robdclark@chromium.org>

parent 4b82d1bbb7
commit 882d53d8e3

8 changed files with 150 additions and 22 deletions
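
The whole change hinges on one invariant: the binning-pass VS must read each vertex attribute from the same register as the draw-pass VS, so that a single VBO state object is valid for both passes. A minimal sketch of that invariant (illustrative only; the helper name is made up here, but the loop mirrors the #ifdef DEBUG check this patch adds in fd6_program_create()):

    /* Illustrative helper (not part of the patch): returns true if the
     * binning-pass variant 'bs' reads every vertex input from the same
     * register as the draw-pass variant 'vs', which is what allows the
     * two passes to share one VBO state object.
     */
    static bool
    vbo_state_shareable(const struct ir3_shader_variant *bs,
            const struct ir3_shader_variant *vs)
    {
        for (unsigned i = 0; i < bs->inputs_count; i++) {
            /* sysvals are not fetched from the VBO, so they don't matter here */
            if (vs->inputs[i].sysval)
                continue;
            if (bs->inputs[i].regid != vs->inputs[i].regid)
                return false;
        }
        return true;
    }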

@@ -1081,7 +1081,7 @@ void ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so);
 
 /* register assignment: */
 struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(struct ir3_compiler *compiler);
-int ir3_ra(struct ir3 *ir3);
+int ir3_ra(struct ir3_shader_variant *v);
 
 /* legalize: */
 void ir3_legalize(struct ir3 *ir, bool *has_ssbo, bool *need_pixlod, int *max_bary);

@@ -2906,6 +2906,32 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
     if (so->binning_pass && (ctx->compiler->gpu_id >= 600))
         fixup_binning_pass(ctx);
 
+    /* for a6xx+, binning and draw pass VS use same VBO state, so we
+     * need to make sure not to remove any inputs that are used by
+     * the nonbinning VS.
+     */
+    if (ctx->compiler->gpu_id >= 600 && so->binning_pass) {
+        debug_assert(so->type == MESA_SHADER_VERTEX);
+        for (int i = 0; i < ir->ninputs; i++) {
+            struct ir3_instruction *in = ir->inputs[i];
+
+            if (!in)
+                continue;
+
+            unsigned n = i / 4;
+            unsigned c = i % 4;
+
+            debug_assert(n < so->nonbinning->inputs_count);
+
+            if (so->nonbinning->inputs[n].sysval)
+                continue;
+
+            /* be sure to keep inputs, even if only used in VS */
+            if (so->nonbinning->inputs[n].compmask & (1 << c))
+                array_insert(in->block, in->block->keeps, in);
+        }
+    }
+
     /* Insert mov if there's same instruction for each output.
      * eg. dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_expression.vertex.sampler2dshadow
      */
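
In the loop above, ir->inputs[] holds one entry per scalar component, four per input slot, which is why the slot and component are recovered as i / 4 and i % 4. A small sketch of the keep decision (illustrative only, not from the patch, but using the same fields it touches):

    /* Illustrative: maps a flattened scalar input index to its slot and
     * component, and keeps the component for the binning VS iff the
     * draw-pass (nonbinning) VS actually reads it.  e.g. i == 6 is the
     * .z component (c == 2) of the second attribute slot (n == 1).
     */
    static bool
    binning_keeps_component(const struct ir3_shader_variant *nonbinning, unsigned i)
    {
        unsigned n = i / 4;   /* attribute slot  */
        unsigned c = i % 4;   /* x=0 y=1 z=2 w=3 */

        if (nonbinning->inputs[n].sysval)
            return false;     /* sysvals are not vertex fetches */

        return nonbinning->inputs[n].compmask & (1 << c);
    }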

@@ -2962,7 +2988,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
         ir3_print(ir);
     }
 
-    ret = ir3_ra(ir);
+    ret = ir3_ra(so);
     if (ret) {
         DBG("RA failed!");
         goto out;

@@ -3003,13 +3029,17 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
     for (j = 0; j < 4; j++) {
         struct ir3_instruction *in = inputs[(i*4) + j];
 
-        if (in && !(in->flags & IR3_INSTR_UNUSED)) {
-            reg = in->regs[0]->num - j;
-            if (half) {
-                compile_assert(ctx, in->regs[0]->flags & IR3_REG_HALF);
-            } else {
-                half = !!(in->regs[0]->flags & IR3_REG_HALF);
-            }
+        if (!in)
+            continue;
+
+        if (in->flags & IR3_INSTR_UNUSED)
+            continue;
+
+        reg = in->regs[0]->num - j;
+        if (half) {
+            compile_assert(ctx, in->regs[0]->flags & IR3_REG_HALF);
+        } else {
+            half = !!(in->regs[0]->flags & IR3_REG_HALF);
         }
     }
     so->inputs[i].regid = reg;

@@ -330,6 +330,7 @@ struct ir3_ra_instr_data {
 
 /* register-assign context, per-shader */
 struct ir3_ra_ctx {
+    struct ir3_shader_variant *v;
     struct ir3 *ir;
 
     struct ir3_ra_reg_set *set;

@@ -1091,6 +1092,60 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 static int
 ra_alloc(struct ir3_ra_ctx *ctx)
 {
+    /* Pre-assign VS inputs on a6xx+ binning pass shader, to align
+     * with draw pass VS, so binning and draw pass can both use the
+     * same VBO state.
+     *
+     * Note that VS inputs are expected to be full precision.
+     */
+    bool pre_assign_inputs = (ctx->ir->compiler->gpu_id >= 600) &&
+            (ctx->ir->type == MESA_SHADER_VERTEX) &&
+            ctx->v->binning_pass;
+
+    if (pre_assign_inputs) {
+        for (unsigned i = 0; i < ctx->ir->ninputs; i++) {
+            struct ir3_instruction *instr = ctx->ir->inputs[i];
+
+            if (!instr)
+                continue;
+
+            debug_assert(!(instr->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH)));
+
+            struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+
+            /* only consider the first component: */
+            if (id->off > 0)
+                continue;
+
+            unsigned name = ra_name(ctx, id);
+
+            unsigned n = i / 4;
+            unsigned c = i % 4;
+
+            /* 'base' is in scalar (class 0) but we need to map that
+             * to the conflicting register of the appropriate class (ie.
+             * input could be vec2/vec3/etc)
+             *
+             * Note that the higher class (larger than scalar) regs
+             * are setup to conflict with others in the same class,
+             * so for example, R1 (scalar) is also the first component
+             * of D1 (vec2/double):
+             *
+             *    Single (base) |  Double
+             *    --------------+---------------
+             *       R0         |  D0
+             *       R1         |  D0 D1
+             *       R2         |  D1 D2
+             *       R3         |  D2
+             *           .. and so on..
+             */
+            unsigned reg = ctx->set->gpr_to_ra_reg[id->cls]
+                    [ctx->v->nonbinning->inputs[n].regid + c];
+
+            ra_set_node_reg(ctx->g, name, reg);
+        }
+    }
+
     /* pre-assign array elements:
      */
     list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
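
The table in that comment describes how the higher register classes are built: each Double register is set up to conflict with the two scalars it overlaps, so pinning the binning VS input node onto, say, D1 also reserves R1 and R2, exactly as the draw-pass VS left them. A simplified sketch of that kind of setup (an assumption-laden illustration of what ir3_ra_alloc_reg_set() does, using mesa's util/register_allocate API of this era and made-up register counts, not the driver's actual tables):

    #include "util/register_allocate.h"

    /* Illustrative only: build a tiny register set with a scalar class and
     * a "Double" class, where Dd transitively conflicts with scalars Rd and
     * Rd+1.  This is the property that lets a scalar regid taken from the
     * draw-pass VS be translated into the right higher-class register.
     */
    static struct ra_regs *
    build_tiny_set(void *memctx, unsigned num_scalars)
    {
        unsigned num_doubles = num_scalars - 1;
        struct ra_regs *regs =
            ra_alloc_reg_set(memctx, num_scalars + num_doubles, false);

        unsigned scalar_class = ra_alloc_reg_class(regs);
        unsigned double_class = ra_alloc_reg_class(regs);

        for (unsigned r = 0; r < num_scalars; r++)
            ra_class_add_reg(regs, scalar_class, r);

        for (unsigned d = 0; d < num_doubles; d++) {
            unsigned reg = num_scalars + d;
            ra_class_add_reg(regs, double_class, reg);
            /* Dd overlaps Rd and Rd+1: anything conflicting with either
             * scalar also conflicts with the double register.
             */
            ra_add_transitive_reg_conflict(regs, d, reg);
            ra_add_transitive_reg_conflict(regs, d + 1, reg);
        }

        ra_set_finalize(regs, NULL);
        return regs;
    }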

@@ -1118,6 +1173,35 @@ retry:
         }
     }
 
+    /* also need to not conflict with any pre-assigned inputs: */
+    if (pre_assign_inputs) {
+        for (unsigned i = 0; i < ctx->ir->ninputs; i++) {
+            struct ir3_instruction *instr = ctx->ir->inputs[i];
+
+            if (!instr)
+                continue;
+
+            struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+
+            /* only consider the first component: */
+            if (id->off > 0)
+                continue;
+
+            unsigned name = ra_name(ctx, id);
+
+            /* Check if array intersects with liverange AND register
+             * range of the input:
+             */
+            if (intersects(arr->start_ip, arr->end_ip,
+                    ctx->def[name], ctx->use[name]) &&
+                intersects(base, base + arr->length,
+                    i, i + class_sizes[id->cls])) {
+                base = MAX2(base, i + class_sizes[id->cls]);
+                goto retry;
+            }
+        }
+    }
+
     arr->reg = base;
 
     for (unsigned i = 0; i < arr->length; i++) {
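
The retry above only fires when both conditions hold: the array and the pre-assigned input are live at the same time, and their register ranges overlap. The interval test this relies on is the usual half-open overlap helper in ir3_ra.c; a sketch of it, reproduced from memory and so best treated as an assumption rather than the exact source:

    /* Half-open interval overlap: [a_start, a_end) vs [b_start, b_end).
     * Assumed shape of ir3_ra.c's intersects() helper.
     */
    static bool
    intersects(unsigned a_start, unsigned a_end, unsigned b_start, unsigned b_end)
    {
        return !((a_start >= b_end) || (b_start >= a_end));
    }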

@@ -1140,11 +1224,12 @@ retry:
     return 0;
 }
 
-int ir3_ra(struct ir3 *ir)
+int ir3_ra(struct ir3_shader_variant *v)
 {
     struct ir3_ra_ctx ctx = {
-        .ir = ir,
-        .set = ir->compiler->set,
+        .v = v,
+        .ir = v->ir,
+        .set = v->ir->compiler->set,
     };
     int ret;
 

@@ -178,9 +178,14 @@ assemble_variant(struct ir3_shader_variant *v)
         v->ir = NULL;
     }
 
+/*
+ * For creating normal shader variants, 'nonbinning' is NULL.  For
+ * creating a binning pass shader, it is a link to the corresponding
+ * normal (non-binning) variant.
+ */
 static struct ir3_shader_variant *
 create_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
-        bool binning_pass)
+        struct ir3_shader_variant *nonbinning)
 {
     struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant);
     int ret;

@@ -190,7 +195,8 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
 
     v->id = ++shader->variant_count;
     v->shader = shader;
-    v->binning_pass = binning_pass;
+    v->binning_pass = !!nonbinning;
+    v->nonbinning = nonbinning;
     v->key = *key;
     v->type = shader->type;
 

@@ -226,7 +232,7 @@ shader_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
         return v;
 
     /* compile new variant if it doesn't exist already: */
-    v = create_variant(shader, key, false);
+    v = create_variant(shader, key, NULL);
     if (v) {
         v->next = shader->variants;
         shader->variants = v;

@@ -246,7 +252,7 @@ ir3_shader_get_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
 
     if (v && binning_pass) {
         if (!v->binning) {
-            v->binning = create_variant(shader, key, true);
+            v->binning = create_variant(shader, key, v);
             *created = true;
         }
         mtx_unlock(&shader->variants_lock);

@@ -391,7 +391,10 @@ struct ir3_shader_variant {
      * which is pointed to by so->binning:
      */
     bool binning_pass;
-    struct ir3_shader_variant *binning;
+//  union {
+        struct ir3_shader_variant *binning;
+        struct ir3_shader_variant *nonbinning;
+//  };
 
     struct ir3_info info;
     struct ir3 *ir;

@@ -791,10 +791,7 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
         struct fd_ringbuffer *state;
 
         state = build_vbo_state(emit, emit->vs);
-        fd6_emit_take_group(emit, state, FD6_GROUP_VBO, 0x6);
-
-        state = build_vbo_state(emit, emit->bs);
-        fd6_emit_take_group(emit, state, FD6_GROUP_VBO_BINNING, 0x1);
+        fd6_emit_take_group(emit, state, FD6_GROUP_VBO, 0x7);
     }
 
     if (dirty & FD_DIRTY_ZSA) {
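
With the binning and draw VS now agreeing on input registers, a single VBO state group built from the draw-pass VS is taken for all passes. The new enable mask 0x7 is simply the union of the old draw mask (0x6) and the old binning mask (0x1); reading the three low bits as per-pass enables is my interpretation, so the names below are illustrative, not the driver's defines:

    /* Assumed meaning of the fd6_emit_take_group() enable_mask bits;
     * identifiers here are made up for illustration only.
     */
    enum vbo_enable_bits {
        ENABLE_BINNING = 0x1,                          /* binning pass        */
        ENABLE_DRAW    = 0x6,                          /* draw passes         */
        ENABLE_ALL     = ENABLE_BINNING | ENABLE_DRAW, /* == 0x7, both passes */
    };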

@@ -49,7 +49,6 @@ enum fd6_state_id {
     FD6_GROUP_LRZ,
     FD6_GROUP_LRZ_BINNING,
     FD6_GROUP_VBO,
-    FD6_GROUP_VBO_BINNING,
     FD6_GROUP_VS_CONST,
     FD6_GROUP_FS_CONST,
     FD6_GROUP_VS_TEX,

@@ -703,6 +703,14 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs,
     state->binning_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
     state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
 
+#ifdef DEBUG
+    for (unsigned i = 0; i < bs->inputs_count; i++) {
+        if (vs->inputs[i].sysval)
+            continue;
+        debug_assert(bs->inputs[i].regid == vs->inputs[i].regid);
+    }
+#endif
+
     setup_config_stateobj(state->config_stateobj, state);
     setup_stateobj(state->binning_stateobj, ctx->screen, state, key, true);
     setup_stateobj(state->stateobj, ctx->screen, state, key, false);