mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 20:08:06 +02:00
nv50: use actual loads/stores if TEMPs are accessed indirectly
This commit is contained in:
parent
d8dcff7970
commit
f30810cb68
10 changed files with 122 additions and 23 deletions
|
|
@ -414,6 +414,8 @@ nv50_generate_code(struct nv50_translation_info *ti)
|
|||
nv_print_program(pc);
|
||||
#endif
|
||||
|
||||
pc->opt_reload_elim = ti->store_to_memory ? FALSE : TRUE;
|
||||
|
||||
/* optimization */
|
||||
ret = nv_pc_exec_pass0(pc);
|
||||
if (ret)
|
||||
|
|
|
|||
|
|
@ -345,6 +345,9 @@ struct nv_pc {
|
|||
|
||||
struct nv_fixup *fixups;
|
||||
int num_fixups;
|
||||
|
||||
/* optimization enables */
|
||||
boolean opt_reload_elim;
|
||||
};
|
||||
|
||||
void nvbb_insert_tail(struct nv_basic_block *, struct nv_instruction *);
|
||||
|
|
|
|||
|
|
@ -412,25 +412,25 @@ emit_form_IMM(struct nv_pc *pc, struct nv_instruction *i, ubyte mod_mask)
|
|||
}
|
||||
|
||||
static void
|
||||
set_ld_st_size(struct nv_pc *pc, ubyte type)
|
||||
set_ld_st_size(struct nv_pc *pc, int s, ubyte type)
|
||||
{
|
||||
switch (type) {
|
||||
case NV_TYPE_F64:
|
||||
pc->emit[1] |= 0x8000;
|
||||
pc->emit[1] |= 0x8000 << s;
|
||||
break;
|
||||
case NV_TYPE_F32:
|
||||
case NV_TYPE_S32:
|
||||
case NV_TYPE_U32:
|
||||
pc->emit[1] |= 0xc000;
|
||||
pc->emit[1] |= 0xc000 << s;
|
||||
break;
|
||||
case NV_TYPE_S16:
|
||||
pc->emit[1] |= 0x6000;
|
||||
pc->emit[1] |= 0x6000 << s;
|
||||
break;
|
||||
case NV_TYPE_U16:
|
||||
pc->emit[1] |= 0x4000;
|
||||
pc->emit[1] |= 0x4000 << s;
|
||||
break;
|
||||
case NV_TYPE_S8:
|
||||
pc->emit[1] |= 0x2000;
|
||||
pc->emit[1] |= 0x2000 << s;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
|
@ -473,12 +473,14 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i)
|
|||
if (sf == NV_FILE_MEM_L) {
|
||||
pc->emit[0] = 0xd0000001;
|
||||
pc->emit[1] = 0x40000000;
|
||||
|
||||
set_addr(pc, i);
|
||||
} else {
|
||||
NOUVEAU_ERR("invalid ld source file\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
set_ld_st_size(pc, STYPE(i, 0));
|
||||
set_ld_st_size(pc, (sf == NV_FILE_MEM_L) ? 8 : 0, STYPE(i, 0));
|
||||
|
||||
set_dst(pc, i->def[0]);
|
||||
set_pred_wr(pc, i);
|
||||
|
|
@ -495,7 +497,19 @@ emit_ld(struct nv_pc *pc, struct nv_instruction *i)
|
|||
/*
 * Emit the two-word encoding of a store instruction into pc->emit[].
 *
 * Source 0 must be a location in the NV_FILE_MEM_L file (l[] memory) and
 * source 1, the value being stored, must live in NV_FILE_GPR; both
 * conditions are asserted.
 *
 * pc: program context whose emit[] words are written.
 * i:  the store instruction to encode.
 */
static void
|
||||
emit_st(struct nv_pc *pc, struct nv_instruction *i)
|
||||
{
|
||||
assert(SFILE(i, 1) == NV_FILE_GPR);
|
||||
assert(SFILE(i, 0) == NV_FILE_MEM_L);
|
||||
|
||||
/* Base opcode words; the remaining fields are OR-ed / filled in below. */
pc->emit[0] = 0xd0000001;
|
||||
pc->emit[1] = 0x60000000;
|
||||
|
||||
/* NOTE(review): SID presumably encodes the source's id at the given bit
 * position (2 for the stored value, 9 for the memory location) - confirm
 * against the SID definition. */
SID(pc, i->src[1], 2);
|
||||
SID(pc, i->src[0], 9);
|
||||
|
||||
/* The access size is derived from the type of source 1; the shift argument
 * of 8 moves the size bits 8 positions up within emit[1]. */
set_ld_st_size(pc, 8, STYPE(i, 1));
|
||||
|
||||
/* NOTE(review): set_addr / set_pred are defined elsewhere; presumably they
 * encode the indirect address register and the predicate - confirm. */
set_addr(pc, i);
|
||||
set_pred(pc, i);
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
|
|||
|
|
@ -82,6 +82,8 @@ inst_commutation_legal(struct nv_instruction *a,
|
|||
static INLINE boolean
|
||||
inst_cullable(struct nv_instruction *nvi)
|
||||
{
|
||||
if (nvi->opcode == NV_OP_STA)
|
||||
return FALSE;
|
||||
return (!(nvi->is_terminator || nvi->is_join ||
|
||||
nvi->target ||
|
||||
nvi->fixed ||
|
||||
|
|
@ -739,6 +741,7 @@ struct nv_pass_reld_elim {
|
|||
int alloc;
|
||||
};
|
||||
|
||||
/* TODO: properly handle loads from l[] memory in the presence of stores */
|
||||
static int
|
||||
nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
|
||||
{
|
||||
|
|
@ -1074,13 +1077,15 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
|
||||
reldelim->pc = pc;
|
||||
pc->pass_seq++;
|
||||
ret = nv_pass_reload_elim(reldelim, root);
|
||||
FREE(reldelim);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (pc->opt_reload_elim) {
|
||||
reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
|
||||
reldelim->pc = pc;
|
||||
pc->pass_seq++;
|
||||
ret = nv_pass_reload_elim(reldelim, root);
|
||||
FREE(reldelim);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
pc->pass_seq++;
|
||||
ret = nv_pass_cse(&pass, root);
|
||||
|
|
|
|||
|
|
@ -217,6 +217,9 @@ nv_print_value(struct nv_value *value, struct nv_value *ind, ubyte type)
|
|||
case NV_FILE_FLAGS:
|
||||
PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value));
|
||||
break;
|
||||
case NV_FILE_MEM_L:
|
||||
nv_print_address('l', -1, ind, 4 * nv_value_id(value));
|
||||
break;
|
||||
case NV_FILE_MEM_S:
|
||||
nv_print_address('s', -1, ind, 4 * nv_value_id(value));
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -168,10 +168,17 @@ prog_inst(struct nv50_translation_info *ti,
|
|||
inst->Src[0].Register.File == TGSI_FILE_INPUT &&
|
||||
dst->Index == ti->edgeflag_out)
|
||||
ti->p->vp.edgeflag = inst->Src[0].Register.Index;
|
||||
} else
|
||||
if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
|
||||
if (inst->Dst[0].Register.Indirect)
|
||||
ti->store_to_memory = TRUE;
|
||||
}
|
||||
|
||||
for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
|
||||
src = &inst->Src[s].Register;
|
||||
if (src->File == TGSI_FILE_TEMPORARY)
|
||||
if (inst->Src[s].Register.Indirect)
|
||||
ti->store_to_memory = TRUE;
|
||||
if (src->File != TGSI_FILE_INPUT)
|
||||
continue;
|
||||
mask = nv50_tgsi_src_mask(inst, s);
|
||||
|
|
|
|||
|
|
@ -116,6 +116,7 @@ struct nv50_translation_info {
|
|||
int output_access[PIPE_MAX_SHADER_OUTPUTS][4];
|
||||
boolean indirect_inputs;
|
||||
boolean indirect_outputs;
|
||||
boolean store_to_memory;
|
||||
struct tgsi_shader_info scan;
|
||||
uint32_t *immd32;
|
||||
unsigned immd32_nr;
|
||||
|
|
|
|||
|
|
@ -274,7 +274,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
|
|||
uint64_t value;
|
||||
unsigned chipset = dev->chipset;
|
||||
unsigned tesla_class = 0;
|
||||
unsigned stack_size;
|
||||
unsigned stack_size, local_size, max_warps;
|
||||
int ret, i;
|
||||
const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
|
||||
|
||||
|
|
@ -495,9 +495,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
|
|||
/* shader stack */
|
||||
nouveau_device_get_param(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
|
||||
|
||||
stack_size = util_bitcount(value & 0xffff);
|
||||
stack_size *= util_bitcount((value >> 24) & 0xf);
|
||||
stack_size *= 32 * 64 * 8;
|
||||
max_warps = util_bitcount(value & 0xffff);
|
||||
max_warps *= util_bitcount((value >> 24) & 0xf) * 32;
|
||||
|
||||
stack_size = max_warps * 64 * 8;
|
||||
|
||||
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
|
||||
stack_size, &screen->stack_bo);
|
||||
|
|
@ -510,6 +511,22 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
|
|||
OUT_RELOCl(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
OUT_RING (chan, 4);
|
||||
|
||||
local_size = (NV50_CAP_MAX_PROGRAM_TEMPS * 16) * max_warps * 32;
|
||||
|
||||
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
|
||||
local_size, &screen->local_bo);
|
||||
if (ret) {
|
||||
nv50_screen_destroy(pscreen);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
local_size = NV50_CAP_MAX_PROGRAM_TEMPS * 16;
|
||||
|
||||
BEGIN_RING(chan, screen->tesla, NV50TCL_LOCAL_ADDRESS_HIGH, 3);
|
||||
OUT_RELOCh(chan, screen->local_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
OUT_RELOCl(chan, screen->local_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
OUT_RING (chan, util_unsigned_logbase2(local_size / 8));
|
||||
|
||||
/* Vertex array limits - max them out */
|
||||
for (i = 0; i < 16; i++) {
|
||||
BEGIN_RING(chan, screen->tesla,
|
||||
|
|
|
|||
|
|
@ -25,7 +25,8 @@ struct nv50_screen {
|
|||
struct nouveau_bo *tic;
|
||||
struct nouveau_bo *tsc;
|
||||
|
||||
struct nouveau_bo *stack_bo;
|
||||
struct nouveau_bo *stack_bo; /* control flow stack */
|
||||
struct nouveau_bo *local_bo; /* l[] memory */
|
||||
|
||||
boolean force_push;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -558,6 +558,38 @@ bld_insn_3(struct bld_context *bld, uint opcode,
|
|||
return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.type));
|
||||
}
|
||||
|
||||
/*
 * Build an NV_OP_STA instruction that stores val to l[] memory.
 *
 * bld:  build context; bld->pc is used to allocate the instruction, the
 *       location value and the references.
 * ptr:  value attached as source 4 of the instruction. emit_store passes
 *       NULL here when the destination register is not indirect.
 * ofst: offset of the location; stored as ofst * 4 in the location's reg.id.
 *       NOTE(review): callers pass (register index * 4 + channel), so ofst
 *       looks like a 32-bit component index and the * 4 presumably converts
 *       it to bytes - confirm.
 * val:  value to store, attached as source 1.
 */
static void
|
||||
bld_lmem_store(struct bld_context *bld, struct nv_value *ptr, int ofst,
|
||||
struct nv_value *val)
|
||||
{
|
||||
struct nv_instruction *insn = new_instruction(bld->pc, NV_OP_STA);
|
||||
struct nv_value *loc;
|
||||
|
||||
/* Fresh value in the NV_FILE_MEM_L file describing the target location. */
loc = new_value(bld->pc, NV_FILE_MEM_L, NV_TYPE_U32);
|
||||
|
||||
loc->reg.id = ofst * 4;
|
||||
|
||||
/* Source layout: 0 = memory location, 1 = stored value, 4 = ptr. */
nv_reference(bld->pc, &insn->src[0], loc);
|
||||
nv_reference(bld->pc, &insn->src[1], val);
|
||||
nv_reference(bld->pc, &insn->src[4], ptr);
|
||||
}
|
||||
|
||||
/*
 * Build an NV_OP_LDA instruction that loads from l[] memory and return the
 * value it defines.
 *
 * bld:  build context; bld->pc is used to allocate the location value and
 *       the reference.
 * ptr:  value attached as source 4 of the load instruction.
 *       NOTE(review): presumably the address register for indirect access,
 *       possibly NULL for direct access - confirm against callers.
 * ofst: offset of the location; stored as ofst * 4 in the location's reg.id.
 *
 * Returns the result value of the NV_OP_LDA instruction.
 */
static struct nv_value *
|
||||
bld_lmem_load(struct bld_context *bld, struct nv_value *ptr, int ofst)
|
||||
{
|
||||
struct nv_value *loc, *val;
|
||||
|
||||
/* Fresh value in the NV_FILE_MEM_L file describing the source location. */
loc = new_value(bld->pc, NV_FILE_MEM_L, NV_TYPE_U32);
|
||||
|
||||
loc->reg.id = ofst * 4;
|
||||
|
||||
val = bld_insn_1(bld, NV_OP_LDA, loc);
|
||||
|
||||
/* Attach ptr as source 4 of the instruction that defines val. */
nv_reference(bld->pc, &val->insn->src[4], ptr);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
#define BLD_INSN_1_EX(d, op, dt, s0, s0t) \
|
||||
do { \
|
||||
(d) = bld_insn_1(bld, (NV_OP_##op), (s0)); \
|
||||
|
|
@ -854,10 +886,18 @@ infer_dst_type(unsigned opcode)
|
|||
|
||||
static void
|
||||
emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
|
||||
unsigned chan, struct nv_value *value)
|
||||
unsigned chan, struct nv_value *value)
|
||||
{
|
||||
struct nv_value *ptr;
|
||||
const struct tgsi_full_dst_register *reg = &inst->Dst[0];
|
||||
|
||||
if (reg->Register.Indirect) {
|
||||
ptr = FETCH_ADDR(reg->Indirect.Index,
|
||||
tgsi_util_get_src_register_swizzle(&reg->Indirect, 0));
|
||||
} else {
|
||||
ptr = NULL;
|
||||
}
|
||||
|
||||
assert(chan < 4);
|
||||
|
||||
if (inst->Instruction.Opcode != TGSI_OPCODE_MOV)
|
||||
|
|
@ -893,7 +933,11 @@ emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
|
|||
value->reg.file = NV_FILE_GPR;
|
||||
if (value->insn->bb != bld->pc->current_block)
|
||||
value = bld_insn_1(bld, NV_OP_MOV, value);
|
||||
STORE_TEMP(reg->Register.Index, chan, value);
|
||||
|
||||
if (bld->ti->store_to_memory)
|
||||
bld_lmem_store(bld, ptr, reg->Register.Index * 4 + chan, value);
|
||||
else
|
||||
STORE_TEMP(reg->Register.Index, chan, value);
|
||||
break;
|
||||
case TGSI_FILE_ADDRESS:
|
||||
assert(reg->Register.Index < BLD_MAX_ADDRS);
|
||||
|
|
@ -1064,8 +1108,10 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
|
|||
bld->saved_inputs[bld->ti->input_map[idx][swz]] = res;
|
||||
break;
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
/* this should be load from l[], with reload elimination later on */
|
||||
res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
|
||||
if (bld->ti->store_to_memory)
|
||||
res = bld_lmem_load(bld, ptr, idx * 4 + swz);
|
||||
else
|
||||
res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
|
||||
break;
|
||||
case TGSI_FILE_ADDRESS:
|
||||
res = bld_fetch_global(bld, &bld->avs[idx][swz]);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue