r600g: lazy load for AR register

Emit MOVA* instruction only when AR is used.

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
This commit is contained in:
Vadim Girlin 2011-11-13 22:08:33 +04:00 committed by Alex Deucher
parent 88a140cd19
commit 8e366dc365
3 changed files with 61 additions and 46 deletions

View file

@ -255,6 +255,7 @@ static int r600_bytecode_add_cf(struct r600_bytecode *bc)
bc->ncf++;
bc->ndw += 2;
bc->force_add_cf = 0;
bc->ar_loaded = 0;
return 0;
}
@ -1203,6 +1204,32 @@ static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, struct r60
return 0;
}
/* load AR register from gpr (bc->ar_reg) with MOVA_INT */
static int load_ar(struct r600_bytecode *bc)
{
struct r600_bytecode_alu alu;
int r;
if (bc->ar_loaded)
return 0;
/* hack to avoid making MOVA the last instruction in the clause */
if ((bc->cf_last->ndw>>1) >= 110)
bc->force_add_cf = 1;
memset(&alu, 0, sizeof(alu));
alu.inst = BC_INST(bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
alu.src[0].sel = bc->ar_reg;
alu.last = 1;
r = r600_bytecode_add_alu(bc, &alu);
if (r)
return r;
bc->cf_last->r6xx_uses_waterfall = 1;
bc->ar_loaded = 1;
return 0;
}
int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type)
{
struct r600_bytecode_alu *nalu = r600_bytecode_alu();
@ -1237,6 +1264,14 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytec
}
bc->cf_last->inst = (type << 3);
/* Check AR usage and load it if required */
for (i = 0; i < 3; i++)
if (nalu->src[i].rel && !bc->ar_loaded)
load_ar(bc);
if (nalu->dst.rel && !bc->ar_loaded)
load_ar(bc);
/* Setup the kcache for this ALU instruction. This will start a new
* ALU clause if needed. */
if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) {

View file

@ -184,6 +184,8 @@ struct r600_bytecode {
struct r600_cf_stack_entry fc_stack[32];
unsigned call_sp;
struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH];
unsigned ar_loaded;
unsigned ar_reg;
};
/* eg_asm.c */

View file

@ -166,7 +166,6 @@ struct r600_shader_ctx {
unsigned type;
unsigned file_offset[TGSI_FILE_COUNT];
unsigned temp_reg;
unsigned ar_reg;
struct r600_shader_tgsi_instruction *inst_info;
struct r600_bytecode *bc;
struct r600_shader *shader;
@ -553,7 +552,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset
memset(&alu, 0, sizeof(alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
alu.src[0].sel = ctx->ar_reg;
alu.src[0].sel = ctx->bc->ar_reg;
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].value = offset;
@ -567,7 +566,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset
ar_reg = dst_reg;
} else {
ar_reg = ctx->ar_reg;
ar_reg = ctx->bc->ar_reg;
}
memset(&vtx, 0, sizeof(vtx));
@ -750,9 +749,9 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
ctx.temp_reg = ctx.ar_reg + 1;
ctx.temp_reg = ctx.bc->ar_reg + 1;
ctx.nliterals = 0;
ctx.literals = NULL;
@ -2942,45 +2941,26 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
break;
case TGSI_OPCODE_UARL:
alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
break;
default:
assert(0);
return -1;
}
if (alu.inst) {
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.last = 1;
alu.dst.sel = ctx->ar_reg;
alu.dst.write = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
}
/* TODO: Note that the MOVA can be avoided if we never use AR for
* indexing non-CB registers in the current ALU clause. Similarly, we
* need to load AR from ar_reg again if we started a new clause
* between ARL and AR usage. The easy way to do that is to remove
* the MOVA here, and load it for the first AR access after ar_reg
* has been modified in each clause. */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
if (inst->Instruction.Opcode == TGSI_OPCODE_UARL)
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
else {
alu.src[0].sel = ctx->ar_reg;
alu.src[0].chan = 0;
}
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.last = 1;
alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
ctx->bc->ar_loaded = 0;
return 0;
}
static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
{
/* TODO from r600c, ar values don't persist between clauses */
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int r;
@ -2990,7 +2970,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->ar_reg;
alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
alu.last = 1;
@ -2999,8 +2979,8 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
alu.src[0].sel = ctx->ar_reg;
alu.dst.sel = ctx->ar_reg;
alu.src[0].sel = ctx->bc->ar_reg;
alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
alu.last = 1;
@ -3011,7 +2991,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->ar_reg;
alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
alu.last = 1;
@ -3019,24 +2999,22 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
return r;
break;
case TGSI_OPCODE_UARL:
memset(&alu, 0, sizeof(alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
alu.last = 1;
if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
return r;
break;
default:
assert(0);
return -1;
}
memset(&alu, 0, sizeof(alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
if (inst->Instruction.Opcode == TGSI_OPCODE_UARL)
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
else
alu.src[0].sel = ctx->ar_reg;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
ctx->bc->cf_last->r6xx_uses_waterfall = 1;
ctx->bc->ar_loaded = 0;
return 0;
}