r600g: revert some asm optimisations

They didn't have the desired effect and are still quite buggy.
This commit is contained in:
Christian König 2011-03-19 11:40:22 +01:00
parent 2bf95c519e
commit 74e1d64c6d
4 changed files with 158 additions and 1029 deletions

View file

@ -32,14 +32,12 @@
int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
{
unsigned id = cf->id;
unsigned end_of_program = bc->cf.prev == &cf->list;
switch (cf->inst) {
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
assert(!end_of_program);
bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
@ -48,16 +46,15 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
S_SQ_CF_ALU_WORD1_BARRIER(cf->barrier) |
S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
S_SQ_CF_ALU_WORD1_BARRIER(1) |
S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
break;
case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX:
bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
S_SQ_CF_WORD1_BARRIER(cf->barrier) |
S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1) |
S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
S_SQ_CF_WORD1_BARRIER(1) |
S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
break;
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
@ -70,9 +67,9 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) |
S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end_of_program);
S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
break;
case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
@ -85,10 +82,9 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
S_SQ_CF_WORD1_BARRIER(cf->barrier) |
S_SQ_CF_WORD1_COND(cf->cond) |
S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
S_SQ_CF_WORD1_BARRIER(1) |
S_SQ_CF_WORD1_COND(cf->cond) |
S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
break;
default:

File diff suppressed because it is too large Load diff

View file

@ -109,6 +109,8 @@ struct r600_bc_vtx {
struct r600_bc_output {
unsigned array_base;
unsigned type;
unsigned end_of_program;
unsigned inst;
unsigned elem_size;
unsigned gpr;
unsigned swizzle_x;
@ -116,6 +118,7 @@ struct r600_bc_output {
unsigned swizzle_z;
unsigned swizzle_w;
unsigned burst_count;
unsigned barrier;
};
struct r600_bc_kcache {
@ -133,7 +136,6 @@ struct r600_bc_cf {
unsigned cond;
unsigned pop_count;
unsigned cf_addr; /* control flow addr */
unsigned barrier;
struct r600_bc_kcache kcache[2];
unsigned r6xx_uses_waterfall;
struct list_head alu;

View file

@ -567,7 +567,7 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
struct tgsi_full_property *property;
struct r600_shader_ctx ctx;
struct r600_bc_output output[32];
unsigned noutput;
unsigned output_done, noutput;
unsigned opcode;
int i, r = 0, pos0;
@ -701,8 +701,10 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
output[i].burst_count = 1;
output[i].barrier = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = i - pos0;
output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
switch (ctx.type) {
case TGSI_PROCESSOR_VERTEX:
if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
@ -763,8 +765,10 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
output[i].burst_count = 1;
output[i].barrier = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = 0;
output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
noutput++;
}
}
@ -778,10 +782,22 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
output[0].swizzle_z = 7;
output[0].swizzle_w = 7;
output[0].burst_count = 1;
output[0].barrier = 1;
output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[0].array_base = 0;
output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
noutput++;
}
/* set export done on last export of each type */
for (i = noutput - 1, output_done = 0; i >= 0; i--) {
if (i == (noutput - 1)) {
output[i].end_of_program = 1;
}
if (!(output_done & (1 << output[i].type))) {
output_done |= (1 << output[i].type);
output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
}
}
/* add output to bytecode */
for (i = 0; i < noutput; i++) {
r = r600_bc_add_output(ctx.bc, &output[i]);