mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 02:28:10 +02:00
r600g: rework literal handling
This commit is contained in:
parent
adf89a3329
commit
26127d6a2f
5 changed files with 148 additions and 262 deletions
|
|
@ -599,10 +599,90 @@ static int replace_gpr_with_pv_ps(struct r600_bc_alu *slots[5], struct r600_bc_a
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Pick the ALU source select for a 32-bit immediate.
 *
 * A handful of bit patterns have a dedicated V_SQ_ALU_SRC_* select and
 * therefore do not need a literal slot:
 *   integer 0, 1 and -1,
 *   float 1.0 and 0.5, and their negations (same select, sign toggled).
 * Every other value gets V_SQ_ALU_SRC_LITERAL.
 *
 * value: raw bit pattern of the immediate.
 * sel:   out, always overwritten with the chosen select.
 * neg:   in/out, toggled (not set) for -1.0f and -0.5f so that a negation
 *        already requested by the caller is preserved; untouched otherwise.
 */
void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg)
{
	unsigned chosen = V_SQ_ALU_SRC_LITERAL;
	unsigned flip_sign = 0;

	if (value == 0) {
		chosen = V_SQ_ALU_SRC_0;
	} else if (value == 1) {
		chosen = V_SQ_ALU_SRC_1_INT;
	} else if (value == (u32)-1) {
		chosen = V_SQ_ALU_SRC_M_1_INT;
	} else if (value == 0x3F800000) {	/* 1.0f */
		chosen = V_SQ_ALU_SRC_1;
	} else if (value == 0x3F000000) {	/* 0.5f */
		chosen = V_SQ_ALU_SRC_0_5;
	} else if (value == 0xBF800000) {	/* -1.0f */
		chosen = V_SQ_ALU_SRC_1;
		flip_sign = 1;
	} else if (value == 0xBF000000) {	/* -0.5f */
		chosen = V_SQ_ALU_SRC_0_5;
		flip_sign = 1;
	}

	*sel = chosen;
	if (flip_sign)
		*neg ^= 1;
}
|
||||
|
||||
/* compute how many literal are needed */
|
||||
static int r600_bc_alu_nliterals(struct r600_bc_alu *alu, uint32_t literal[4], unsigned *nliteral)
|
||||
{
|
||||
unsigned num_src = r600_bc_get_num_operands(alu);
|
||||
unsigned i, j;
|
||||
|
||||
for (i = 0; i < num_src; ++i) {
|
||||
if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
|
||||
uint32_t value = alu->src[i].value[alu->src[i].chan];
|
||||
unsigned found = 0;
|
||||
for (j = 0; j < *nliteral; ++j) {
|
||||
if (literal[j] == value) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
if (*nliteral >= 4)
|
||||
return -EINVAL;
|
||||
literal[(*nliteral)++] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void r600_bc_alu_adjust_literals(struct r600_bc_alu *alu, uint32_t literal[4], unsigned nliteral)
|
||||
{
|
||||
unsigned num_src = r600_bc_get_num_operands(alu);
|
||||
unsigned i, j;
|
||||
|
||||
for (i = 0; i < num_src; ++i) {
|
||||
if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
|
||||
uint32_t value = alu->src[i].value[alu->src[i].chan];
|
||||
for (j = 0; j < nliteral; ++j) {
|
||||
if (literal[j] == value) {
|
||||
alu->src[i].chan = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)
|
||||
{
|
||||
struct r600_bc_alu *prev[5];
|
||||
struct r600_bc_alu *result[5] = { NULL };
|
||||
|
||||
uint32_t literal[4];
|
||||
unsigned nliteral = 0;
|
||||
|
||||
int i, j, r, src, num_src;
|
||||
int num_once_inst = 0;
|
||||
|
||||
|
|
@ -611,13 +691,12 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], s
|
|||
return r;
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
// TODO: we have literals? forget it!
|
||||
if (prev[i] && prev[i]->nliteral)
|
||||
/* check number of literals */
|
||||
if (prev[i] && r600_bc_alu_nliterals(prev[i], literal, &nliteral))
|
||||
return 0;
|
||||
if (slots[i] && slots[i]->nliteral)
|
||||
if (slots[i] && r600_bc_alu_nliterals(slots[i], literal, &nliteral))
|
||||
return 0;
|
||||
|
||||
|
||||
// let's check used slots
|
||||
if (prev[i] && !slots[i]) {
|
||||
result[i] = prev[i];
|
||||
|
|
@ -711,7 +790,6 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
|
|||
if (nalu == NULL)
|
||||
return -ENOMEM;
|
||||
memcpy(nalu, alu, sizeof(struct r600_bc_alu));
|
||||
nalu->nliteral = 0;
|
||||
|
||||
if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {
|
||||
/* check if we could add it anyway */
|
||||
|
|
@ -749,20 +827,10 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
|
|||
if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) {
|
||||
bc->ngpr = alu->src[i].sel + 1;
|
||||
}
|
||||
/* compute how many literal are needed
|
||||
* either 2 or 4 literals
|
||||
*/
|
||||
if (alu->src[i].sel == 253) {
|
||||
if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
|
||||
nalu->nliteral = (alu->src[i].chan + 2) & 0x6;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!LIST_IS_EMPTY(&bc->cf_last->alu)) {
|
||||
lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
|
||||
if (!lalu->last && lalu->nliteral > nalu->nliteral) {
|
||||
nalu->nliteral = lalu->nliteral;
|
||||
}
|
||||
if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
|
||||
r600_bc_special_constants(
|
||||
nalu->src[i].value[nalu->src[i].chan],
|
||||
&nalu->src[i].sel, &nalu->src[i].neg);
|
||||
}
|
||||
if (alu->dst.sel >= bc->ngpr) {
|
||||
bc->ngpr = alu->dst.sel + 1;
|
||||
|
|
@ -809,46 +877,6 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
|
|||
return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
|
||||
}
|
||||
|
||||
int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
|
||||
{
|
||||
struct r600_bc_alu *alu;
|
||||
|
||||
if (bc->cf_last == NULL) {
|
||||
return 0;
|
||||
}
|
||||
if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
|
||||
return 0;
|
||||
}
|
||||
/* all same on EG */
|
||||
if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
|
||||
bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
|
||||
bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
|
||||
bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
|
||||
bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
|
||||
bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
|
||||
bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
|
||||
return 0;
|
||||
}
|
||||
/* same on EG */
|
||||
if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
|
||||
(bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)) &&
|
||||
(bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)) &&
|
||||
(bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
|
||||
LIST_IS_EMPTY(&bc->cf_last->alu)) {
|
||||
R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
|
||||
return -EINVAL;
|
||||
}
|
||||
alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
|
||||
if (!alu->last || !alu->nliteral || alu->literal_added) {
|
||||
return 0;
|
||||
}
|
||||
memcpy(alu->value, value, 4 * 4);
|
||||
bc->cf_last->ndw += alu->nliteral;
|
||||
bc->ndw += alu->nliteral;
|
||||
alu->literal_added = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
|
||||
{
|
||||
struct r600_bc_vtx *nvtx = r600_bc_vtx();
|
||||
|
|
@ -999,8 +1027,6 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign
|
|||
/* r600 only, r700/eg bits in r700_asm.c */
|
||||
static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
/* don't replace gpr by pv or ps for destination register */
|
||||
bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
|
||||
S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
|
||||
|
|
@ -1037,14 +1063,6 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
|
|||
S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
|
||||
S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
|
||||
}
|
||||
if (alu->last) {
|
||||
if (alu->nliteral && !alu->literal_added) {
|
||||
R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
|
||||
}
|
||||
for (i = 0; i < alu->nliteral; i++) {
|
||||
bc->bytecode[id++] = alu->value[i];
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -1122,8 +1140,10 @@ int r600_bc_build(struct r600_bc *bc)
|
|||
struct r600_bc_alu *alu;
|
||||
struct r600_bc_vtx *vtx;
|
||||
struct r600_bc_tex *tex;
|
||||
uint32_t literal[4];
|
||||
unsigned nliteral;
|
||||
unsigned addr;
|
||||
int r;
|
||||
int i, r;
|
||||
|
||||
if (bc->callstack[0].max > 0)
|
||||
bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
|
||||
|
|
@ -1140,6 +1160,16 @@ int r600_bc_build(struct r600_bc *bc)
|
|||
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
|
||||
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
|
||||
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
|
||||
nliteral = 0;
|
||||
LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
|
||||
r = r600_bc_alu_nliterals(alu, literal, &nliteral);
|
||||
if (r)
|
||||
return r;
|
||||
if (alu->last) {
|
||||
cf->ndw += align(nliteral, 2);
|
||||
nliteral = 0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
|
||||
case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
|
||||
|
|
@ -1188,7 +1218,12 @@ int r600_bc_build(struct r600_bc *bc)
|
|||
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
|
||||
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
|
||||
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
|
||||
nliteral = 0;
|
||||
LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
|
||||
r = r600_bc_alu_nliterals(alu, literal, &nliteral);
|
||||
if (r)
|
||||
return r;
|
||||
r600_bc_alu_adjust_literals(alu, literal, nliteral);
|
||||
switch(bc->chiprev) {
|
||||
case CHIPREV_R600:
|
||||
r = r600_bc_alu_build(bc, alu, addr);
|
||||
|
|
@ -1205,7 +1240,10 @@ int r600_bc_build(struct r600_bc *bc)
|
|||
return r;
|
||||
addr += 2;
|
||||
if (alu->last) {
|
||||
addr += alu->nliteral;
|
||||
for (i = 0; i < align(nliteral, 2); ++i) {
|
||||
bc->bytecode[addr++] = literal[i];
|
||||
}
|
||||
nliteral = 0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
|
@ -1292,6 +1330,8 @@ void r600_bc_dump(struct r600_bc *bc)
|
|||
struct r600_bc_tex *tex;
|
||||
|
||||
unsigned i, id;
|
||||
uint32_t literal[4];
|
||||
unsigned nliteral;
|
||||
char chip = '6';
|
||||
|
||||
switch (bc->chiprev) {
|
||||
|
|
@ -1378,7 +1418,10 @@ void r600_bc_dump(struct r600_bc *bc)
|
|||
}
|
||||
|
||||
id = cf->addr;
|
||||
nliteral = 0;
|
||||
LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
|
||||
r600_bc_alu_nliterals(alu, literal, &nliteral);
|
||||
|
||||
fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]);
|
||||
fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
|
||||
fprintf(stderr, "REL:%d ", alu->src[0].rel);
|
||||
|
|
@ -1413,10 +1456,12 @@ void r600_bc_dump(struct r600_bc *bc)
|
|||
|
||||
id++;
|
||||
if (alu->last) {
|
||||
for (i = 0; i < alu->nliteral; i++, id++) {
|
||||
for (i = 0; i < nliteral; i++, id++) {
|
||||
float *f = (float*)(bc->bytecode + id);
|
||||
fprintf(stderr, "%04d %08X %f\n", id, bc->bytecode[id], *f);
|
||||
}
|
||||
id += nliteral & 1;
|
||||
nliteral = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ struct r600_bc_alu_src {
|
|||
unsigned neg;
|
||||
unsigned abs;
|
||||
unsigned rel;
|
||||
u32 *value;
|
||||
};
|
||||
|
||||
struct r600_bc_alu_dst {
|
||||
|
|
@ -52,11 +53,8 @@ struct r600_bc_alu {
|
|||
unsigned last;
|
||||
unsigned is_op3;
|
||||
unsigned predicate;
|
||||
unsigned nliteral;
|
||||
unsigned literal_added;
|
||||
unsigned bank_swizzle;
|
||||
unsigned bank_swizzle_force;
|
||||
u32 value[4];
|
||||
unsigned omod;
|
||||
};
|
||||
|
||||
|
|
@ -195,13 +193,13 @@ void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
|
|||
int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
|
||||
void r600_bc_clear(struct r600_bc *bc);
|
||||
int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu);
|
||||
int r600_bc_add_literal(struct r600_bc *bc, const u32 *value);
|
||||
int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
|
||||
int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
|
||||
int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
|
||||
int r600_bc_build(struct r600_bc *bc);
|
||||
int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
|
||||
int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
|
||||
void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg);
|
||||
void r600_bc_dump(struct r600_bc *bc);
|
||||
void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
|
||||
void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
|
||||
|
|
|
|||
|
|
@ -225,21 +225,23 @@ int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
|
||||
int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
|
||||
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
|
||||
{
|
||||
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
|
||||
u32 *literals;
|
||||
int r;
|
||||
|
||||
//fprintf(stderr, "--------------------------------------------------------------\n");
|
||||
//tgsi_dump(tokens, 0);
|
||||
shader->shader.family = r600_get_family(rctx->radeon);
|
||||
r = r600_shader_from_tgsi(tokens, &shader->shader);
|
||||
r = r600_shader_from_tgsi(tokens, &shader->shader, &literals);
|
||||
if (r) {
|
||||
R600_ERR("translation from TGSI failed !\n");
|
||||
return r;
|
||||
}
|
||||
r = r600_bc_build(&shader->shader.bc);
|
||||
free(literals);
|
||||
if (r) {
|
||||
R600_ERR("building bytecode failed !\n");
|
||||
return r;
|
||||
|
|
@ -272,7 +274,6 @@ struct r600_shader_ctx {
|
|||
struct r600_shader_tgsi_instruction *inst_info;
|
||||
struct r600_bc *bc;
|
||||
struct r600_shader *shader;
|
||||
u32 value[4];
|
||||
u32 *literals;
|
||||
u32 nliterals;
|
||||
u32 max_driver_temp_used;
|
||||
|
|
@ -481,7 +482,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
|
|||
return ctx->num_interp_gpr;
|
||||
}
|
||||
|
||||
int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
|
||||
int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals)
|
||||
{
|
||||
struct tgsi_full_immediate *immediate;
|
||||
struct r600_shader_ctx ctx;
|
||||
|
|
@ -583,9 +584,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
|
|||
else
|
||||
ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
|
||||
r = ctx.inst_info->process(&ctx);
|
||||
if (r)
|
||||
goto out_err;
|
||||
r = r600_bc_add_literal(ctx.bc, ctx.value);
|
||||
if (r)
|
||||
goto out_err;
|
||||
break;
|
||||
|
|
@ -706,7 +704,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
|
|||
if (r)
|
||||
goto out_err;
|
||||
}
|
||||
free(ctx.literals);
|
||||
*literals = ctx.literals;
|
||||
tgsi_parse_free(&ctx.parse);
|
||||
return 0;
|
||||
out_err:
|
||||
|
|
@ -740,38 +738,13 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
|
|||
(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
|
||||
|
||||
index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
|
||||
switch(ctx->literals[index]) {
|
||||
case 0:
|
||||
r600_src->sel = V_SQ_ALU_SRC_0;
|
||||
r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
|
||||
if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
|
||||
return 0;
|
||||
case 1:
|
||||
r600_src->sel = V_SQ_ALU_SRC_1_INT;
|
||||
return 0;
|
||||
case -1:
|
||||
r600_src->sel = V_SQ_ALU_SRC_M_1_INT;
|
||||
return 0;
|
||||
case 0x3F800000: // 1.0f
|
||||
r600_src->sel = V_SQ_ALU_SRC_1;
|
||||
return 0;
|
||||
case 0x3F000000: // 0.5f
|
||||
r600_src->sel = V_SQ_ALU_SRC_0_5;
|
||||
return 0;
|
||||
case 0xBF800000: // -1.0f
|
||||
r600_src->sel = V_SQ_ALU_SRC_1;
|
||||
r600_src->neg ^= 1;
|
||||
return 0;
|
||||
case 0xBF000000: // -0.5f
|
||||
r600_src->sel = V_SQ_ALU_SRC_0_5;
|
||||
r600_src->neg ^= 1;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
index = tgsi_src->Register.Index;
|
||||
r600_src->sel = V_SQ_ALU_SRC_LITERAL;
|
||||
ctx->value[0] = ctx->literals[index * 4 + 0];
|
||||
ctx->value[1] = ctx->literals[index * 4 + 1];
|
||||
ctx->value[2] = ctx->literals[index * 4 + 2];
|
||||
ctx->value[3] = ctx->literals[index * 4 + 3];
|
||||
r600_src->value = ctx->literals + index * 4;
|
||||
} else {
|
||||
if (tgsi_src->Register.Indirect)
|
||||
r600_src->rel = V_SQ_REL_RELATIVE;
|
||||
|
|
@ -877,6 +850,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
|
|||
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
|
||||
alu.src[0].sel = r600_src[i].sel;
|
||||
alu.src[0].chan = k;
|
||||
alu.src[0].value = r600_src[i].value;
|
||||
alu.dst.sel = treg;
|
||||
alu.dst.chan = k;
|
||||
alu.dst.write = 1;
|
||||
|
|
@ -886,9 +860,6 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
|
|||
if (r)
|
||||
return r;
|
||||
}
|
||||
r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
|
||||
if (r)
|
||||
return r;
|
||||
r600_src[i].sel = treg;
|
||||
j--;
|
||||
}
|
||||
|
|
@ -983,12 +954,14 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
|
|||
static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
|
||||
struct r600_bc_alu_src r600_src[3])
|
||||
{
|
||||
static float half_inv_pi = 1.0 /(3.1415926535 * 2);
|
||||
static float double_pi = 3.1415926535 * 2;
|
||||
static float neg_pi = -3.1415926535;
|
||||
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
int r;
|
||||
uint32_t lit_vals[4];
|
||||
struct r600_bc_alu alu;
|
||||
|
||||
memset(lit_vals, 0, 4*4);
|
||||
r = tgsi_split_constant(ctx, r600_src);
|
||||
if (r)
|
||||
return r;
|
||||
|
|
@ -996,9 +969,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
|
||||
lit_vals[1] = fui(0.5f);
|
||||
|
||||
memset(&alu, 0, sizeof(struct r600_bc_alu));
|
||||
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
|
||||
alu.is_op3 = 1;
|
||||
|
|
@ -1012,13 +982,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
|
|||
|
||||
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
|
||||
alu.src[1].chan = 0;
|
||||
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
|
||||
alu.src[1].value = (uint32_t *)&half_inv_pi;
|
||||
alu.src[2].sel = V_SQ_ALU_SRC_0_5;
|
||||
alu.src[2].chan = 1;
|
||||
alu.last = 1;
|
||||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
r = r600_bc_add_literal(ctx->bc, lit_vals);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
|
@ -1036,14 +1004,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
if (ctx->bc->chiprev == CHIPREV_R600) {
|
||||
lit_vals[0] = fui(3.1415926535897f * 2.0f);
|
||||
lit_vals[1] = fui(-3.1415926535897f);
|
||||
} else {
|
||||
lit_vals[0] = fui(1.0f);
|
||||
lit_vals[1] = fui(-0.5f);
|
||||
}
|
||||
|
||||
memset(&alu, 0, sizeof(struct r600_bc_alu));
|
||||
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
|
||||
alu.is_op3 = 1;
|
||||
|
|
@ -1059,11 +1019,18 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
|
|||
alu.src[1].chan = 0;
|
||||
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
|
||||
alu.src[2].chan = 1;
|
||||
|
||||
if (ctx->bc->chiprev == CHIPREV_R600) {
|
||||
alu.src[1].value = (uint32_t *)&double_pi;
|
||||
alu.src[2].value = (uint32_t *)&neg_pi;
|
||||
} else {
|
||||
alu.src[1].sel = V_SQ_ALU_SRC_1;
|
||||
alu.src[2].sel = V_SQ_ALU_SRC_0_5;
|
||||
alu.src[2].neg = 1;
|
||||
}
|
||||
|
||||
alu.last = 1;
|
||||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
r = r600_bc_add_literal(ctx->bc, lit_vals);
|
||||
if (r)
|
||||
return r;
|
||||
return 0;
|
||||
|
|
@ -1181,10 +1148,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
|
|||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* dst.w = 1.0; */
|
||||
|
|
@ -1205,10 +1168,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
|
|||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
@ -1244,9 +1203,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
}
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* kill must be last in ALU */
|
||||
ctx->bc->force_add_cf = 1;
|
||||
|
|
@ -1309,10 +1265,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (inst->Dst[0].Register.WriteMask & (1 << 2))
|
||||
{
|
||||
int chan;
|
||||
|
|
@ -1331,10 +1283,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
chan = alu.dst.chan;
|
||||
sel = alu.dst.sel;
|
||||
|
||||
|
|
@ -1357,9 +1305,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
/* dst.z = exp(tmp.x) */
|
||||
memset(&alu, 0, sizeof(struct r600_bc_alu));
|
||||
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
|
||||
|
|
@ -1401,9 +1346,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
|
|||
alu.dst.write = 1;
|
||||
alu.last = 1;
|
||||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
/* replicate result */
|
||||
|
|
@ -1452,9 +1394,6 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
|
|||
alu.dst.write = 1;
|
||||
alu.last = 1;
|
||||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
/* replicate result */
|
||||
|
|
@ -1478,9 +1417,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
|
|||
alu.dst.write = 1;
|
||||
alu.last = 1;
|
||||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
r = r600_bc_add_literal(ctx->bc,ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
/* b * LOG2(a) */
|
||||
|
|
@ -1495,9 +1431,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
|
|||
alu.dst.write = 1;
|
||||
alu.last = 1;
|
||||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
r = r600_bc_add_literal(ctx->bc,ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
/* POW(a,b) = EXP2(b * LOG2(a))*/
|
||||
|
|
@ -1508,9 +1441,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
|
|||
alu.dst.write = 1;
|
||||
alu.last = 1;
|
||||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
r = r600_bc_add_literal(ctx->bc,ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
return tgsi_helper_tempx_replicate(ctx);
|
||||
|
|
@ -1552,9 +1482,6 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
}
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* dst = (-tmp > 0 ? -1 : tmp) */
|
||||
for (i = 0; i < 4; i++) {
|
||||
|
|
@ -1589,9 +1516,6 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
|
|||
struct r600_bc_alu alu;
|
||||
int i, r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
for (i = 0; i < 4; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bc_alu));
|
||||
if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
|
||||
|
|
@ -1720,6 +1644,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
|
|||
|
||||
static int tgsi_tex(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
static float one_point_five = 1.5f;
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
struct r600_bc_tex tex;
|
||||
struct r600_bc_alu alu;
|
||||
|
|
@ -1729,7 +1654,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
|
|||
boolean src_not_temp =
|
||||
inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
|
||||
inst->Src[0].Register.File != TGSI_FILE_INPUT;
|
||||
uint32_t lit_vals[4];
|
||||
|
||||
src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
|
||||
|
||||
|
|
@ -1878,6 +1802,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
|
|||
|
||||
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
|
||||
alu.src[2].chan = 0;
|
||||
alu.src[2].value = (u32*)&one_point_five;
|
||||
|
||||
alu.dst.sel = ctx->temp_reg;
|
||||
alu.dst.chan = 1;
|
||||
|
|
@ -1888,11 +1813,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
lit_vals[0] = fui(1.5f);
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, lit_vals);
|
||||
if (r)
|
||||
return r;
|
||||
src_not_temp = FALSE;
|
||||
src_gpr = ctx->temp_reg;
|
||||
}
|
||||
|
|
@ -2026,9 +1946,6 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
}
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* (1 - src0) * src2 */
|
||||
for (i = 0; i < lasti + 1; i++) {
|
||||
|
|
@ -2051,9 +1968,6 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
}
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* src0 * src1 + (1 - src0) * src2 */
|
||||
for (i = 0; i < lasti + 1; i++) {
|
||||
|
|
@ -2194,10 +2108,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
|
|||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
|
|
@ -2255,10 +2165,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
|
|||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
if (use_temp)
|
||||
return tgsi_helper_copy(ctx, inst);
|
||||
|
|
@ -2291,10 +2197,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
|
||||
alu.src[0].sel = ctx->temp_reg;
|
||||
alu.src[0].chan = 0;
|
||||
|
|
@ -2306,10 +2208,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
|
|||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* result.y = tmp - floor(tmp); */
|
||||
|
|
@ -2335,9 +2233,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
|
|||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* result.z = RoughApprox2ToX(tmp);*/
|
||||
|
|
@ -2358,9 +2253,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
|
|||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* result.w = 1.0;*/
|
||||
|
|
@ -2378,9 +2270,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
|
|||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
return tgsi_helper_copy(ctx, inst);
|
||||
}
|
||||
|
|
@ -2410,10 +2299,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
|
||||
alu.src[0].sel = ctx->temp_reg;
|
||||
alu.src[0].chan = 0;
|
||||
|
|
@ -2426,10 +2311,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
|
|||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* result.y = src.x / (2 ^ floor(log2(src.x))); */
|
||||
|
|
@ -2452,10 +2333,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
memset(&alu, 0, sizeof(struct r600_bc_alu));
|
||||
|
||||
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
|
||||
|
|
@ -2471,10 +2348,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
memset(&alu, 0, sizeof(struct r600_bc_alu));
|
||||
|
||||
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
|
||||
|
|
@ -2490,10 +2363,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
memset(&alu, 0, sizeof(struct r600_bc_alu));
|
||||
|
||||
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
|
||||
|
|
@ -2509,10 +2378,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
memset(&alu, 0, sizeof(struct r600_bc_alu));
|
||||
|
||||
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
|
||||
|
|
@ -2534,10 +2399,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
|
|||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* result.z = log2(src);*/
|
||||
|
|
@ -2559,10 +2420,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
|
|||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* result.w = 1.0; */
|
||||
|
|
@ -2581,10 +2438,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
|
|||
r = r600_bc_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = r600_bc_add_literal(ctx->bc, ctx->value);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return tgsi_helper_copy(ctx, inst);
|
||||
|
|
|
|||
|
|
@ -47,6 +47,6 @@ struct r600_shader {
|
|||
boolean uses_kill;
|
||||
};
|
||||
|
||||
int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
|
||||
int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -29,8 +29,6 @@
|
|||
|
||||
int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
|
||||
S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
|
||||
S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
|
||||
|
|
@ -67,13 +65,5 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
|
|||
S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
|
||||
S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
|
||||
}
|
||||
if (alu->last) {
|
||||
if (alu->nliteral && !alu->literal_added) {
|
||||
R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
|
||||
}
|
||||
for (i = 0; i < alu->nliteral; i++) {
|
||||
bc->bytecode[id++] = alu->value[i];
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue