freedreno/a3xx/compiler: add LIT

Needed by glxgears and etuxracer ;-)

Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
Rob Clark 2013-09-01 11:35:56 -04:00
parent cb9e07aa84
commit 64c134cedb

View file

@ -65,10 +65,11 @@ static unsigned regmask_idx(struct ir3_register *reg)
static void regmask_set(regmask_t regmask, struct ir3_register *reg,
unsigned wrmask)
{
unsigned ridx = regmask_idx(reg) & ~0x3;
unsigned i;
for (i = 0; i < 4; i++) {
if (wrmask & (1 << i)) {
unsigned idx = regmask_idx(reg) + i;
unsigned idx = ridx + i;
regmask[idx / 8] |= 1 << (idx % 8);
}
}
@ -179,8 +180,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
ctx->info.file_max[TGSI_FILE_OUTPUT] + 1;
so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 +
ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
ret = tgsi_parse_init(&ctx->parser, tokens);
if (ret != TGSI_PARSE_OK)
@ -602,6 +602,144 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
* native instructions:
*/
/* LIT - Light Coefficients:
* dst.x = 1.0
* dst.y = max(src.x, 0.0)
* dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0.0
* dst.w = 1.0
*
* max.f tmp.y, src.y, {0.0}
* nop
* max.f tmp.z, src.w, {-128.0}
* cmps.f.ge tmp.w, (neg)src.x, {0.0}
* max.f dst.y, src.x, {0.0}
* nop
* min.f tmp.x, tmp.z, {128.0}
* add.s tmp.z, tmp.w, -1
* log2 tmp.y, tmp.y
* (rpt1)nop
* (ss)mul.f tmp.x, tmp.x, tmp.y
* mov.f16f16 dst.x, {1.0}
* mov.f16f16 dst.w, {1.0}
* (rpt3)nop
* exp2 tmp.x, tmp.x
* (ss)sel.f16 dst.z, {0.0}, tmp.z, tmp.x
*/
static void
trans_lit(const struct instr_translater *t,
struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
struct ir3_register *r;
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
struct tgsi_src_register constval0, constval1, constval128;
struct tgsi_src_register *src = &inst->Src[0].Register;
struct tgsi_dst_register *dst = get_dst(ctx, inst);
tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
if (is_const(src))
src = get_unconst(ctx, src);
get_immediate(ctx, &constval0, fui(0.0));
get_immediate(ctx, &constval1, fui(1.0));
get_immediate(ctx, &constval128, fui(128.0));
/* max.f tmp.y, src.y, {0.0} */
instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Y);
add_src_reg(ctx, instr, src, TGSI_SWIZZLE_Y);
add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
/* nop */
ir3_instr_create(ctx->ir, 0, OPC_NOP);
/* max.f tmp.z, src.w, {-128.0} */
instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Z);
add_src_reg(ctx, instr, src, TGSI_SWIZZLE_W);
add_src_reg(ctx, instr, &constval128,
constval128.SwizzleX)->flags |= IR3_REG_NEGATE;
/* cmps.f.ge tmp.w, (neg)src.x, {0.0} */
instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_W);
add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X)->flags |= IR3_REG_NEGATE;
add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
instr->cat2.condition = IR3_COND_GE;
/* max.f dst.y, src.x, {0.0} */
instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_Y);
add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X);
add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
/* nop */
ir3_instr_create(ctx->ir, 0, OPC_NOP);
/* min.f tmp.x, tmp.z, {128.0} */
instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X);
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Z);
add_src_reg(ctx, instr, &constval128, constval128.SwizzleX);
/* add.s tmp.z, tmp.w, -1 */
instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Z);
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_W);
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
/* log2 tmp.y, tmp.y */
instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
r = add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Y);
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Y);
regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_Y);
/* (rpt1)nop */
ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1;
/* (ss)mul.f tmp.x, tmp.x, tmp.y */
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X);
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Y);
/* mov.f16f16 dst.x, {1.0} */
instr = ir3_instr_create(ctx->ir, 1, 0);
instr->cat1.src_type = get_ftype(ctx);
instr->cat1.dst_type = get_ftype(ctx);
add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X);
add_src_reg(ctx, instr, &constval1, constval1.SwizzleX);
/* mov.f16f16 dst.w, {1.0} */
instr = ir3_instr_create(ctx->ir, 1, 0);
instr->cat1.src_type = get_ftype(ctx);
instr->cat1.dst_type = get_ftype(ctx);
add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_W);
add_src_reg(ctx, instr, &constval1, constval1.SwizzleX);
/* (rpt3)nop */
ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 3;
/* exp2 tmp.x, tmp.x */
instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
r = add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X);
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
/* (ss)sel.f16 dst.z, {0.0}, tmp.z, tmp.x */
instr = ir3_instr_create(ctx->ir, 3,
ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_Z);
add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Z);
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
put_dst(ctx, inst, dst);
}
static inline void
get_swiz(unsigned *swiz, struct tgsi_src_register *src)
{
@ -1266,6 +1404,7 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ }
INSTR(MOV, instr_cat1),
INSTR(LIT, trans_lit),
INSTR(RCP, instr_cat4, .opc = OPC_RCP),
INSTR(RSQ, instr_cat4, .opc = OPC_RSQ),
INSTR(SQRT, instr_cat4, .opc = OPC_SQRT),