mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-09 12:40:23 +01:00
freedreno/a3xx/compiler: add LIT
Needed by glxgears and etuxracer ;-) Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
parent
cb9e07aa84
commit
64c134cedb
1 changed files with 142 additions and 3 deletions
|
|
@ -65,10 +65,11 @@ static unsigned regmask_idx(struct ir3_register *reg)
|
|||
static void regmask_set(regmask_t regmask, struct ir3_register *reg,
|
||||
unsigned wrmask)
|
||||
{
|
||||
unsigned ridx = regmask_idx(reg) & ~0x3;
|
||||
unsigned i;
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (wrmask & (1 << i)) {
|
||||
unsigned idx = regmask_idx(reg) + i;
|
||||
unsigned idx = ridx + i;
|
||||
regmask[idx / 8] |= 1 << (idx % 8);
|
||||
}
|
||||
}
|
||||
|
|
@ -179,8 +180,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
|
|||
ctx->info.file_max[TGSI_FILE_OUTPUT] + 1;
|
||||
|
||||
so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
|
||||
ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 +
|
||||
ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
|
||||
ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
|
||||
|
||||
ret = tgsi_parse_init(&ctx->parser, tokens);
|
||||
if (ret != TGSI_PARSE_OK)
|
||||
|
|
@ -602,6 +602,144 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
|
|||
* native instructions:
|
||||
*/
|
||||
|
||||
/* LIT - Light Coefficients:
|
||||
* dst.x = 1.0
|
||||
* dst.y = max(src.x, 0.0)
|
||||
* dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0.0
|
||||
* dst.w = 1.0
|
||||
*
|
||||
* max.f tmp.y, src.y, {0.0}
|
||||
* nop
|
||||
* max.f tmp.z, src.w, {-128.0}
|
||||
* cmps.f.ge tmp.w, (neg)src.x, {0.0}
|
||||
* max.f dst.y, src.x, {0.0}
|
||||
* nop
|
||||
* min.f tmp.x, tmp.z, {128.0}
|
||||
* add.s tmp.z, tmp.w, -1
|
||||
* log2 tmp.y, tmp.y
|
||||
* (rpt1)nop
|
||||
* (ss)mul.f tmp.x, tmp.x, tmp.y
|
||||
* mov.f16f16 dst.x, {1.0}
|
||||
* mov.f16f16 dst.w, {1.0}
|
||||
* (rpt3)nop
|
||||
* exp2 tmp.x, tmp.x
|
||||
* (ss)sel.f16 dst.z, {0.0}, tmp.z, tmp.x
|
||||
*/
|
||||
static void
|
||||
trans_lit(const struct instr_translater *t,
|
||||
struct fd3_compile_context *ctx,
|
||||
struct tgsi_full_instruction *inst)
|
||||
{
|
||||
struct ir3_instruction *instr;
|
||||
struct ir3_register *r;
|
||||
struct tgsi_dst_register tmp_dst;
|
||||
struct tgsi_src_register *tmp_src;
|
||||
struct tgsi_src_register constval0, constval1, constval128;
|
||||
struct tgsi_src_register *src = &inst->Src[0].Register;
|
||||
struct tgsi_dst_register *dst = get_dst(ctx, inst);
|
||||
|
||||
tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
|
||||
|
||||
if (is_const(src))
|
||||
src = get_unconst(ctx, src);
|
||||
|
||||
get_immediate(ctx, &constval0, fui(0.0));
|
||||
get_immediate(ctx, &constval1, fui(1.0));
|
||||
get_immediate(ctx, &constval128, fui(128.0));
|
||||
|
||||
/* max.f tmp.y, src.y, {0.0} */
|
||||
instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
|
||||
add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Y);
|
||||
add_src_reg(ctx, instr, src, TGSI_SWIZZLE_Y);
|
||||
add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
|
||||
|
||||
/* nop */
|
||||
ir3_instr_create(ctx->ir, 0, OPC_NOP);
|
||||
|
||||
/* max.f tmp.z, src.w, {-128.0} */
|
||||
instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
|
||||
add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Z);
|
||||
add_src_reg(ctx, instr, src, TGSI_SWIZZLE_W);
|
||||
add_src_reg(ctx, instr, &constval128,
|
||||
constval128.SwizzleX)->flags |= IR3_REG_NEGATE;
|
||||
|
||||
/* cmps.f.ge tmp.w, (neg)src.x, {0.0} */
|
||||
instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
|
||||
add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_W);
|
||||
add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X)->flags |= IR3_REG_NEGATE;
|
||||
add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
|
||||
instr->cat2.condition = IR3_COND_GE;
|
||||
|
||||
/* max.f dst.y, src.x, {0.0} */
|
||||
instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
|
||||
add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_Y);
|
||||
add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X);
|
||||
add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
|
||||
|
||||
/* nop */
|
||||
ir3_instr_create(ctx->ir, 0, OPC_NOP);
|
||||
|
||||
/* min.f tmp.x, tmp.z, {128.0} */
|
||||
instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
|
||||
add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X);
|
||||
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Z);
|
||||
add_src_reg(ctx, instr, &constval128, constval128.SwizzleX);
|
||||
|
||||
/* add.s tmp.z, tmp.w, -1 */
|
||||
instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
|
||||
add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Z);
|
||||
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_W);
|
||||
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
|
||||
|
||||
/* log2 tmp.y, tmp.y */
|
||||
instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
|
||||
r = add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_Y);
|
||||
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Y);
|
||||
regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_Y);
|
||||
|
||||
/* (rpt1)nop */
|
||||
ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1;
|
||||
|
||||
/* (ss)mul.f tmp.x, tmp.x, tmp.y */
|
||||
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
|
||||
add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X);
|
||||
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
|
||||
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Y);
|
||||
|
||||
/* mov.f16f16 dst.x, {1.0} */
|
||||
instr = ir3_instr_create(ctx->ir, 1, 0);
|
||||
instr->cat1.src_type = get_ftype(ctx);
|
||||
instr->cat1.dst_type = get_ftype(ctx);
|
||||
add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X);
|
||||
add_src_reg(ctx, instr, &constval1, constval1.SwizzleX);
|
||||
|
||||
/* mov.f16f16 dst.w, {1.0} */
|
||||
instr = ir3_instr_create(ctx->ir, 1, 0);
|
||||
instr->cat1.src_type = get_ftype(ctx);
|
||||
instr->cat1.dst_type = get_ftype(ctx);
|
||||
add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_W);
|
||||
add_src_reg(ctx, instr, &constval1, constval1.SwizzleX);
|
||||
|
||||
/* (rpt3)nop */
|
||||
ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 3;
|
||||
|
||||
/* exp2 tmp.x, tmp.x */
|
||||
instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
|
||||
r = add_dst_reg(ctx, instr, &tmp_dst, TGSI_SWIZZLE_X);
|
||||
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
|
||||
regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
|
||||
|
||||
/* (ss)sel.f16 dst.z, {0.0}, tmp.z, tmp.x */
|
||||
instr = ir3_instr_create(ctx->ir, 3,
|
||||
ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
|
||||
add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_Z);
|
||||
add_src_reg(ctx, instr, &constval0, constval0.SwizzleX);
|
||||
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_Z);
|
||||
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
|
||||
|
||||
put_dst(ctx, inst, dst);
|
||||
}
|
||||
|
||||
static inline void
|
||||
get_swiz(unsigned *swiz, struct tgsi_src_register *src)
|
||||
{
|
||||
|
|
@ -1266,6 +1404,7 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
|
|||
[TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ }
|
||||
|
||||
INSTR(MOV, instr_cat1),
|
||||
INSTR(LIT, trans_lit),
|
||||
INSTR(RCP, instr_cat4, .opc = OPC_RCP),
|
||||
INSTR(RSQ, instr_cat4, .opc = OPC_RSQ),
|
||||
INSTR(SQRT, instr_cat4, .opc = OPC_SQRT),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue