From 7be5455796facbe35cf1f1bdbefa83759b2e3b58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sun, 12 Dec 2010 15:13:39 +0100 Subject: [PATCH 1/3] r600g: Why all this fiddling with tgsi_helper_copy? tgsi_helper_copy is used on several occasions to copy a temporary result into the real destination register to emulate writemasks for OP3 and reduction operations. According to R600 ISA that's unnecessary. This patch removes that temporary copy for MAD, CMP and DP4 and writes the results directly to the destination register. --- src/gallium/drivers/r600/r600_shader.c | 62 +++++++++++++++++--------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index a2fec205051..268a633c518 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1606,6 +1606,13 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; + int lasti = 0; + + for (i = 0; i < 4; i++) { + if (inst->Dst[0].Register.WriteMask & (1 << i)) { + lasti = i; + } + } r = tgsi_split_constant(ctx, r600_src); if (r) @@ -1613,26 +1620,32 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) r = tgsi_split_literal_constant(ctx, r600_src); if (r) return r; - /* do it in 2 step as op3 doesn't support writemask */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { alu.src[j] = r600_src[j]; alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } - alu.dst.sel = ctx->temp_reg; + + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; - if (i == 3) { + if (i == lasti) { alu.last = 1; } r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static 
int tgsi_dp(struct r600_shader_ctx *ctx) @@ -1655,7 +1668,13 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) alu.src[j] = r600_src[j]; alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } - alu.dst.sel = ctx->temp_reg; + if(inst->Dst[0].Register.WriteMask & (1 << i)) { + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + } else { + alu.dst.sel = ctx->temp_reg; + } alu.dst.chan = i; alu.dst.write = 1; /* handle some special cases */ @@ -1689,7 +1708,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static int tgsi_tex(struct r600_shader_ctx *ctx) @@ -2019,8 +2038,14 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; - int use_temp = 0; int i, r; + int lasti = 0; + + for (i = 0; i < 4; i++) { + if (inst->Dst[0].Register.WriteMask & (1 << i)) { + lasti = i; + } + } r = tgsi_split_constant(ctx, r600_src); if (r) @@ -2029,10 +2054,10 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) if (r) return r; - if (inst->Dst[0].Register.WriteMask != 0xf) - use_temp = 1; + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; - for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); alu.src[0] = r600_src[0]; @@ -2044,24 +2069,19 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) alu.src[2] = r600_src[1]; alu.src[2].chan = tgsi_chan(&inst->Src[1], i); - if (use_temp) - alu.dst.sel = ctx->temp_reg; - else { - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - } + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; - if (i == 3) + if (i == lasti) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - if (use_temp) - return 
tgsi_helper_copy(ctx, inst); return 0; } From a1146c1373e66d429afbb92ecb08a6fd67c3e224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 14 Dec 2010 19:32:08 +0100 Subject: [PATCH 2/3] r600g: DP4 also supports writemasking --- src/gallium/drivers/r600/r600_shader.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 268a633c518..59c080fcbba 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1668,15 +1668,13 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) alu.src[j] = r600_src[j]; alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } - if(inst->Dst[0].Register.WriteMask & (1 << i)) { - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - } else { - alu.dst.sel = ctx->temp_reg; - } + + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.chan = i; - alu.dst.write = 1; + alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { case TGSI_OPCODE_DP2: From 93a95ad8ff1d543f886f123029d1329513729c4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sun, 12 Dec 2010 15:37:54 +0100 Subject: [PATCH 3/3] r600g: texture instructions also work fine with TGSI_FILE_INPUT --- src/gallium/drivers/r600/r600_shader.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 59c080fcbba..2bf116c90bc 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1717,7 +1717,9 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) unsigned src_gpr; int r, i; int opcode; - boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; + boolean src_not_temp = + inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && + 
inst->Src[0].Register.File != TGSI_FILE_INPUT; uint32_t lit_vals[4]; src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;