mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-26 11:10:49 +02:00
nine: use separate register for aL emulation
NIR loop unrolling only works if the loop counter is a scalar. So keep the loop counter separate and move the aL emulation and the aL increment to a new register. This allows loop unrolling with vec4 backends where unconditionally scalarizing phi nodes is undesirable, for example r300.

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Axel Davy <davyaxel0@gmail.com>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7222
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21243>
This commit is contained in:
parent
ac993ae828
commit
5825f9dd68
1 changed files with 25 additions and 14 deletions
|
|
@ -485,7 +485,8 @@ struct shader_translator
|
|||
struct ureg_dst t[8]; /* scratch TEMPs */
|
||||
struct ureg_src vC[2]; /* PS color in */
|
||||
struct ureg_src vT[8]; /* PS texcoord in */
|
||||
struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
|
||||
struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop/rep ctr */
|
||||
struct ureg_dst aL[NINE_MAX_LOOP_DEPTH]; /* aL emulation */
|
||||
} regs;
|
||||
unsigned num_temp; /* ARRAY_SIZE(regs.r) */
|
||||
unsigned num_scratch;
|
||||
|
|
@ -935,6 +936,8 @@ tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
|
|||
if (ureg_dst_is_undef(tx->regs.rL[l])) {
|
||||
/* loop or rep ctr creation */
|
||||
tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
|
||||
if (loop_or_rep)
|
||||
tx->regs.aL[l] = ureg_DECL_local_temporary(tx->ureg);
|
||||
tx->loop_or_rep[l] = loop_or_rep;
|
||||
}
|
||||
/* loop - rep - endloop - endrep not allowed */
|
||||
|
|
@ -943,7 +946,7 @@ tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
|
|||
return tx->regs.rL[l];
|
||||
}
|
||||
|
||||
static struct ureg_src
|
||||
static struct ureg_dst
|
||||
tx_get_loopal(struct shader_translator *tx)
|
||||
{
|
||||
int loop_level = tx->loop_depth - 1;
|
||||
|
|
@ -951,13 +954,13 @@ tx_get_loopal(struct shader_translator *tx)
|
|||
while (loop_level >= 0) {
|
||||
/* handle loop - rep - endrep - endloop case */
|
||||
if (tx->loop_or_rep[loop_level])
|
||||
/* the value is in the loop counter y component (nine implementation) */
|
||||
return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
|
||||
/* the aL value is in the Y component (nine implementation) */
|
||||
return tx->regs.aL[loop_level];
|
||||
loop_level--;
|
||||
}
|
||||
|
||||
DBG("aL counter requested outside of loop\n");
|
||||
return ureg_src_undef();
|
||||
return ureg_dst_undef();
|
||||
}
|
||||
|
||||
static inline unsigned *
|
||||
|
|
@ -1134,9 +1137,11 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
|
|||
if (ureg_dst_is_undef(tx->regs.address))
|
||||
tx->regs.address = ureg_DECL_address(ureg);
|
||||
if (!tx->native_integers)
|
||||
ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
|
||||
ureg_ARR(ureg, tx->regs.address,
|
||||
ureg_scalar(ureg_src(tx_get_loopal(tx)), TGSI_SWIZZLE_Y));
|
||||
else
|
||||
ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
|
||||
ureg_UARL(ureg, tx->regs.address,
|
||||
ureg_scalar(ureg_src(tx_get_loopal(tx)), TGSI_SWIZZLE_Y));
|
||||
src = ureg_src(tx->regs.address);
|
||||
break;
|
||||
case D3DSPR_MISCTYPE:
|
||||
|
|
@ -1789,15 +1794,20 @@ DECL_SPECIAL(LOOP)
|
|||
unsigned *label;
|
||||
struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
|
||||
struct ureg_dst ctr;
|
||||
struct ureg_dst aL;
|
||||
struct ureg_dst tmp;
|
||||
struct ureg_src ctrx;
|
||||
|
||||
label = tx_bgnloop(tx);
|
||||
ctr = tx_get_loopctr(tx, TRUE);
|
||||
aL = tx_get_loopal(tx);
|
||||
ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
|
||||
|
||||
/* src: num_iterations - start_value of al - step for al - 0 */
|
||||
ureg_MOV(ureg, ctr, src);
|
||||
/* src: num_iterations */
|
||||
ureg_MOV(ureg, ureg_writemask(ctr, NINED3DSP_WRITEMASK_0),
|
||||
ureg_scalar(src, TGSI_SWIZZLE_X));
|
||||
/* aL: x unused - y start value of aL - z step for aL - w unused */
|
||||
ureg_MOV(ureg, aL, src);
|
||||
ureg_BGNLOOP(tx->ureg, label);
|
||||
tmp = tx_scratch_scalar(tx);
|
||||
/* Initially ctr.x contains the number of iterations.
|
||||
|
|
@ -1837,22 +1847,23 @@ DECL_SPECIAL(ENDLOOP)
|
|||
{
|
||||
struct ureg_program *ureg = tx->ureg;
|
||||
struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
|
||||
struct ureg_dst al = tx_get_loopal(tx);
|
||||
struct ureg_dst dst_ctrx, dst_al;
|
||||
struct ureg_src src_ctr, al_counter;
|
||||
|
||||
dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
|
||||
dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
|
||||
dst_al = ureg_writemask(al, NINED3DSP_WRITEMASK_1);
|
||||
src_ctr = ureg_src(ctr);
|
||||
al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
|
||||
al_counter = ureg_scalar(ureg_src(al), TGSI_SWIZZLE_Z);
|
||||
|
||||
/* ctr.x -= 1
|
||||
* ctr.y (aL) += step */
|
||||
* al.y (aL) += step */
|
||||
if (!tx->native_integers) {
|
||||
ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
|
||||
ureg_ADD(ureg, dst_al, src_ctr, al_counter);
|
||||
ureg_ADD(ureg, dst_al, ureg_src(al), al_counter);
|
||||
} else {
|
||||
ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
|
||||
ureg_UADD(ureg, dst_al, src_ctr, al_counter);
|
||||
ureg_UADD(ureg, dst_al, ureg_src(al), al_counter);
|
||||
}
|
||||
ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
|
||||
return D3D_OK;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue