freedreno/a6xx: set .MERGEREGS based on variant

Also set HALFREGFOOTPRINT, since in the non-mergeregs case this will be
non-zero.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5458>
This commit is contained in:
Rob Clark 2020-06-13 09:31:11 -07:00 committed by Marge Bot
parent 1cc4cf141a
commit c6632c087d
2 changed files with 12 additions and 6 deletions

View file

@ -97,7 +97,8 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG0, 1);
OUT_RING(ring, A6XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) |
A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
A6XX_SP_CS_CTRL_REG0_MERGEDREGS |
A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack) |
COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE));

View file

@ -439,19 +439,17 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
0xfc000000);
enum a3xx_threadsize vssz;
uint32_t vsregs;
if (ds || hs) {
vssz = TWO_QUADS;
vsregs = 0;
} else {
vssz = FOUR_QUADS;
vsregs = A6XX_SP_VS_CTRL_REG0_MERGEDREGS;
}
OUT_PKT4(ring, REG_A6XX_SP_VS_CTRL_REG0, 1);
OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_THREADSIZE(vssz) |
A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) |
vsregs |
A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vs->info.max_half_reg + 1) |
COND(vs->mergedregs, A6XX_SP_VS_CTRL_REG0_MERGEDREGS) |
A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack) |
COND(vs->need_pixlod, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
@ -537,6 +535,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
OUT_PKT4(ring, REG_A6XX_SP_HS_CTRL_REG0, 1);
OUT_RING(ring, A6XX_SP_HS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(hs->info.max_reg + 1) |
A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(hs->info.max_half_reg + 1) |
COND(hs->mergedregs, A6XX_SP_HS_CTRL_REG0_MERGEDREGS) |
A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(hs->branchstack) |
COND(hs->need_pixlod, A6XX_SP_HS_CTRL_REG0_PIXLODENABLE));
@ -547,6 +547,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
OUT_PKT4(ring, REG_A6XX_SP_DS_CTRL_REG0, 1);
OUT_RING(ring, A6XX_SP_DS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(ds->info.max_reg + 1) |
A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(ds->info.max_half_reg + 1) |
COND(ds->mergedregs, A6XX_SP_DS_CTRL_REG0_MERGEDREGS) |
A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ds->branchstack) |
COND(ds->need_pixlod, A6XX_SP_DS_CTRL_REG0_PIXLODENABLE));
@ -656,7 +658,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
COND(enable_varyings, A6XX_SP_FS_CTRL_REG0_VARYING) |
0x1000000 |
A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) |
A6XX_SP_FS_CTRL_REG0_MERGEDREGS |
A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fs->info.max_half_reg + 1) |
COND(fs->mergedregs, A6XX_SP_FS_CTRL_REG0_MERGEDREGS) |
A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(fs->branchstack) |
COND(fs->need_pixlod, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE));
@ -719,6 +722,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
OUT_PKT4(ring, REG_A6XX_SP_GS_CTRL_REG0, 1);
OUT_RING(ring, A6XX_SP_GS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(gs->info.max_reg + 1) |
A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(gs->info.max_half_reg + 1) |
COND(gs->mergedregs, A6XX_SP_GS_CTRL_REG0_MERGEDREGS) |
A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(gs->branchstack) |
COND(gs->need_pixlod, A6XX_SP_GS_CTRL_REG0_PIXLODENABLE));