mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 02:20:11 +01:00
freedreno/a6xx: Support variable wg size
If local wg size isn't known at compile time, we need to move some of the state emit out of the state object and into IB2 cmdstream. This still doesn't account for the fact that RA currently must assume the worst case, meaning limiting cl kernels to a minimum number of regs and spilling excessively. Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30835>
This commit is contained in:
parent
1569219f51
commit
f3211e243f
1 changed files with 49 additions and 24 deletions
|
|
@ -23,7 +23,47 @@
|
|||
#include "fd6_emit.h"
|
||||
#include "fd6_pack.h"
|
||||
|
||||
/* maybe move to fd6_program? */
|
||||
template <chip CHIP>
|
||||
static void
|
||||
cs_program_emit_local_size(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
struct ir3_shader_variant *v, uint16_t local_size[3])
|
||||
{
|
||||
/*
|
||||
* Devices that do not support double threadsize take the threadsize from
|
||||
* A6XX_HLSQ_FS_CNTL_0_THREADSIZE instead of A6XX_HLSQ_CS_CNTL_1_THREADSIZE
|
||||
* which is always set to THREAD128.
|
||||
*/
|
||||
enum a6xx_threadsize thrsz = v->info.double_threadsize ? THREAD128 : THREAD64;
|
||||
enum a6xx_threadsize thrsz_cs = ctx->screen->info->a6xx
|
||||
.supports_double_threadsize ? thrsz : THREAD128;
|
||||
|
||||
if (CHIP == A7XX) {
|
||||
unsigned tile_height = (local_size[1] % 8 == 0) ? 3
|
||||
: (local_size[1] % 4 == 0) ? 5
|
||||
: (local_size[1] % 2 == 0) ? 9
|
||||
: 17;
|
||||
|
||||
OUT_REG(ring,
|
||||
HLSQ_CS_CNTL_1(
|
||||
CHIP,
|
||||
.linearlocalidregid = INVALID_REG,
|
||||
.threadsize = thrsz_cs,
|
||||
.workgrouprastorderzfirsten = true,
|
||||
.wgtilewidth = 4,
|
||||
.wgtileheight = tile_height,
|
||||
)
|
||||
);
|
||||
|
||||
OUT_REG(ring,
|
||||
A7XX_HLSQ_CS_LOCAL_SIZE(
|
||||
.localsizex = local_size[0] - 1,
|
||||
.localsizey = local_size[1] - 1,
|
||||
.localsizez = local_size[2] - 1,
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
|
|
@ -86,22 +126,6 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
A6XX_SP_CS_CNTL_1_THREADSIZE(thrsz));
|
||||
}
|
||||
} else {
|
||||
unsigned tile_height = (v->local_size[1] % 8 == 0) ? 3
|
||||
: (v->local_size[1] % 4 == 0) ? 5
|
||||
: (v->local_size[1] % 2 == 0) ? 9
|
||||
: 17;
|
||||
|
||||
OUT_REG(ring,
|
||||
HLSQ_CS_CNTL_1(
|
||||
CHIP,
|
||||
.linearlocalidregid = regid(63, 0),
|
||||
.threadsize = thrsz_cs,
|
||||
.workgrouprastorderzfirsten = true,
|
||||
.wgtilewidth = 4,
|
||||
.wgtileheight = tile_height,
|
||||
)
|
||||
);
|
||||
|
||||
OUT_REG(ring, HLSQ_FS_CNTL_0(CHIP, .threadsize = THREAD64));
|
||||
OUT_REG(ring,
|
||||
A6XX_SP_CS_CNTL_0(
|
||||
|
|
@ -121,16 +145,12 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
: WORKITEMRASTORDER_TILED,
|
||||
)
|
||||
);
|
||||
OUT_REG(ring,
|
||||
A7XX_HLSQ_CS_LOCAL_SIZE(
|
||||
.localsizex = v->local_size[0] - 1,
|
||||
.localsizey = v->local_size[1] - 1,
|
||||
.localsizez = v->local_size[2] - 1,
|
||||
)
|
||||
);
|
||||
OUT_REG(ring, A7XX_SP_CS_UNKNOWN_A9BE(0)); // Sometimes is 0x08000000
|
||||
}
|
||||
|
||||
if (!v->local_size_variable)
|
||||
cs_program_emit_local_size<CHIP>(ctx, ring, v, v->local_size);
|
||||
|
||||
fd6_emit_shader<CHIP>(ctx, ring, v);
|
||||
}
|
||||
|
||||
|
|
@ -216,6 +236,11 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
|||
/* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */
|
||||
const unsigned work_dim = info->work_dim ? info->work_dim : 3;
|
||||
|
||||
if (cs->v->local_size_variable) {
|
||||
uint16_t wg[] = {local_size[0], local_size[1], local_size[2]};
|
||||
cs_program_emit_local_size<CHIP>(ctx, ring, cs->v, wg);
|
||||
}
|
||||
|
||||
OUT_REG(ring,
|
||||
HLSQ_CS_NDRANGE_0(
|
||||
CHIP,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue