mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 15:20:10 +01:00
freedreno,tu: Limit the amount of instructions preloaded into icache
Inferring from the blob's cmdstream, the size of the shader instruction cache is 64 on a630, 128 on a650, and 128 on a660. On a650 and a660 the GPU could hang if we exceed the limit, though this is not reproducible with computerator or a single amber test. Also, while the blob limits the size to 128, Turnip still hangs with that value but does not hang with a limit of 127. On a630 there seems to be no hang when the limit is exceeded. Fixes the hang of a compute shader in Alien Isolation on a650/a660. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14044>
This commit is contained in:
parent
da24bb17a8
commit
e63ffc2f04
5 changed files with 23 additions and 3 deletions
|
|
@ -60,6 +60,12 @@ struct fd_dev_info {
|
|||
|
||||
uint32_t reg_size_vec4;
|
||||
|
||||
/* The size (in instrlen units (128 bytes)) of instruction cache where
|
||||
* we preload a shader. Loading more than this could trigger a hang
|
||||
* on gen3 and later.
|
||||
*/
|
||||
uint32_t instr_cache_size;
|
||||
|
||||
/* Whether the PC_MULTIVIEW_MASK register exists. */
|
||||
bool supports_multiview_mask;
|
||||
|
||||
|
|
|
|||
|
|
@ -206,6 +206,7 @@ add_gpus([
|
|||
a6xx_gen1 = dict(
|
||||
fibers_per_sp = 128 * 16,
|
||||
reg_size_vec4 = 96,
|
||||
instr_cache_size = 64,
|
||||
ccu_cntl_gmem_unk2 = True,
|
||||
indirect_draw_wfm_quirk = True,
|
||||
depth_bounds_require_depth_test_quirk = True,
|
||||
|
|
@ -218,6 +219,7 @@ a6xx_gen1 = dict(
|
|||
a6xx_gen2 = dict(
|
||||
fibers_per_sp = 128 * 4 * 16,
|
||||
reg_size_vec4 = 96,
|
||||
instr_cache_size = 64, # TODO
|
||||
supports_multiview_mask = True,
|
||||
has_z24uint_s8uint = True,
|
||||
indirect_draw_wfm_quirk = True,
|
||||
|
|
@ -231,6 +233,8 @@ a6xx_gen2 = dict(
|
|||
a6xx_gen3 = dict(
|
||||
fibers_per_sp = 128 * 2 * 16,
|
||||
reg_size_vec4 = 64,
|
||||
# Blob limits it to 128 but we hang with 128
|
||||
instr_cache_size = 127,
|
||||
supports_multiview_mask = True,
|
||||
has_z24uint_s8uint = True,
|
||||
tess_use_shared = True,
|
||||
|
|
@ -249,6 +253,8 @@ a6xx_gen3 = dict(
|
|||
a6xx_gen4 = dict(
|
||||
fibers_per_sp = 128 * 2 * 16,
|
||||
reg_size_vec4 = 64,
|
||||
# Blob limits it to 128 but we hang with 128
|
||||
instr_cache_size = 127,
|
||||
supports_multiview_mask = True,
|
||||
has_z24uint_s8uint = True,
|
||||
tess_use_shared = True,
|
||||
|
|
|
|||
|
|
@ -196,12 +196,14 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
|
|||
OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2);
|
||||
OUT_RELOC(ring, v->bo, 0, 0, 0);
|
||||
|
||||
uint32_t shader_preload_size =
|
||||
MIN2(v->instrlen, a6xx_backend->info->a6xx.instr_cache_size);
|
||||
OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3);
|
||||
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(v->instrlen));
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(shader_preload_size));
|
||||
OUT_RELOC(ring, v->bo, 0, 0, 0);
|
||||
|
||||
if (v->pvtmem_size > 0) {
|
||||
|
|
|
|||
|
|
@ -545,12 +545,15 @@ tu6_emit_xs(struct tu_cs *cs,
|
|||
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_pvt_mem_hw_stack_offset, 1);
|
||||
tu_cs_emit(cs, A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET_OFFSET(pvtmem->per_sp_size));
|
||||
|
||||
uint32_t shader_preload_size =
|
||||
MIN2(xs->instrlen, cs->device->physical_device->info->a6xx.instr_cache_size);
|
||||
|
||||
tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3);
|
||||
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(xs->instrlen));
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(shader_preload_size));
|
||||
tu_cs_emit_qw(cs, binary_iova);
|
||||
|
||||
/* emit immediates */
|
||||
|
|
|
|||
|
|
@ -143,12 +143,15 @@ fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
OUT_PKT4(ring, hw_stack_offset, 1);
|
||||
OUT_RING(ring, A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET_OFFSET(per_sp_size));
|
||||
|
||||
uint32_t shader_preload_size =
|
||||
MIN2(so->instrlen, ctx->screen->info->a6xx.instr_cache_size);
|
||||
|
||||
OUT_PKT7(ring, fd6_stage2opcode(so->type), 3);
|
||||
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(so->instrlen));
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(shader_preload_size));
|
||||
OUT_RELOC(ring, so->bo, 0, 0, 0);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue