From 5ba35fd6ccfc2a6145a2e6823098fedc4ee30846 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 7 Dec 2022 20:04:55 -0800 Subject: [PATCH] freedreno: fix compute shared_size underflow The underflow caused a ~5% perf regression in some gfxbench benchmarks. Fixes: b8d10d9e87a ("gallium: split up req_local_mem") Part-of: --- src/gallium/drivers/freedreno/a6xx/fd6_compute.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c index 9d9df192f94..dec35955acb 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c @@ -73,7 +73,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v))); uint32_t shared_size = - MAX2(((int)v->cs.req_local_mem + variable_shared_size- 1) / 1024, 1); + MAX2(((int)(v->cs.req_local_mem + variable_shared_size) - 1) / 1024, 1); OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) | A6XX_SP_CS_UNKNOWN_A9B1_UNK6);