From 1edeeb3d1369f95cbe0b47123e14562d684c7058 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 13 Mar 2024 08:19:57 -0400 Subject: [PATCH] freedreno/a7xx: Document partial workgroup register This lets us implement "unaligned" dispatches without inserting shader code. Part-of: --- src/freedreno/computerator/a6xx.cc | 6 +++--- src/freedreno/registers/adreno/a6xx.xml | 4 ++-- src/freedreno/vulkan/tu_device.cc | 2 +- src/freedreno/vulkan/tu_shader.cc | 6 +++--- src/gallium/drivers/freedreno/a6xx/fd6_compute.cc | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/freedreno/computerator/a6xx.cc b/src/freedreno/computerator/a6xx.cc index 755ff5bb95b..2b38ad3cd4d 100644 --- a/src/freedreno/computerator/a6xx.cc +++ b/src/freedreno/computerator/a6xx.cc @@ -499,9 +499,9 @@ a6xx_emit_grid(struct kernel *kernel, uint32_t grid[3], .localsizez = local_size[2] - 1, )); if (CHIP == A7XX) { - OUT_REG(ring, A7XX_HLSQ_CS_LOCAL_SIZE(.localsizex = local_size[0] - 1, - .localsizey = local_size[1] - 1, - .localsizez = local_size[2] - 1, )); + OUT_REG(ring, A7XX_HLSQ_CS_LAST_LOCAL_SIZE(.localsizex = local_size[0] - 1, + .localsizey = local_size[1] - 1, + .localsizez = local_size[2] - 1, )); } OUT_REG(ring, HLSQ_CS_NDRANGE_1(CHIP, diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml index fbf40878651..050d9ae8980 100644 --- a/src/freedreno/registers/adreno/a6xx.xml +++ b/src/freedreno/registers/adreno/a6xx.xml @@ -5723,8 +5723,8 @@ to upconvert to 32b float internally? - - + + diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index f4067d8cfc6..4070fbb3f85 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -2302,7 +2302,7 @@ tu_init_cmdbuf_start_a725_quirk(struct tu_device *device) HLSQ_CS_NDRANGE_4(A7XX, .globaloff_y = 0), HLSQ_CS_NDRANGE_5(A7XX, .globalsize_z = 1), HLSQ_CS_NDRANGE_6(A7XX, .globaloff_z = 0)); - tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_LOCAL_SIZE( + tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_LAST_LOCAL_SIZE( .localsizex = 255, .localsizey = 0, .localsizez = 0)); diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index b274c2f14f2..6c1023c9e25 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -1553,9 +1553,9 @@ tu6_emit_cs_config(struct tu_cs *cs, WORKITEMRASTORDER_TILED, )); tu_cs_emit_regs( - cs, A7XX_HLSQ_CS_LOCAL_SIZE(.localsizex = v->local_size[0] - 1, - .localsizey = v->local_size[1] - 1, - .localsizez = v->local_size[2] - 1, )); + cs, A7XX_HLSQ_CS_LAST_LOCAL_SIZE(.localsizex = v->local_size[0] - 1, + .localsizey = v->local_size[1] - 1, + .localsizez = v->local_size[2] - 1, )); tu_cs_emit_regs(cs, A7XX_SP_CS_UNKNOWN_A9BE(0)); // Sometimes is 0x08000000 } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc index fc619a96b8d..8f701b13a69 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc @@ -55,7 +55,7 @@ cs_program_emit_local_size(struct fd_context *ctx, struct fd_ringbuffer *ring, ); OUT_REG(ring, - A7XX_HLSQ_CS_LOCAL_SIZE( + A7XX_HLSQ_CS_LAST_LOCAL_SIZE( .localsizex = local_size[0] - 1, .localsizey = local_size[1] - 1, .localsizez = local_size[2] - 1,