From c785514d5e439a461c63dd6d1f524fe196dac201 Mon Sep 17 00:00:00 2001 From: Patrick Lerda Date: Mon, 2 Mar 2026 13:04:33 +0100 Subject: [PATCH] r600: fix atomic buffer offset The atomic offset implementation was incomplete. This change was tested on cayman, it fixes all the variants of this test: khr-gl4[2-6]/shader_atomic_counters/advanced-usage-multi-stage: fail pass khr-gles31/core/shader_atomic_counters/advanced-usage-multi-stage: fail pass Fixes: 06993e4ee350 ("r600: add support for hw atomic counters. (v3)") Signed-off-by: Patrick Lerda (cherry picked from commit 48902771ad795dc310dddf7e36b4ff9ebb16bb9b) Part-of: --- .pick_status.json | 2 +- src/gallium/drivers/r600/evergreen_state.c | 26 +++++++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index f6c46648943..cc5120ce69e 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -4334,7 +4334,7 @@ "description": "r600: fix atomic buffer offset", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "06993e4ee350b9c2ab1e3ee7686878add3900d39", "notes": null diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 577e138b59e..05279a3c8f3 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -5252,6 +5252,7 @@ void eg_trace_emit(struct r600_context *rctx) static void evergreen_emit_set_append_cnt(struct r600_context *rctx, const struct r600_shader_atomic *atomic, struct r600_resource *resource, + const unsigned buffer_offset, uint32_t pkt_flags) { struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; @@ -5259,7 +5260,7 @@ static void evergreen_emit_set_append_cnt(struct r600_context *rctx, resource, RADEON_USAGE_READ | RADEON_PRIO_SHADER_RW_BUFFER); - uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); + uint64_t dst_offset = resource->gpu_address + (atomic->start * 4) + buffer_offset; uint32_t base_reg_0 = R_02872C_GDS_APPEND_COUNT_0; uint32_t reg_val = (base_reg_0 + atomic->hw_idx * 4 - EVERGREEN_CONTEXT_REG_OFFSET) >> 2; @@ -5277,6 +5278,7 @@ static void evergreen_emit_set_append_cnt(struct r600_context *rctx, static void evergreen_emit_event_write_eos(struct r600_context *rctx, const struct r600_shader_atomic *atomic, struct r600_resource *resource, + const unsigned buffer_offset, uint32_t pkt_flags) { struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; @@ -5286,7 +5288,7 @@ static void evergreen_emit_event_write_eos(struct r600_context *rctx, resource, RADEON_USAGE_WRITE | RADEON_PRIO_SHADER_RW_BUFFER); - uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); + uint64_t dst_offset = resource->gpu_address + (atomic->start * 4) + buffer_offset; uint32_t reg_val = (base_reg_0 + atomic->hw_idx * 4) >> 2; assert(atomic->count == 1); @@ -5306,6 +5308,7 @@ static void evergreen_emit_event_write_eos(struct r600_context *rctx, static void cayman_emit_event_write_eos(struct r600_context *rctx, const struct r600_shader_atomic *atomic, struct r600_resource *resource, + const unsigned buffer_offset, uint32_t pkt_flags) { struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; @@ -5314,7 +5317,7 @@ static void cayman_emit_event_write_eos(struct r600_context *rctx, resource, RADEON_USAGE_WRITE | RADEON_PRIO_SHADER_RW_BUFFER); - uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); + uint64_t dst_offset = resource->gpu_address + (atomic->start * 4) + buffer_offset; if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE) event = EVENT_TYPE_CS_DONE; @@ -5332,6 +5335,7 @@ static void cayman_emit_event_write_eos(struct r600_context *rctx, static void cayman_write_count_to_gds(struct r600_context *rctx, const struct r600_shader_atomic *atomic, struct r600_resource *resource, + const unsigned buffer_offset, const uint32_t pkt_flags) { struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; @@ -5339,7 +5343,7 @@ static void cayman_write_count_to_gds(struct r600_context *rctx, resource, RADEON_USAGE_READ | RADEON_PRIO_SHADER_RW_BUFFER); - uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); + uint64_t dst_offset = resource->gpu_address + (atomic->start * 4) + buffer_offset; radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0) | pkt_flags); radeon_emit(cs, dst_offset & 0xffffffff); @@ -5465,13 +5469,14 @@ void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, for (int i = 0; i < global_atomic_count; i++) { const struct r600_shader_atomic *atomic = &combined_atomics[i]; - struct r600_resource *resource = r600_as_resource(astate->buffer[atomic->resource_id].buffer); + const struct pipe_shader_buffer *const atomic_buffer = &astate->buffer[atomic->resource_id]; + struct r600_resource *resource = r600_as_resource(atomic_buffer->buffer); assert(resource); if (rctx->b.gfx_level == CAYMAN) - cayman_write_count_to_gds(rctx, atomic, resource, pkt_flags); + cayman_write_count_to_gds(rctx, atomic, resource, atomic_buffer->buffer_offset, pkt_flags); else - evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags); + evergreen_emit_set_append_cnt(rctx, atomic, resource, atomic_buffer->buffer_offset, pkt_flags); } } @@ -5495,13 +5500,14 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, for (int i = 0; i < global_atomic_count; i++) { const struct r600_shader_atomic *atomic = &combined_atomics[i]; - struct r600_resource *resource = r600_as_resource(astate->buffer[atomic->resource_id].buffer); + const struct pipe_shader_buffer *const atomic_buffer = &astate->buffer[atomic->resource_id]; + struct r600_resource *resource = r600_as_resource(atomic_buffer->buffer); assert(resource); if (rctx->b.gfx_level == CAYMAN) - cayman_emit_event_write_eos(rctx, atomic, resource, pkt_flags); + cayman_emit_event_write_eos(rctx, atomic, resource, atomic_buffer->buffer_offset, pkt_flags); else - evergreen_emit_event_write_eos(rctx, atomic, resource, pkt_flags); + evergreen_emit_event_write_eos(rctx, atomic, resource, atomic_buffer->buffer_offset, pkt_flags); } if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE)