From 48902771ad795dc310dddf7e36b4ff9ebb16bb9b Mon Sep 17 00:00:00 2001 From: Patrick Lerda Date: Mon, 2 Mar 2026 13:04:33 +0100 Subject: [PATCH] r600: fix atomic buffer offset The atomic offset implementation was incomplete. This change was tested on cayman; it fixes all the variants of this test: khr-gl4[2-6]/shader_atomic_counters/advanced-usage-multi-stage: fail -> pass khr-gles31/core/shader_atomic_counters/advanced-usage-multi-stage: fail -> pass Fixes: 06993e4ee350 ("r600: add support for hw atomic counters. (v3)") Signed-off-by: Patrick Lerda Part-of: --- src/gallium/drivers/r600/evergreen_state.c | 26 +++++++++++++--------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 13a304238ed..599f62e93b6 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -5253,6 +5253,7 @@ void eg_trace_emit(struct r600_context *rctx) static void evergreen_emit_set_append_cnt(struct r600_context *rctx, const struct r600_shader_atomic *atomic, struct r600_resource *resource, + const unsigned buffer_offset, uint32_t pkt_flags) { struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; @@ -5260,7 +5261,7 @@ static void evergreen_emit_set_append_cnt(struct r600_context *rctx, resource, RADEON_USAGE_READ | RADEON_PRIO_SHADER_RW_BUFFER); - uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); + uint64_t dst_offset = resource->gpu_address + (atomic->start * 4) + buffer_offset; uint32_t base_reg_0 = R_02872C_GDS_APPEND_COUNT_0; uint32_t reg_val = (base_reg_0 + atomic->hw_idx * 4 - EVERGREEN_CONTEXT_REG_OFFSET) >> 2; @@ -5278,6 +5279,7 @@ static void evergreen_emit_set_append_cnt(struct r600_context *rctx, static void evergreen_emit_event_write_eos(struct r600_context *rctx, const struct r600_shader_atomic *atomic, struct r600_resource *resource, + const unsigned buffer_offset, uint32_t pkt_flags) { struct radeon_cmdbuf *cs = 
&rctx->b.gfx.cs; @@ -5287,7 +5289,7 @@ static void evergreen_emit_event_write_eos(struct r600_context *rctx, resource, RADEON_USAGE_WRITE | RADEON_PRIO_SHADER_RW_BUFFER); - uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); + uint64_t dst_offset = resource->gpu_address + (atomic->start * 4) + buffer_offset; uint32_t reg_val = (base_reg_0 + atomic->hw_idx * 4) >> 2; assert(atomic->count == 1); @@ -5307,6 +5309,7 @@ static void evergreen_emit_event_write_eos(struct r600_context *rctx, static void cayman_emit_event_write_eos(struct r600_context *rctx, const struct r600_shader_atomic *atomic, struct r600_resource *resource, + const unsigned buffer_offset, uint32_t pkt_flags) { struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; @@ -5315,7 +5318,7 @@ static void cayman_emit_event_write_eos(struct r600_context *rctx, resource, RADEON_USAGE_WRITE | RADEON_PRIO_SHADER_RW_BUFFER); - uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); + uint64_t dst_offset = resource->gpu_address + (atomic->start * 4) + buffer_offset; if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE) event = EVENT_TYPE_CS_DONE; @@ -5333,6 +5336,7 @@ static void cayman_emit_event_write_eos(struct r600_context *rctx, static void cayman_write_count_to_gds(struct r600_context *rctx, const struct r600_shader_atomic *atomic, struct r600_resource *resource, + const unsigned buffer_offset, const uint32_t pkt_flags) { struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; @@ -5340,7 +5344,7 @@ static void cayman_write_count_to_gds(struct r600_context *rctx, resource, RADEON_USAGE_READ | RADEON_PRIO_SHADER_RW_BUFFER); - uint64_t dst_offset = resource->gpu_address + (atomic->start * 4); + uint64_t dst_offset = resource->gpu_address + (atomic->start * 4) + buffer_offset; radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0) | pkt_flags); radeon_emit(cs, dst_offset & 0xffffffff); @@ -5466,13 +5470,14 @@ void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, for (int i = 0; i < global_atomic_count; i++) { const 
struct r600_shader_atomic *atomic = &combined_atomics[i]; - struct r600_resource *resource = r600_as_resource(astate->buffer[atomic->resource_id].buffer); + const struct pipe_shader_buffer *const atomic_buffer = &astate->buffer[atomic->resource_id]; + struct r600_resource *resource = r600_as_resource(atomic_buffer->buffer); assert(resource); if (rctx->b.gfx_level == CAYMAN) - cayman_write_count_to_gds(rctx, atomic, resource, pkt_flags); + cayman_write_count_to_gds(rctx, atomic, resource, atomic_buffer->buffer_offset, pkt_flags); else - evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags); + evergreen_emit_set_append_cnt(rctx, atomic, resource, atomic_buffer->buffer_offset, pkt_flags); } } @@ -5496,13 +5501,14 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, for (int i = 0; i < global_atomic_count; i++) { const struct r600_shader_atomic *atomic = &combined_atomics[i]; - struct r600_resource *resource = r600_as_resource(astate->buffer[atomic->resource_id].buffer); + const struct pipe_shader_buffer *const atomic_buffer = &astate->buffer[atomic->resource_id]; + struct r600_resource *resource = r600_as_resource(atomic_buffer->buffer); assert(resource); if (rctx->b.gfx_level == CAYMAN) - cayman_emit_event_write_eos(rctx, atomic, resource, pkt_flags); + cayman_emit_event_write_eos(rctx, atomic, resource, atomic_buffer->buffer_offset, pkt_flags); else - evergreen_emit_event_write_eos(rctx, atomic, resource, pkt_flags); + evergreen_emit_event_write_eos(rctx, atomic, resource, atomic_buffer->buffer_offset, pkt_flags); } if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE)