r600: fix atomic buffer offset

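The atomic buffer offset implementation was incomplete: the buffer_offset
of the bound atomic buffer was not added to the GPU address used when
loading and saving the hardware atomic counters.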

This change was tested on cayman; it fixes all the
variants of this test:
khr-gl4[2-6]/shader_atomic_counters/advanced-usage-multi-stage: fail -> pass
khr-gles31/core/shader_atomic_counters/advanced-usage-multi-stage: fail -> pass

Fixes: 06993e4ee3 ("r600: add support for hw atomic counters. (v3)")
Signed-off-by: Patrick Lerda <patrick9876@free.fr>
(cherry picked from commit 48902771ad)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41269>
This commit is contained in:
Patrick Lerda 2026-03-02 13:04:33 +01:00 committed by Eric Engestrom
parent 5ec6772843
commit c785514d5e
2 changed files with 17 additions and 11 deletions

View file

@ -4334,7 +4334,7 @@
"description": "r600: fix atomic buffer offset",
"nominated": true,
"nomination_type": 2,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "06993e4ee350b9c2ab1e3ee7686878add3900d39",
"notes": null

View file

@ -5252,6 +5252,7 @@ void eg_trace_emit(struct r600_context *rctx)
static void evergreen_emit_set_append_cnt(struct r600_context *rctx,
const struct r600_shader_atomic *atomic,
struct r600_resource *resource,
const unsigned buffer_offset,
uint32_t pkt_flags)
{
struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
@ -5259,7 +5260,7 @@ static void evergreen_emit_set_append_cnt(struct r600_context *rctx,
resource,
RADEON_USAGE_READ |
RADEON_PRIO_SHADER_RW_BUFFER);
uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);
uint64_t dst_offset = resource->gpu_address + (atomic->start * 4) + buffer_offset;
uint32_t base_reg_0 = R_02872C_GDS_APPEND_COUNT_0;
uint32_t reg_val = (base_reg_0 + atomic->hw_idx * 4 - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
@ -5277,6 +5278,7 @@ static void evergreen_emit_set_append_cnt(struct r600_context *rctx,
static void evergreen_emit_event_write_eos(struct r600_context *rctx,
const struct r600_shader_atomic *atomic,
struct r600_resource *resource,
const unsigned buffer_offset,
uint32_t pkt_flags)
{
struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
@ -5286,7 +5288,7 @@ static void evergreen_emit_event_write_eos(struct r600_context *rctx,
resource,
RADEON_USAGE_WRITE |
RADEON_PRIO_SHADER_RW_BUFFER);
uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);
uint64_t dst_offset = resource->gpu_address + (atomic->start * 4) + buffer_offset;
uint32_t reg_val = (base_reg_0 + atomic->hw_idx * 4) >> 2;
assert(atomic->count == 1);
@ -5306,6 +5308,7 @@ static void evergreen_emit_event_write_eos(struct r600_context *rctx,
static void cayman_emit_event_write_eos(struct r600_context *rctx,
const struct r600_shader_atomic *atomic,
struct r600_resource *resource,
const unsigned buffer_offset,
uint32_t pkt_flags)
{
struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
@ -5314,7 +5317,7 @@ static void cayman_emit_event_write_eos(struct r600_context *rctx,
resource,
RADEON_USAGE_WRITE |
RADEON_PRIO_SHADER_RW_BUFFER);
uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);
uint64_t dst_offset = resource->gpu_address + (atomic->start * 4) + buffer_offset;
if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE)
event = EVENT_TYPE_CS_DONE;
@ -5332,6 +5335,7 @@ static void cayman_emit_event_write_eos(struct r600_context *rctx,
static void cayman_write_count_to_gds(struct r600_context *rctx,
const struct r600_shader_atomic *atomic,
struct r600_resource *resource,
const unsigned buffer_offset,
const uint32_t pkt_flags)
{
struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
@ -5339,7 +5343,7 @@ static void cayman_write_count_to_gds(struct r600_context *rctx,
resource,
RADEON_USAGE_READ |
RADEON_PRIO_SHADER_RW_BUFFER);
uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);
uint64_t dst_offset = resource->gpu_address + (atomic->start * 4) + buffer_offset;
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0) | pkt_flags);
radeon_emit(cs, dst_offset & 0xffffffff);
@ -5465,13 +5469,14 @@ void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
for (int i = 0; i < global_atomic_count; i++) {
const struct r600_shader_atomic *atomic = &combined_atomics[i];
struct r600_resource *resource = r600_as_resource(astate->buffer[atomic->resource_id].buffer);
const struct pipe_shader_buffer *const atomic_buffer = &astate->buffer[atomic->resource_id];
struct r600_resource *resource = r600_as_resource(atomic_buffer->buffer);
assert(resource);
if (rctx->b.gfx_level == CAYMAN)
cayman_write_count_to_gds(rctx, atomic, resource, pkt_flags);
cayman_write_count_to_gds(rctx, atomic, resource, atomic_buffer->buffer_offset, pkt_flags);
else
evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags);
evergreen_emit_set_append_cnt(rctx, atomic, resource, atomic_buffer->buffer_offset, pkt_flags);
}
}
@ -5495,13 +5500,14 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
for (int i = 0; i < global_atomic_count; i++) {
const struct r600_shader_atomic *atomic = &combined_atomics[i];
struct r600_resource *resource = r600_as_resource(astate->buffer[atomic->resource_id].buffer);
const struct pipe_shader_buffer *const atomic_buffer = &astate->buffer[atomic->resource_id];
struct r600_resource *resource = r600_as_resource(atomic_buffer->buffer);
assert(resource);
if (rctx->b.gfx_level == CAYMAN)
cayman_emit_event_write_eos(rctx, atomic, resource, pkt_flags);
cayman_emit_event_write_eos(rctx, atomic, resource, atomic_buffer->buffer_offset, pkt_flags);
else
evergreen_emit_event_write_eos(rctx, atomic, resource, pkt_flags);
evergreen_emit_event_write_eos(rctx, atomic, resource, atomic_buffer->buffer_offset, pkt_flags);
}
if (pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE)