r600: unlock cayman number of atomic counters

This change updates the cayman atomic counters implementation
to be compatible with the OpenGL limit MAX_ATOMIC_COUNTERS (4096).
The previous limit was eight.

This change preserves compatibility with the evergreen
implementation.

Test-level improvements (listed as: test name, previous result, new result):
khr-gl4[2-5]/shader_atomic_counters/basic-usage-tes: fail pass
spec/arb_arrays_of_arrays/execution/atomic_counters/fs-simple-inc-dec-read: skip pass
spec/arb_arrays_of_arrays/execution/atomic_counters/vs-simple-inc-dec-read: skip pass
spec/arb_arrays_of_arrays/linker/vs-to-fs-atomic-counter: skip pass
spec/arb_shader_atomic_counters/active-counters: skip pass

Signed-off-by: Patrick Lerda <patrick9876@free.fr>
Reviewed-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34726>
This commit is contained in:
Patrick Lerda 2025-04-25 13:31:44 +02:00 committed by Marge Bot
parent 0d9bc4eb38
commit 3fc4fc6a60
5 changed files with 70 additions and 5 deletions

View file

@ -342,7 +342,11 @@ static void compute_emit_cs(struct r600_context *rctx,
rctx->cs_block_grid_sizes[3] = rctx->cs_block_grid_sizes[7] = 0;
rctx->driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true;
global_atomic_count = evergreen_emit_atomic_buffer_setup_count(rctx, current, combined_atomics, global_atomic_count);
if (rctx->b.gfx_level == CAYMAN)
global_atomic_count = cayman_emit_atomic_buffer_setup_count(rctx, current, combined_atomics, global_atomic_count);
else
global_atomic_count = evergreen_emit_atomic_buffer_setup_count(rctx, current, combined_atomics, global_atomic_count);
r600_need_cs_space(rctx, 0, true, global_atomic_count);
if (need_buf_const) {

View file

@ -5075,7 +5075,7 @@ static void cayman_emit_event_write_eos(struct r600_context *rctx,
radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6));
radeon_emit(cs, (dst_offset) & 0xffffffff);
radeon_emit(cs, (1 << 29) | ((dst_offset >> 32) & 0xff));
radeon_emit(cs, (atomic->hw_idx) | (1 << 16));
radeon_emit(cs, (atomic->hw_idx) | (atomic->count << 16));
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, reloc);
}
@ -5098,11 +5098,13 @@ static void cayman_write_count_to_gds(struct r600_context *rctx,
radeon_emit(cs, PKT3_CP_DMA_CP_SYNC | PKT3_CP_DMA_DST_SEL(1) | ((dst_offset >> 32) & 0xff));// GDS
radeon_emit(cs, atomic->hw_idx * 4);
radeon_emit(cs, 0);
radeon_emit(cs, PKT3_CP_DMA_CMD_DAS | 4);
radeon_emit(cs, PKT3_CP_DMA_CMD_DAS | (atomic->count * 4));
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, reloc);
}
/* The evergreen_emit_atomic_buffer_setup_count() implementation is designed to map one atomic counter
* per R_02872C_GDS_APPEND_COUNT_x register which limits the total number of atomic counters to 12. */
unsigned evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
struct r600_pipe_shader *cs_shader,
struct r600_shader_atomic *combined_atomics,
@ -5150,6 +5152,58 @@ unsigned evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
return global_atomic_count;
}
/* Cayman variant of the atomic-buffer setup pass: walk the active hardware
 * shader stages (or just the given compute shader) and gather every distinct
 * atomic range they use into combined_atomics, skipping entries that are
 * already present.
 *
 * rctx                - context whose bound shader stages are scanned when
 *                       cs_shader is NULL
 * cs_shader           - compute shader to scan instead of the draw stages,
 *                       or NULL for the graphics pipeline
 * combined_atomics    - output array of unique atomic ranges; assumed to hold
 *                       at least EG_MAX_ATOMIC_BUFFERS entries
 * global_atomic_count - number of entries already valid in combined_atomics
 *
 * Returns the updated number of valid entries in combined_atomics.
 */
unsigned cayman_emit_atomic_buffer_setup_count(struct r600_context *rctx,
					       struct r600_pipe_shader *cs_shader,
					       struct r600_shader_atomic *combined_atomics,
					       unsigned global_atomic_count)
{
	const bool is_compute = !!cs_shader;
	const int num_stages = is_compute ? 1 : EG_NUM_HW_STAGES;

	for (int stage = 0; stage < num_stages; stage++) {
		struct r600_pipe_shader *pshader =
			is_compute ? cs_shader : rctx->hw_shader_stages[stage].shader;

		/* Nothing to collect from an unbound stage or one without atomics. */
		if (!pshader || !pshader->shader.nhwatomic_ranges)
			continue;

		for (unsigned r = 0; r < pshader->shader.nhwatomic_ranges; r++) {
			const struct r600_shader_atomic *atomic = &pshader->shader.atomics[r];
			unsigned idx;

			/* Deduplicate: look for an identical range recorded earlier. */
			for (idx = 0; idx < global_atomic_count; idx++) {
				const struct r600_shader_atomic *c = &combined_atomics[idx];
				if (c->resource_id == atomic->resource_id &&
				    c->hw_idx == atomic->hw_idx &&
				    c->start == atomic->start &&
				    c->count == atomic->count)
					break;
			}
			if (idx < global_atomic_count)
				continue; /* already recorded */

			assert(global_atomic_count < EG_MAX_ATOMIC_BUFFERS);
			combined_atomics[global_atomic_count].hw_idx = atomic->hw_idx;
			combined_atomics[global_atomic_count].resource_id = atomic->resource_id;
			combined_atomics[global_atomic_count].start = atomic->start;
			combined_atomics[global_atomic_count].count = atomic->count;
			global_atomic_count++;
		}
	}
	return global_atomic_count;
}
void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
const bool is_compute,
const struct r600_shader_atomic *combined_atomics,

View file

@ -1,7 +1,7 @@
#ifndef R600_ATOMICS_H
#define R600_ATOMICS_H
#define CM_MAX_ATOMIC_COUNTERS 8
#define CM_MAX_ATOMIC_COUNTERS 4096
#define EG_MAX_ATOMIC_COUNTERS 8
#define EG_MAX_ATOMIC_BUFFERS 8

View file

@ -1074,6 +1074,10 @@ unsigned evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
struct r600_pipe_shader *cs_shader,
struct r600_shader_atomic *combined_atomics,
unsigned global_atomic_count);
unsigned cayman_emit_atomic_buffer_setup_count(struct r600_context *rctx,
struct r600_pipe_shader *cs_shader,
struct r600_shader_atomic *combined_atomics,
unsigned global_atomic_count);
void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
const bool is_compute,
const struct r600_shader_atomic *combined_atomics,

View file

@ -2227,7 +2227,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
}
if (rctx->b.gfx_level >= EVERGREEN) {
global_atomic_count = evergreen_emit_atomic_buffer_setup_count(rctx, NULL, combined_atomics, global_atomic_count);
if (rctx->b.gfx_level == EVERGREEN)
global_atomic_count = evergreen_emit_atomic_buffer_setup_count(rctx, NULL, combined_atomics, global_atomic_count);
else
global_atomic_count = cayman_emit_atomic_buffer_setup_count(rctx, NULL, combined_atomics, global_atomic_count);
}
if (index_size) {