amd,radeonsi: change enabled_rb_mask to 64 bits

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21641>
This commit is contained in:
Marek Olšák 2023-02-18 03:40:41 -05:00 committed by Marge Bot
parent 03ffb8d77c
commit 6e2e89e6d8
8 changed files with 30 additions and 23 deletions

View file

@@ -1222,7 +1222,10 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info)
(info->num_cu / (info->num_se * info->max_sa_per_se * cu_group)) * cu_group;
memcpy(info->si_tile_mode_array, amdinfo.gb_tile_mode, sizeof(amdinfo.gb_tile_mode));
info->enabled_rb_mask = amdinfo.enabled_rb_pipes_mask;
info->enabled_rb_mask = device_info.enabled_rb_pipes_mask;
if (info->drm_minor >= 52)
info->enabled_rb_mask |= (uint64_t)device_info.enabled_rb_pipes_mask_hi << 32;
memcpy(info->cik_macrotile_mode_array, amdinfo.gb_macro_tile_mode,
sizeof(amdinfo.gb_macro_tile_mode));
@@ -1324,7 +1327,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info)
info->family == CHIP_NAVI24 ||
info->family == CHIP_REMBRANDT ||
info->family == CHIP_VANGOGH) &&
util_bitcount(info->enabled_rb_mask) !=
util_bitcount64(info->enabled_rb_mask) !=
info->max_render_backends;
/* On GFX10.3, the polarity of AUTO_FLUSH_MODE is inverted. */
@@ -1374,7 +1377,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info)
const unsigned max_waves_per_tg = 32; /* 1024 threads in Wave32 */
info->max_scratch_waves = MAX2(32 * info->min_good_cu_per_sa * info->max_sa_per_se * info->num_se,
max_waves_per_tg);
info->num_rb = util_bitcount(info->enabled_rb_mask);
info->num_rb = util_bitcount64(info->enabled_rb_mask);
info->max_gflops = (info->gfx_level >= GFX11 ? 256 : 128) * info->num_cu * info->max_gpu_freq_mhz / 1000;
info->memory_bandwidth_gbps = DIV_ROUND_UP(info->memory_freq_mhz_effective * info->memory_bus_width / 8, 1000);
info->has_pcie_bandwidth_info = info->drm_minor >= 51;
@@ -1697,7 +1700,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
fprintf(f, " max_render_backends = %i\n", info->max_render_backends);
fprintf(f, " num_tile_pipes = %i\n", info->num_tile_pipes);
fprintf(f, " pipe_interleave_bytes = %i\n", info->pipe_interleave_bytes);
fprintf(f, " enabled_rb_mask = 0x%x\n", info->enabled_rb_mask);
fprintf(f, " enabled_rb_mask = 0x%" PRIx64 "\n", info->enabled_rb_mask);
fprintf(f, " max_alignment = %u\n", (unsigned)info->max_alignment);
fprintf(f, " pbb_max_alloc_count = %u\n", info->pbb_max_alloc_count);

View file

@@ -257,7 +257,7 @@ struct radeon_info {
uint32_t max_render_backends; /* number of render backends incl. disabled ones */
uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */
uint32_t pipe_interleave_bytes;
uint32_t enabled_rb_mask; /* GCN harvest config */
uint64_t enabled_rb_mask; /* bitmask of enabled physical RBs, up to max_render_backends bits */
uint64_t max_alignment; /* from addrlib */
uint32_t pbb_max_alloc_count;

View file

@@ -90,7 +90,7 @@ build_occlusion_query_shader(struct radv_device *device)
* uint64_t dst_offset = dst_stride * global_id.x;
* bool available = true;
* for (int i = 0; i < db_count; ++i) {
* if (enabled_rb_mask & (1 << i)) {
* if (enabled_rb_mask & BITFIELD64_BIT(i)) {
* uint64_t start = src_buf[src_offset + 16 * i];
* uint64_t end = src_buf[src_offset + 16 * i + 8];
* if ((start & (1ull << 63)) && (end & (1ull << 63)))
@@ -120,7 +120,7 @@ build_occlusion_query_shader(struct radv_device *device)
nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start");
nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end");
nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
unsigned enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
uint64_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
unsigned db_count = device->physical_device->rad_info.max_render_backends;
nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
@@ -145,7 +145,8 @@ build_occlusion_query_shader(struct radv_device *device)
radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count));
nir_ssa_def *enabled_cond =
nir_iand_imm(&b, nir_ishl(&b, nir_imm_int(&b, 1), current_outer_count), enabled_rb_mask);
nir_iand_imm(&b, nir_ishl(&b, nir_imm_int64(&b, 1), current_outer_count),
enabled_rb_mask);
nir_push_if(&b, nir_i2b(&b, enabled_cond));
@@ -1242,14 +1243,14 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
case VK_QUERY_TYPE_OCCLUSION: {
uint64_t const *src64 = (uint64_t const *)src;
uint32_t db_count = device->physical_device->rad_info.max_render_backends;
uint32_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
uint64_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
uint64_t sample_count = 0;
available = 1;
for (int i = 0; i < db_count; ++i) {
uint64_t start, end;
if (!(enabled_rb_mask & (1 << i)))
if (!(enabled_rb_mask & (1ull << i)))
continue;
do {
@@ -1534,8 +1535,8 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
switch (pool->type) {
case VK_QUERY_TYPE_OCCLUSION:
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
unsigned enabled_rb_mask = cmd_buffer->device->physical_device->rad_info.enabled_rb_mask;
uint32_t rb_avail_offset = 16 * util_last_bit(enabled_rb_mask) - 4;
uint64_t enabled_rb_mask = cmd_buffer->device->physical_device->rad_info.enabled_rb_mask;
uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4;
for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
unsigned query = firstQuery + i;
uint64_t src_va = va + query * pool->stride + rb_avail_offset;

View file

@@ -175,7 +175,7 @@ static void
si_set_raster_config(struct radv_physical_device *physical_device, struct radeon_cmdbuf *cs)
{
unsigned num_rb = MIN2(physical_device->rad_info.max_render_backends, 16);
unsigned rb_mask = physical_device->rad_info.enabled_rb_mask;
uint64_t rb_mask = physical_device->rad_info.enabled_rb_mask;
unsigned raster_config, raster_config_1;
ac_get_raster_config(&physical_device->rad_info, &raster_config, &raster_config_1, NULL);
@@ -183,7 +183,7 @@ si_set_raster_config(struct radv_physical_device *physical_device, struct radeon
/* Always use the default config when all backends are enabled
* (or when we failed to determine the enabled backends).
*/
if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
if (!rb_mask || util_bitcount64(rb_mask) >= num_rb) {
radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config);
if (physical_device->rad_info.gfx_level >= GFX7)
radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);

View file

@@ -1320,7 +1320,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
printf("num_render_backends = %i\n", rscreen->info.max_render_backends);
printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask);
printf("enabled_rb_mask = 0x%" PRIx64 "\n", rscreen->info.enabled_rb_mask);
printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
}

View file

@@ -617,7 +617,7 @@ static bool si_query_hw_prepare_buffer(struct si_context *sctx, struct si_query_
query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
unsigned max_rbs = screen->info.max_render_backends;
unsigned enabled_rb_mask = screen->info.enabled_rb_mask;
uint64_t enabled_rb_mask = screen->info.enabled_rb_mask;
unsigned num_results;
unsigned i, j;
@@ -625,7 +625,7 @@ static bool si_query_hw_prepare_buffer(struct si_context *sctx, struct si_query_
num_results = qbuf->buf->b.b.width0 / query->result_size;
for (j = 0; j < num_results; j++) {
for (i = 0; i < max_rbs; i++) {
if (!(enabled_rb_mask & (1 << i))) {
if (!(enabled_rb_mask & (1ull << i))) {
results[(i * 4) + 1] = 0x80000000;
results[(i * 4) + 3] = 0x80000000;
}

View file

@@ -2433,7 +2433,7 @@ static bool si_is_format_supported(struct pipe_screen *screen, enum pipe_format
/* Chips with 1 RB don't increment occlusion queries at 16x MSAA sample rate,
* so don't expose 16 samples there.
*/
const unsigned max_eqaa_samples = util_bitcount(sscreen->info.enabled_rb_mask) <= 1 ? 8 : 16;
const unsigned max_eqaa_samples = util_bitcount64(sscreen->info.enabled_rb_mask) <= 1 ? 8 : 16;
const unsigned max_samples = 8;
/* MSAA support without framebuffer attachments. */
@@ -5540,11 +5540,11 @@ static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *p
{
struct si_screen *sscreen = sctx->screen;
unsigned num_rb = MIN2(sscreen->info.max_render_backends, 16);
unsigned rb_mask = sscreen->info.enabled_rb_mask;
uint64_t rb_mask = sscreen->info.enabled_rb_mask;
unsigned raster_config = sscreen->pa_sc_raster_config;
unsigned raster_config_1 = sscreen->pa_sc_raster_config_1;
if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
if (!rb_mask || util_bitcount64(rb_mask) >= num_rb) {
/* Always use the default config when all backends are enabled
* (or when we failed to determine the enabled backends).
*/

View file

@@ -443,9 +443,12 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
* This fails (silently) on non-GCN or older kernels, overwriting the
* default enabled_rb_mask with the result of the last query.
*/
if (ws->gen >= DRV_SI)
radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL,
&ws->info.enabled_rb_mask);
if (ws->gen >= DRV_SI) {
uint32_t mask;
radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL, &mask);
ws->info.enabled_rb_mask = mask;
}
ws->info.r600_has_virtual_memory = false;