mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 06:58:05 +02:00
ac/gpu_info: add #define AMD_MEMCHANNEL_INTERLEAVE_BYTES
radeon_info::pipe_interleave_bytes is renamed to r600_pipe_interleave_bytes where it can be 512 on some chips. Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39120>
This commit is contained in:
parent
92133bb0ab
commit
13cfd0176c
10 changed files with 32 additions and 32 deletions
|
|
@ -946,11 +946,13 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->gb_addr_config = 0;
|
||||
|
||||
info->num_tile_pipes = 1 << G_0098F8_NUM_PIPES(info->gb_addr_config);
|
||||
info->pipe_interleave_bytes = 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
|
||||
assert((256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config)) ==
|
||||
AMD_MEMCHANNEL_INTERLEAVE_BYTES);
|
||||
} else {
|
||||
unsigned pipe_config = G_009910_PIPE_CONFIG(amdinfo.gb_tile_mode[CIK_TILE_MODE_COLOR_2D]);
|
||||
info->num_tile_pipes = ac_pipe_config_to_num_pipes(pipe_config);
|
||||
info->pipe_interleave_bytes = 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(info->gb_addr_config);
|
||||
assert((256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(info->gb_addr_config)) ==
|
||||
AMD_MEMCHANNEL_INTERLEAVE_BYTES);
|
||||
}
|
||||
info->r600_has_virtual_memory = true;
|
||||
|
||||
|
|
@ -1992,7 +1994,6 @@ void ac_print_gpu_info(FILE *f, const struct radeon_info *info, int fd)
|
|||
fprintf(f, " pa_sc_tile_steering_override = 0x%x\n", info->pa_sc_tile_steering_override);
|
||||
fprintf(f, " max_render_backends = %i\n", info->max_render_backends);
|
||||
fprintf(f, " num_tile_pipes = %i\n", info->num_tile_pipes);
|
||||
fprintf(f, " pipe_interleave_bytes = %i\n", info->pipe_interleave_bytes);
|
||||
fprintf(f, " enabled_rb_mask = 0x%" PRIx64 "\n", info->enabled_rb_mask);
|
||||
fprintf(f, " max_alignment = %u\n", (unsigned)info->max_alignment);
|
||||
fprintf(f, " pbb_max_alloc_count = %u\n", info->pbb_max_alloc_count);
|
||||
|
|
@ -2390,7 +2391,7 @@ void ac_get_task_info(const struct radeon_info *info,
|
|||
* 64K | 550 | 574 | +4.3%
|
||||
* # Adding 256 mitigates the performance loss from increasing num_entries.
|
||||
*/
|
||||
const uint32_t payload_entry_size = 16384 + info->pipe_interleave_bytes;
|
||||
const uint32_t payload_entry_size = 16384 + AMD_MEMCHANNEL_INTERLEAVE_BYTES;
|
||||
const uint16_t num_entries = get_task_num_entries(info->family);
|
||||
const uint32_t draw_ring_bytes = num_entries * AC_TASK_DRAW_ENTRY_BYTES;
|
||||
const uint32_t payload_ring_bytes = num_entries * payload_entry_size;
|
||||
|
|
|
|||
|
|
@ -82,10 +82,10 @@ extern "C" {
|
|||
* - Vertex 0 of each TCS output starts on an address aligned to 256 to make TCS output stores
|
||||
* from each subgroup always store 256B-aligned blocks of 256*N bytes.
|
||||
*
|
||||
* Number 256 comes from GB_ADDR_CONFIG.PIPE_INTERLEAVE_SIZE and is stored in
|
||||
* radeon_info::pipe_interleave_bytes. It's always 256 on all GCN and RDNA chips. "Pipe" means
|
||||
* a memory channel in this context.
|
||||
* Number 256 comes from GB_ADDR_CONFIG.PIPE_INTERLEAVE_SIZE. It's always 256 on all GCN and RDNA
|
||||
* chips. "Pipe" means a memory channel in this context.
|
||||
*/
|
||||
#define AMD_MEMCHANNEL_INTERLEAVE_BYTES 256 /* always equal to GB_ADDR_CONFIG.PIPE_INTERLEAVE_SIZE */
|
||||
|
||||
struct amdgpu_gpu_info;
|
||||
struct drm_amdgpu_info_device;
|
||||
|
|
@ -417,12 +417,12 @@ struct radeon_info {
|
|||
uint32_t r600_gb_backend_map; /* R600 harvest config */
|
||||
bool r600_gb_backend_map_valid;
|
||||
uint32_t r600_num_banks;
|
||||
uint32_t r600_pipe_interleave_bytes;
|
||||
uint32_t mc_arb_ramcfg;
|
||||
uint32_t gb_addr_config;
|
||||
uint32_t pa_sc_tile_steering_override; /* CLEAR_STATE also sets this */
|
||||
uint32_t max_render_backends; /* number of render backends incl. disabled ones */
|
||||
uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */
|
||||
uint32_t pipe_interleave_bytes;
|
||||
uint64_t enabled_rb_mask; /* bitmask of enabled physical RBs, up to max_render_backends bits */
|
||||
uint64_t max_alignment; /* from addrlib */
|
||||
uint32_t pbb_max_alloc_count;
|
||||
|
|
|
|||
|
|
@ -908,8 +908,7 @@ unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims
|
|||
return CLAMP(workgroup_size, 1, 256);
|
||||
}
|
||||
|
||||
static unsigned get_tcs_wg_output_mem_size(const struct radeon_info *info,
|
||||
uint32_t num_tcs_output_cp, uint32_t num_mem_tcs_outputs,
|
||||
static unsigned get_tcs_wg_output_mem_size(uint32_t num_tcs_output_cp, uint32_t num_mem_tcs_outputs,
|
||||
uint32_t num_mem_tcs_patch_outputs, uint32_t num_patches)
|
||||
{
|
||||
/* Align each per-vertex and per-patch output to 16 vec4 elements = 256B. It's most optimal when
|
||||
|
|
@ -920,8 +919,8 @@ static unsigned get_tcs_wg_output_mem_size(const struct radeon_info *info,
|
|||
* cover 5 channels (128B .. 1.125K) instead of 4, which could increase VMEM latency.
|
||||
*/
|
||||
unsigned mem_one_pervertex_output = align(16 * num_tcs_output_cp * num_patches,
|
||||
info->pipe_interleave_bytes);
|
||||
unsigned mem_one_perpatch_output = align(16 * num_patches, info->pipe_interleave_bytes);
|
||||
AMD_MEMCHANNEL_INTERLEAVE_BYTES);
|
||||
unsigned mem_one_perpatch_output = align(16 * num_patches, AMD_MEMCHANNEL_INTERLEAVE_BYTES);
|
||||
|
||||
return mem_one_pervertex_output * num_mem_tcs_outputs +
|
||||
mem_one_perpatch_output * num_mem_tcs_patch_outputs;
|
||||
|
|
@ -957,20 +956,20 @@ uint32_t ac_compute_num_tess_patches(const struct radeon_info *info, uint32_t nu
|
|||
num_patches = MIN2(num_patches, 16); /* recommended */
|
||||
|
||||
/* Make sure the output data fits in the offchip buffer */
|
||||
unsigned mem_size = get_tcs_wg_output_mem_size(info, num_tcs_output_cp, num_mem_tcs_outputs,
|
||||
unsigned mem_size = get_tcs_wg_output_mem_size(num_tcs_output_cp, num_mem_tcs_outputs,
|
||||
num_mem_tcs_patch_outputs, num_patches);
|
||||
if (mem_size > info->hs_offchip_workgroup_dw_size * 4) {
|
||||
/* Find the number of patches that fit in memory. Each output is aligned separately,
|
||||
* so this division won't return a precise result.
|
||||
*/
|
||||
num_patches = info->hs_offchip_workgroup_dw_size * 4 /
|
||||
get_tcs_wg_output_mem_size(info, num_tcs_output_cp, num_mem_tcs_outputs,
|
||||
get_tcs_wg_output_mem_size(num_tcs_output_cp, num_mem_tcs_outputs,
|
||||
num_mem_tcs_patch_outputs, 1);
|
||||
assert(get_tcs_wg_output_mem_size(info, num_tcs_output_cp, num_mem_tcs_outputs,
|
||||
assert(get_tcs_wg_output_mem_size(num_tcs_output_cp, num_mem_tcs_outputs,
|
||||
num_mem_tcs_patch_outputs, num_patches) <=
|
||||
info->hs_offchip_workgroup_dw_size * 4);
|
||||
|
||||
while (get_tcs_wg_output_mem_size(info, num_tcs_output_cp, num_mem_tcs_outputs,
|
||||
while (get_tcs_wg_output_mem_size(num_tcs_output_cp, num_mem_tcs_outputs,
|
||||
num_mem_tcs_patch_outputs, num_patches + 1) <=
|
||||
info->hs_offchip_workgroup_dw_size * 4)
|
||||
num_patches++;
|
||||
|
|
@ -1046,7 +1045,7 @@ ac_compute_scratch_wavesize(const struct radeon_info *info, uint32_t bytes_per_w
|
|||
* scratch performance by more randomly distributing scratch waves among
|
||||
* memory channels.
|
||||
*
|
||||
* On GFX11+, this is exactly "|= info->pipe_interleave_bytes".
|
||||
* On GFX11+, this is exactly "|= AMD_MEMCHANNEL_INTERLEAVE_BYTES".
|
||||
*/
|
||||
if (bytes_per_wave)
|
||||
bytes_per_wave |= info->scratch_wavesize_granularity;
|
||||
|
|
|
|||
|
|
@ -1276,7 +1276,7 @@ static int gfx6_surface_settings(struct ac_addrlib *addrlib, const struct radeon
|
|||
static void ac_compute_cmask(const struct radeon_info *info, const struct ac_surf_config *config,
|
||||
struct radeon_surf *surf)
|
||||
{
|
||||
unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
|
||||
unsigned pipe_interleave_bytes = AMD_MEMCHANNEL_INTERLEAVE_BYTES;
|
||||
unsigned num_pipes = info->num_tile_pipes;
|
||||
unsigned cl_width, cl_height;
|
||||
|
||||
|
|
|
|||
|
|
@ -568,14 +568,12 @@ ac_prepare_cs_clear_copy_buffer(const struct ac_cs_clear_copy_buffer_options *op
|
|||
* the beginning a 256B block and clear/copy whole 256B blocks. Clearing/copying a 256B block
|
||||
* partially for each wave is inefficient, which happens when dst_offset isn't aligned to 256.
|
||||
* Clearing/copying whole 256B blocks per wave isn't possible if dwords_per_thread isn't 2^n.
|
||||
*
|
||||
* pipe_interleave_bytes is 256.
|
||||
*/
|
||||
unsigned start_thread =
|
||||
dst_offset_bound % options->info->pipe_interleave_bytes &&
|
||||
dst_offset_bound % AMD_MEMCHANNEL_INTERLEAVE_BYTES &&
|
||||
util_is_power_of_two_nonzero(dwords_per_thread) ?
|
||||
DIV_ROUND_UP(options->info->pipe_interleave_bytes -
|
||||
dst_offset_bound % options->info->pipe_interleave_bytes,
|
||||
DIV_ROUND_UP(AMD_MEMCHANNEL_INTERLEAVE_BYTES -
|
||||
dst_offset_bound % AMD_MEMCHANNEL_INTERLEAVE_BYTES,
|
||||
dwords_per_thread * 4) : 0;
|
||||
out->shader_key.has_start_thread = start_thread != 0;
|
||||
|
||||
|
|
|
|||
|
|
@ -1076,7 +1076,7 @@ static void evergreen_set_color_surface_buffer(struct r600_context *rctx,
|
|||
const struct util_format_description *desc;
|
||||
unsigned block_size = util_format_get_blocksize(res->b.b.format);
|
||||
unsigned pitch_alignment =
|
||||
MAX2(64, rctx->screen->b.info.pipe_interleave_bytes / block_size);
|
||||
MAX2(64, rctx->screen->b.info.r600_pipe_interleave_bytes / block_size);
|
||||
unsigned pitch = align(res->b.b.width0, pitch_alignment);
|
||||
int i;
|
||||
unsigned width_elements;
|
||||
|
|
|
|||
|
|
@ -1078,7 +1078,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
|||
printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
|
||||
printf("num_render_backends = %i\n", rscreen->info.max_render_backends);
|
||||
printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
|
||||
printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
|
||||
printf("pipe_interleave_bytes = %i\n", rscreen->info.r600_pipe_interleave_bytes);
|
||||
printf("enabled_rb_mask = 0x%" PRIx64 "\n", rscreen->info.enabled_rb_mask);
|
||||
printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -723,7 +723,7 @@ void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
|
|||
unsigned element_bits = 4;
|
||||
unsigned cmask_cache_bits = 1024;
|
||||
unsigned num_pipes = rscreen->info.num_tile_pipes;
|
||||
unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
|
||||
unsigned pipe_interleave_bytes = rscreen->info.r600_pipe_interleave_bytes;
|
||||
|
||||
unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
|
||||
unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
|
||||
|
|
@ -843,7 +843,7 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
|
|||
slice_elements = (width * height) / (8 * 8);
|
||||
slice_bytes = slice_elements * 4;
|
||||
|
||||
pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
|
||||
pipe_interleave_bytes = rscreen->info.r600_pipe_interleave_bytes;
|
||||
base_align = num_pipes * pipe_interleave_bytes;
|
||||
|
||||
rtex->surface.meta_alignment_log2 = util_logbase2(base_align);
|
||||
|
|
|
|||
|
|
@ -204,7 +204,8 @@ static void si_compute_cmask(const struct radeon_info *info,
|
|||
const struct ac_surf_config *config,
|
||||
struct radeon_surf *surf)
|
||||
{
|
||||
unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
|
||||
unsigned pipe_interleave_bytes = info->gfx_level >= GFX6 ? AMD_MEMCHANNEL_INTERLEAVE_BYTES :
|
||||
info->r600_pipe_interleave_bytes;
|
||||
unsigned num_pipes = info->num_tile_pipes;
|
||||
unsigned cl_width, cl_height;
|
||||
|
||||
|
|
@ -315,7 +316,8 @@ static void si_compute_htile(const struct radeon_info *info,
|
|||
slice_elements = (width * height) / (8 * 8);
|
||||
slice_bytes = slice_elements * 4;
|
||||
|
||||
pipe_interleave_bytes = info->pipe_interleave_bytes;
|
||||
pipe_interleave_bytes = info->gfx_level >= GFX6 ? AMD_MEMCHANNEL_INTERLEAVE_BYTES :
|
||||
info->r600_pipe_interleave_bytes;
|
||||
base_align = num_pipes * pipe_interleave_bytes;
|
||||
|
||||
surf->meta_alignment_log2 = util_logbase2(base_align);
|
||||
|
|
|
|||
|
|
@ -465,13 +465,13 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
|||
4 << ((tiling_config & 0xf0) >> 4) :
|
||||
4 << ((tiling_config & 0x30) >> 4);
|
||||
|
||||
ws->info.pipe_interleave_bytes =
|
||||
ws->info.r600_pipe_interleave_bytes =
|
||||
ws->info.gfx_level >= EVERGREEN ?
|
||||
256 << ((tiling_config & 0xf00) >> 8) :
|
||||
256 << ((tiling_config & 0xc0) >> 6);
|
||||
|
||||
if (!ws->info.pipe_interleave_bytes)
|
||||
ws->info.pipe_interleave_bytes =
|
||||
if (!ws->info.r600_pipe_interleave_bytes)
|
||||
ws->info.r600_pipe_interleave_bytes =
|
||||
ws->info.gfx_level >= EVERGREEN ? 512 : 256;
|
||||
|
||||
radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue