mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 16:40:13 +01:00
ac/gpu_info: split has_set_pairs_packets into context and sh flags
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26095>
This commit is contained in:
parent
551f61bdb3
commit
b74d849a29
9 changed files with 27 additions and 33 deletions
|
|
@ -1262,14 +1262,6 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
|
||||
info->has_export_conflict_bug = info->gfx_level == GFX11;
|
||||
|
||||
/* Only dGPUs have SET_*_PAIRS packets for now.
|
||||
* Register shadowing is only required by SET_SH_REG_PAIRS*, but we require it
|
||||
* for SET_CONTEXT_REG_PAIRS* as well for simplicity.
|
||||
*/
|
||||
info->has_set_pairs_packets = info->gfx_level >= GFX11 &&
|
||||
info->register_shadowing_required &&
|
||||
info->has_dedicated_vram;
|
||||
|
||||
/* GFX6-8 SDMA can't ignore page faults on unmapped sparse resources. */
|
||||
info->sdma_supports_sparse = info->gfx_level >= GFX9;
|
||||
|
||||
|
|
@ -1584,6 +1576,11 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
info->fw_based_mcbp.csa_alignment = device_info.csa_alignment;
|
||||
}
|
||||
|
||||
if (info->gfx_level >= GFX11 && info->has_dedicated_vram) {
|
||||
info->has_set_context_pairs_packed = true;
|
||||
info->has_set_sh_pairs_packed = info->register_shadowing_required;
|
||||
}
|
||||
|
||||
set_custom_cu_en_mask(info);
|
||||
|
||||
const char *ib_filename = debug_get_option("AMD_PARSE_IB", NULL);
|
||||
|
|
@ -1746,7 +1743,8 @@ void ac_print_gpu_info(const struct radeon_info *info, FILE *f)
|
|||
fprintf(f, " never_send_perfcounter_stop = %i\n", info->never_send_perfcounter_stop);
|
||||
fprintf(f, " discardable_allows_big_page = %i\n", info->discardable_allows_big_page);
|
||||
fprintf(f, " has_taskmesh_indirect0_bug = %i\n", info->has_taskmesh_indirect0_bug);
|
||||
fprintf(f, " has_set_pairs_packets = %i\n", info->has_set_pairs_packets);
|
||||
fprintf(f, " has_set_context_pairs_packed = %i\n", info->has_set_context_pairs_packed);
|
||||
fprintf(f, " has_set_sh_pairs_packed = %i\n", info->has_set_sh_pairs_packed);
|
||||
fprintf(f, " conformant_trunc_coord = %i\n", info->conformant_trunc_coord);
|
||||
|
||||
fprintf(f, "Display features:\n");
|
||||
|
|
|
|||
|
|
@ -112,10 +112,10 @@ struct radeon_info {
|
|||
bool has_export_conflict_bug;
|
||||
bool has_vrs_ds_export_bug;
|
||||
bool has_taskmesh_indirect0_bug;
|
||||
bool has_set_pairs_packets;
|
||||
bool sdma_supports_sparse; /* Whether SDMA can safely access sparse resources. */
|
||||
bool sdma_supports_compression; /* Whether SDMA supports DCC and HTILE. */
|
||||
|
||||
bool has_set_context_pairs_packed;
|
||||
bool has_set_sh_pairs_packed;
|
||||
|
||||
/* conformant_trunc_coord is equal to TA_CNTL2.TRUNCATE_COORD_MODE, which exists since gfx11.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -499,7 +499,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
|
|||
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, shader->bo,
|
||||
RADEON_USAGE_READ | RADEON_PRIO_SHADER_BINARY);
|
||||
|
||||
if (sctx->screen->info.has_set_pairs_packets) {
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
radeon_push_compute_sh_reg(R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
|
||||
radeon_opt_push_compute_sh_reg(R_00B848_COMPUTE_PGM_RSRC1,
|
||||
SI_TRACKED_COMPUTE_PGM_RSRC1, config->rsrc1);
|
||||
|
|
@ -740,7 +740,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
|
|||
}
|
||||
radeon_begin_again(cs);
|
||||
} else {
|
||||
if (sctx->screen->info.has_set_pairs_packets) {
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
radeon_push_compute_sh_reg(grid_size_reg, info->grid[0]);
|
||||
radeon_push_compute_sh_reg(grid_size_reg + 4, info->grid[1]);
|
||||
radeon_push_compute_sh_reg(grid_size_reg + 8, info->grid[2]);
|
||||
|
|
@ -756,7 +756,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
|
|||
if (sel->info.uses_variable_block_size) {
|
||||
uint32_t value = info->block[0] | (info->block[1] << 10) | (info->block[2] << 20);
|
||||
|
||||
if (sctx->screen->info.has_set_pairs_packets) {
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
radeon_push_compute_sh_reg(block_size_reg, value);
|
||||
} else {
|
||||
radeon_set_sh_reg(block_size_reg, value);
|
||||
|
|
@ -766,7 +766,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
|
|||
if (sel->info.base.cs.user_data_components_amd) {
|
||||
unsigned num = sel->info.base.cs.user_data_components_amd;
|
||||
|
||||
if (sctx->screen->info.has_set_pairs_packets) {
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
for (unsigned i = 0; i < num; i++)
|
||||
radeon_push_compute_sh_reg(cs_user_data_reg + i * 4, sctx->cs_user_data[i]);
|
||||
} else {
|
||||
|
|
@ -802,7 +802,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
|
|||
sctx->cs_max_waves_per_sh,
|
||||
threadgroups_per_cu);
|
||||
|
||||
if (sctx->screen->info.has_set_pairs_packets) {
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
radeon_opt_push_compute_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS,
|
||||
SI_TRACKED_COMPUTE_RESOURCE_LIMITS,
|
||||
compute_resource_limits);
|
||||
|
|
@ -844,7 +844,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
|
|||
dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
|
||||
}
|
||||
|
||||
if (sctx->screen->info.has_set_pairs_packets) {
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
radeon_opt_push_compute_sh_reg(R_00B81C_COMPUTE_NUM_THREAD_X,
|
||||
SI_TRACKED_COMPUTE_NUM_THREAD_X, num_threads[0]);
|
||||
radeon_opt_push_compute_sh_reg(R_00B820_COMPUTE_NUM_THREAD_Y,
|
||||
|
|
|
|||
|
|
@ -2187,7 +2187,7 @@ void si_shader_change_notify(struct si_context *sctx)
|
|||
if (sh_reg_base) { \
|
||||
unsigned mask = shader_pointers_dirty & (pointer_mask); \
|
||||
\
|
||||
if (sctx->screen->info.has_set_pairs_packets) { \
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) { \
|
||||
u_foreach_bit(i, mask) { \
|
||||
struct si_descriptors *descs = &sctx->descriptors[i]; \
|
||||
unsigned sh_reg = sh_reg_base + descs->shader_userdata_offset; \
|
||||
|
|
@ -2214,7 +2214,7 @@ static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_de
|
|||
{
|
||||
radeon_begin(&sctx->gfx_cs);
|
||||
|
||||
if (sctx->screen->info.has_set_pairs_packets) {
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
radeon_push_gfx_sh_reg(R_00B030_SPI_SHADER_USER_DATA_PS_0 + descs->shader_userdata_offset,
|
||||
descs->gpu_address);
|
||||
radeon_push_gfx_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 + descs->shader_userdata_offset,
|
||||
|
|
@ -2292,7 +2292,7 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx, unsigned index)
|
|||
sh_base[PIPE_SHADER_GEOMETRY], gfx);
|
||||
|
||||
if (sctx->gs_attribute_ring_pointer_dirty) {
|
||||
if (sctx->screen->info.has_set_pairs_packets) {
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
radeon_push_gfx_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 +
|
||||
GFX9_SGPR_ATTRIBUTE_RING_ADDR * 4,
|
||||
sctx->screen->attribute_ring->gpu_address);
|
||||
|
|
@ -2340,7 +2340,7 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
|
|||
sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);
|
||||
|
||||
if (sctx->compute_bindless_pointer_dirty) {
|
||||
if (sctx->screen->info.has_set_pairs_packets) {
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
radeon_push_compute_sh_reg(base + sctx->bindless_descriptors.shader_userdata_offset,
|
||||
sctx->bindless_descriptors.gpu_address);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -1202,13 +1202,9 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||
sscreen->info.use_display_dcc_with_retile_blit = false;
|
||||
}
|
||||
|
||||
if (sscreen->debug_flags & DBG(SHADOW_REGS)) {
|
||||
/* Using the environment variable doesn't enable PAIRS packets for simplicity. */
|
||||
if (sscreen->debug_flags & DBG(SHADOW_REGS))
|
||||
sscreen->info.register_shadowing_required = true;
|
||||
/* Recompute has_set_pairs_packets. */
|
||||
sscreen->info.has_set_pairs_packets = sscreen->info.gfx_level >= GFX11 &&
|
||||
sscreen->info.register_shadowing_required &&
|
||||
sscreen->info.has_dedicated_vram;
|
||||
}
|
||||
|
||||
#ifdef LLVM_AVAILABLE
|
||||
sscreen->use_aco = (sscreen->debug_flags & DBG(USE_ACO));
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ static unsigned pairs_packed_opcode_to_regular(unsigned opcode)
|
|||
|
||||
static unsigned regular_opcode_to_pairs(struct si_pm4_state *state, unsigned opcode)
|
||||
{
|
||||
if (state->screen->info.has_set_pairs_packets) {
|
||||
if (state->screen->info.has_set_sh_pairs_packed) {
|
||||
switch (opcode) {
|
||||
case PKT3_SET_CONTEXT_REG:
|
||||
return PKT3_SET_CONTEXT_REG_PAIRS_PACKED;
|
||||
|
|
|
|||
|
|
@ -2407,7 +2407,7 @@ static void si_init_draw_vbo(struct si_context *sctx)
|
|||
if (!NGG && GFX_VERSION >= GFX11)
|
||||
return;
|
||||
|
||||
if (GFX_VERSION >= GFX11 && sctx->screen->info.has_set_pairs_packets) {
|
||||
if (GFX_VERSION >= GFX11 && sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] =
|
||||
si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_PAIRS_ON>;
|
||||
|
||||
|
|
|
|||
|
|
@ -1195,7 +1195,7 @@ static void gfx10_emit_shader_ngg(struct si_context *sctx, unsigned index)
|
|||
radeon_begin_again(&sctx->gfx_cs);
|
||||
radeon_opt_set_uconfig_reg(sctx, R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC,
|
||||
shader->ngg.ge_pc_alloc);
|
||||
if (sctx->screen->info.has_set_pairs_packets) {
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
assert(!sctx->screen->info.uses_kernel_cu_mask);
|
||||
radeon_opt_push_gfx_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||
|
|
@ -4523,7 +4523,7 @@ static void si_emit_tess_io_layout_state(struct si_context *sctx, unsigned index
|
|||
if (!sctx->shader.tes.cso || !sctx->shader.tcs.current)
|
||||
return;
|
||||
|
||||
if (sctx->screen->info.has_set_pairs_packets) {
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
radeon_opt_push_gfx_sh_reg(R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC2_HS, sctx->ls_hs_rsrc2);
|
||||
|
||||
|
|
@ -4572,7 +4572,7 @@ static void si_emit_tess_io_layout_state(struct si_context *sctx, unsigned index
|
|||
* tessellation is disabled. That's because those user SGPRs are only set in LS
|
||||
* for tessellation.
|
||||
*/
|
||||
if (sctx->screen->info.has_set_pairs_packets) {
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
radeon_opt_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4,
|
||||
SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX,
|
||||
sctx->tcs_offchip_layout);
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ static void si_emit_cull_state(struct si_context *sctx, unsigned index)
|
|||
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->small_prim_cull_info_buf,
|
||||
RADEON_USAGE_READ | RADEON_PRIO_CONST_BUFFER);
|
||||
|
||||
if (sctx->screen->info.has_set_pairs_packets) {
|
||||
if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
radeon_push_gfx_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 +
|
||||
GFX9_SGPR_SMALL_PRIM_CULL_INFO * 4,
|
||||
sctx->small_prim_cull_info_address);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue