mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
radv: recalculate legacy_gs_info on bind
Previously, legacy_gs_info was calculated based on
gs_info->legacy_gs_info.esgs_itemsize, which is calculated based on the GS
input varyings.
However, when using ESO, the VS/TES can have outputs that are not read by the
GS, which leads to underestimating LDS usage.
Cc: mesa-stable
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38514>
(cherry picked from commit 5e8885a339)
This commit is contained in:
parent
0f909636cc
commit
bc639539af
7 changed files with 45 additions and 35 deletions
|
|
@ -4414,7 +4414,7 @@
|
|||
"description": "radv: recalculate legacy_gs_info on bind",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -11536,18 +11536,27 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
radv_bind_gs_copy_shader(cmd_buffer, gs_copy_shader);
|
||||
|
||||
/* Determine NGG GS info. */
|
||||
/* Determine GS info. */
|
||||
if (cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY] &&
|
||||
cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.is_ngg &&
|
||||
cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.merged_shader_compiled_separately) {
|
||||
struct radv_shader *es = cmd_buffer->state.shaders[MESA_SHADER_TESS_EVAL]
|
||||
? cmd_buffer->state.shaders[MESA_SHADER_TESS_EVAL]
|
||||
: cmd_buffer->state.shaders[MESA_SHADER_VERTEX];
|
||||
struct radv_shader *gs = cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY];
|
||||
|
||||
gfx10_ngg_set_esgs_ring_itemsize(device, &es->info, &gs->info, &gs->info.ngg_info);
|
||||
gfx10_get_ngg_info(device, &es->info, &gs->info, &gs->info.ngg_info);
|
||||
radv_precompute_registers_hw_ngg(device, &gs->config, &gs->info);
|
||||
if (cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.is_ngg) {
|
||||
gfx10_ngg_set_esgs_ring_itemsize(device, &es->info, &gs->info, &gs->info.ngg_info);
|
||||
gfx10_get_ngg_info(device, &es->info, &gs->info, &gs->info.ngg_info);
|
||||
radv_precompute_registers_hw_ngg(device, &gs->config, &gs->info);
|
||||
} else {
|
||||
radv_get_legacy_gs_info(device, &es->info, &gs->info);
|
||||
radv_precompute_registers_hw_gs(device, &es->info, &gs->info);
|
||||
|
||||
cmd_buffer->esgs_ring_size_needed =
|
||||
MAX2(cmd_buffer->esgs_ring_size_needed, gs->info.legacy_gs_info.esgs_ring_size);
|
||||
cmd_buffer->gsvs_ring_size_needed =
|
||||
MAX2(cmd_buffer->gsvs_ring_size_needed, gs->info.legacy_gs_info.gsvs_ring_size);
|
||||
}
|
||||
}
|
||||
|
||||
/* Determine the rasterized primitive. */
|
||||
|
|
|
|||
|
|
@ -2803,7 +2803,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
|
|||
}
|
||||
|
||||
if (stages[MESA_SHADER_GEOMETRY].nir && !stages[MESA_SHADER_GEOMETRY].info.is_ngg)
|
||||
radv_get_legacy_gs_info(device, &stages[MESA_SHADER_GEOMETRY].info);
|
||||
radv_get_legacy_gs_info(device, NULL, &stages[MESA_SHADER_GEOMETRY].info);
|
||||
|
||||
/* Compile NIR shaders to AMD assembly. */
|
||||
radv_graphics_shaders_nir_to_asm(device, cache, stages, gfx_state, keep_executable_info, keep_statistic_info,
|
||||
|
|
|
|||
|
|
@ -1494,60 +1494,59 @@ radv_precompute_registers_hw_vs(struct radv_device *device, struct radv_shader_b
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_binary *binary)
|
||||
void
|
||||
radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_shader_info *info = &binary->info;
|
||||
|
||||
info->regs.gs.vgt_esgs_ring_itemsize = info->legacy_gs_info.esgs_itemsize;
|
||||
gs_info->regs.gs.vgt_esgs_ring_itemsize = es_info ? es_info->esgs_itemsize / 4 : gs_info->legacy_gs_info.esgs_itemsize;
|
||||
|
||||
info->regs.gs.vgt_gs_max_prims_per_subgroup =
|
||||
S_028A94_MAX_PRIMS_PER_SUBGROUP(info->legacy_gs_info.gs_inst_prims_in_subgroup);
|
||||
gs_info->regs.gs.vgt_gs_max_prims_per_subgroup =
|
||||
S_028A94_MAX_PRIMS_PER_SUBGROUP(gs_info->legacy_gs_info.gs_inst_prims_in_subgroup);
|
||||
|
||||
info->regs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(info->legacy_gs_info.es_verts_per_subgroup) |
|
||||
S_028A44_GS_PRIMS_PER_SUBGRP(info->legacy_gs_info.gs_prims_per_subgroup) |
|
||||
S_028A44_GS_INST_PRIMS_IN_SUBGRP(info->legacy_gs_info.gs_inst_prims_in_subgroup);
|
||||
gs_info->regs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(gs_info->legacy_gs_info.es_verts_per_subgroup) |
|
||||
S_028A44_GS_PRIMS_PER_SUBGRP(gs_info->legacy_gs_info.gs_prims_per_subgroup) |
|
||||
S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_info->legacy_gs_info.gs_inst_prims_in_subgroup);
|
||||
|
||||
const uint32_t gs_max_out_vertices = info->gs.vertices_out;
|
||||
const uint8_t max_stream = info->gs.num_components_per_stream[3] ? 3
|
||||
: info->gs.num_components_per_stream[2] ? 2
|
||||
: info->gs.num_components_per_stream[1] ? 1
|
||||
const uint32_t gs_max_out_vertices = gs_info->gs.vertices_out;
|
||||
const uint8_t max_stream = gs_info->gs.num_components_per_stream[3] ? 3
|
||||
: gs_info->gs.num_components_per_stream[2] ? 2
|
||||
: gs_info->gs.num_components_per_stream[1] ? 1
|
||||
: 0;
|
||||
const uint8_t *num_components = info->gs.num_components_per_stream;
|
||||
const uint8_t *num_components = gs_info->gs.num_components_per_stream;
|
||||
|
||||
uint32_t offset = num_components[0] * gs_max_out_vertices;
|
||||
info->regs.gs.vgt_gsvs_ring_offset[0] = offset;
|
||||
gs_info->regs.gs.vgt_gsvs_ring_offset[0] = offset;
|
||||
|
||||
if (max_stream >= 1)
|
||||
offset += num_components[1] * gs_max_out_vertices;
|
||||
info->regs.gs.vgt_gsvs_ring_offset[1] = offset;
|
||||
gs_info->regs.gs.vgt_gsvs_ring_offset[1] = offset;
|
||||
|
||||
if (max_stream >= 2)
|
||||
offset += num_components[2] * gs_max_out_vertices;
|
||||
info->regs.gs.vgt_gsvs_ring_offset[2] = offset;
|
||||
gs_info->regs.gs.vgt_gsvs_ring_offset[2] = offset;
|
||||
|
||||
if (max_stream >= 3)
|
||||
offset += num_components[3] * gs_max_out_vertices;
|
||||
info->regs.gs.vgt_gsvs_ring_itemsize = offset;
|
||||
gs_info->regs.gs.vgt_gsvs_ring_itemsize = offset;
|
||||
|
||||
for (uint32_t i = 0; i < 4; i++)
|
||||
info->regs.gs.vgt_gs_vert_itemsize[i] = (max_stream >= i) ? num_components[i] : 0;
|
||||
gs_info->regs.gs.vgt_gs_vert_itemsize[i] = (max_stream >= i) ? num_components[i] : 0;
|
||||
|
||||
const uint32_t gs_num_invocations = info->gs.invocations;
|
||||
info->regs.gs.vgt_gs_instance_cnt =
|
||||
const uint32_t gs_num_invocations = gs_info->gs.invocations;
|
||||
gs_info->regs.gs.vgt_gs_instance_cnt =
|
||||
S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0);
|
||||
|
||||
info->regs.spi_shader_pgm_rsrc3_gs =
|
||||
gs_info->regs.spi_shader_pgm_rsrc3_gs =
|
||||
ac_apply_cu_en(S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F), C_00B21C_CU_EN, 0, &pdev->info);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
info->regs.spi_shader_pgm_rsrc4_gs =
|
||||
gs_info->regs.spi_shader_pgm_rsrc4_gs =
|
||||
ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0), C_00B204_CU_EN_GFX10,
|
||||
16, &pdev->info);
|
||||
}
|
||||
|
||||
info->regs.vgt_gs_max_vert_out = info->gs.vertices_out;
|
||||
gs_info->regs.vgt_gs_max_vert_out = gs_info->gs.vertices_out;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1897,7 +1896,7 @@ radv_precompute_registers(struct radv_device *device, struct radv_shader_binary
|
|||
if (info->is_ngg) {
|
||||
radv_precompute_registers_hw_ngg(device, &binary->config, &binary->info);
|
||||
} else {
|
||||
radv_precompute_registers_hw_gs(device, binary);
|
||||
radv_precompute_registers_hw_gs(device, NULL, &binary->info);
|
||||
}
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
|
|
|
|||
|
|
@ -736,6 +736,8 @@ uint32_t radv_get_user_sgpr_loc(const struct radv_shader *shader, int idx);
|
|||
|
||||
uint32_t radv_get_user_sgpr(const struct radv_shader *shader, int idx);
|
||||
|
||||
void radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info);
|
||||
|
||||
void radv_precompute_registers_hw_ngg(struct radv_device *device, const struct ac_shader_config *config,
|
||||
struct radv_shader_info *info);
|
||||
|
||||
|
|
|
|||
|
|
@ -677,11 +677,11 @@ gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct
|
|||
}
|
||||
|
||||
void
|
||||
radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *gs_info)
|
||||
radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_legacy_gs_info *out = &gs_info->legacy_gs_info;
|
||||
const unsigned esgs_vertex_stride = out->esgs_itemsize * 4;
|
||||
const unsigned esgs_vertex_stride = es_info ? es_info->esgs_itemsize : out->esgs_itemsize * 4;
|
||||
ac_legacy_gs_subgroup_info info;
|
||||
|
||||
ac_legacy_gs_compute_subgroup_info(gs_info->gs.input_prim, gs_info->gs.vertices_out, gs_info->gs.invocations,
|
||||
|
|
|
|||
|
|
@ -329,7 +329,7 @@ void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shad
|
|||
const enum radv_pipeline_type pipeline_type, bool consider_force_vrs,
|
||||
struct radv_shader_info *info);
|
||||
|
||||
void radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *gs_info);
|
||||
void radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info);
|
||||
|
||||
void gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info,
|
||||
struct radv_shader_info *gs_info, struct gfx10_ngg_info *out);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue