mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 05:50:11 +01:00
radv: recalculate legacy_gs_info on bind
Previously, legacy_gs_info was calculated based on gs_info->legacy_gs_info.esgs_itemsize, which is derived from the GS input varyings. However, when using ESO (VK_EXT_shader_object), the VS/TES can have outputs that are not read by the GS, which leads to underestimating LDS usage. Cc: mesa-stable Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38514>
This commit is contained in:
parent
36f6cf8a35
commit
5e8885a339
6 changed files with 44 additions and 34 deletions
|
|
@ -12729,18 +12729,27 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
radv_bind_gs_copy_shader(cmd_buffer, gs_copy_shader);
|
||||
|
||||
/* Determine NGG GS info. */
|
||||
/* Determine GS info. */
|
||||
if (cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY] &&
|
||||
cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.is_ngg &&
|
||||
cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.merged_shader_compiled_separately) {
|
||||
struct radv_shader *es = cmd_buffer->state.shaders[MESA_SHADER_TESS_EVAL]
|
||||
? cmd_buffer->state.shaders[MESA_SHADER_TESS_EVAL]
|
||||
: cmd_buffer->state.shaders[MESA_SHADER_VERTEX];
|
||||
struct radv_shader *gs = cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY];
|
||||
|
||||
gfx10_ngg_set_esgs_ring_itemsize(device, &es->info, &gs->info, &gs->info.ngg_info);
|
||||
gfx10_get_ngg_info(device, &es->info, &gs->info, &gs->info.ngg_info);
|
||||
radv_precompute_registers_hw_ngg(device, &gs->config, &gs->info);
|
||||
if (cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.is_ngg) {
|
||||
gfx10_ngg_set_esgs_ring_itemsize(device, &es->info, &gs->info, &gs->info.ngg_info);
|
||||
gfx10_get_ngg_info(device, &es->info, &gs->info, &gs->info.ngg_info);
|
||||
radv_precompute_registers_hw_ngg(device, &gs->config, &gs->info);
|
||||
} else {
|
||||
radv_get_legacy_gs_info(device, &es->info, &gs->info);
|
||||
radv_precompute_registers_hw_gs(device, &es->info, &gs->info);
|
||||
|
||||
cmd_buffer->esgs_ring_size_needed =
|
||||
MAX2(cmd_buffer->esgs_ring_size_needed, gs->info.legacy_gs_info.esgs_ring_size);
|
||||
cmd_buffer->gsvs_ring_size_needed =
|
||||
MAX2(cmd_buffer->gsvs_ring_size_needed, gs->info.legacy_gs_info.gsvs_ring_size);
|
||||
}
|
||||
}
|
||||
|
||||
const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
|
||||
|
|
|
|||
|
|
@ -2949,7 +2949,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
|
|||
}
|
||||
|
||||
if (stages[MESA_SHADER_GEOMETRY].nir && !stages[MESA_SHADER_GEOMETRY].info.is_ngg)
|
||||
radv_get_legacy_gs_info(device, &stages[MESA_SHADER_GEOMETRY].info);
|
||||
radv_get_legacy_gs_info(device, NULL, &stages[MESA_SHADER_GEOMETRY].info);
|
||||
|
||||
/* Compile NIR shaders to AMD assembly. */
|
||||
radv_graphics_shaders_nir_to_asm(device, cache, stages, gfx_state, keep_executable_info, keep_statistic_info,
|
||||
|
|
|
|||
|
|
@ -1641,60 +1641,59 @@ radv_precompute_registers_hw_vs(struct radv_device *device, struct radv_shader_b
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_binary *binary)
|
||||
void
|
||||
radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_shader_info *info = &binary->info;
|
||||
|
||||
info->regs.gs.vgt_esgs_ring_itemsize = info->legacy_gs_info.esgs_itemsize;
|
||||
gs_info->regs.gs.vgt_esgs_ring_itemsize = es_info ? es_info->esgs_itemsize / 4 : gs_info->legacy_gs_info.esgs_itemsize;
|
||||
|
||||
info->regs.gs.vgt_gs_max_prims_per_subgroup =
|
||||
S_028A94_MAX_PRIMS_PER_SUBGROUP(info->legacy_gs_info.gs_inst_prims_in_subgroup);
|
||||
gs_info->regs.gs.vgt_gs_max_prims_per_subgroup =
|
||||
S_028A94_MAX_PRIMS_PER_SUBGROUP(gs_info->legacy_gs_info.gs_inst_prims_in_subgroup);
|
||||
|
||||
info->regs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(info->legacy_gs_info.es_verts_per_subgroup) |
|
||||
S_028A44_GS_PRIMS_PER_SUBGRP(info->legacy_gs_info.gs_prims_per_subgroup) |
|
||||
S_028A44_GS_INST_PRIMS_IN_SUBGRP(info->legacy_gs_info.gs_inst_prims_in_subgroup);
|
||||
gs_info->regs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(gs_info->legacy_gs_info.es_verts_per_subgroup) |
|
||||
S_028A44_GS_PRIMS_PER_SUBGRP(gs_info->legacy_gs_info.gs_prims_per_subgroup) |
|
||||
S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_info->legacy_gs_info.gs_inst_prims_in_subgroup);
|
||||
|
||||
const uint32_t gs_max_out_vertices = info->gs.vertices_out;
|
||||
const uint8_t max_stream = info->gs.num_components_per_stream[3] ? 3
|
||||
: info->gs.num_components_per_stream[2] ? 2
|
||||
: info->gs.num_components_per_stream[1] ? 1
|
||||
const uint32_t gs_max_out_vertices = gs_info->gs.vertices_out;
|
||||
const uint8_t max_stream = gs_info->gs.num_components_per_stream[3] ? 3
|
||||
: gs_info->gs.num_components_per_stream[2] ? 2
|
||||
: gs_info->gs.num_components_per_stream[1] ? 1
|
||||
: 0;
|
||||
const uint8_t *num_components = info->gs.num_components_per_stream;
|
||||
const uint8_t *num_components = gs_info->gs.num_components_per_stream;
|
||||
|
||||
uint32_t offset = num_components[0] * gs_max_out_vertices;
|
||||
info->regs.gs.vgt_gsvs_ring_offset[0] = offset;
|
||||
gs_info->regs.gs.vgt_gsvs_ring_offset[0] = offset;
|
||||
|
||||
if (max_stream >= 1)
|
||||
offset += num_components[1] * gs_max_out_vertices;
|
||||
info->regs.gs.vgt_gsvs_ring_offset[1] = offset;
|
||||
gs_info->regs.gs.vgt_gsvs_ring_offset[1] = offset;
|
||||
|
||||
if (max_stream >= 2)
|
||||
offset += num_components[2] * gs_max_out_vertices;
|
||||
info->regs.gs.vgt_gsvs_ring_offset[2] = offset;
|
||||
gs_info->regs.gs.vgt_gsvs_ring_offset[2] = offset;
|
||||
|
||||
if (max_stream >= 3)
|
||||
offset += num_components[3] * gs_max_out_vertices;
|
||||
info->regs.gs.vgt_gsvs_ring_itemsize = offset;
|
||||
gs_info->regs.gs.vgt_gsvs_ring_itemsize = offset;
|
||||
|
||||
for (uint32_t i = 0; i < 4; i++)
|
||||
info->regs.gs.vgt_gs_vert_itemsize[i] = (max_stream >= i) ? num_components[i] : 0;
|
||||
gs_info->regs.gs.vgt_gs_vert_itemsize[i] = (max_stream >= i) ? num_components[i] : 0;
|
||||
|
||||
const uint32_t gs_num_invocations = info->gs.invocations;
|
||||
info->regs.gs.vgt_gs_instance_cnt =
|
||||
const uint32_t gs_num_invocations = gs_info->gs.invocations;
|
||||
gs_info->regs.gs.vgt_gs_instance_cnt =
|
||||
S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0);
|
||||
|
||||
info->regs.spi_shader_pgm_rsrc3_gs =
|
||||
gs_info->regs.spi_shader_pgm_rsrc3_gs =
|
||||
ac_apply_cu_en(S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F), C_00B21C_CU_EN, 0, &pdev->info);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
info->regs.spi_shader_pgm_rsrc4_gs =
|
||||
gs_info->regs.spi_shader_pgm_rsrc4_gs =
|
||||
ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0), C_00B204_CU_EN_GFX10,
|
||||
16, &pdev->info);
|
||||
}
|
||||
|
||||
info->regs.vgt_gs_max_vert_out = info->gs.vertices_out;
|
||||
gs_info->regs.vgt_gs_max_vert_out = gs_info->gs.vertices_out;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -2044,7 +2043,7 @@ radv_precompute_registers(struct radv_device *device, struct radv_shader_binary
|
|||
if (info->is_ngg) {
|
||||
radv_precompute_registers_hw_ngg(device, &binary->config, &binary->info);
|
||||
} else {
|
||||
radv_precompute_registers_hw_gs(device, binary);
|
||||
radv_precompute_registers_hw_gs(device, NULL, &binary->info);
|
||||
}
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
|
|
|
|||
|
|
@ -723,6 +723,8 @@ radv_shader_need_push_constants_upload(const struct radv_shader *shader)
|
|||
return loc->sgpr_idx != -1;
|
||||
}
|
||||
|
||||
void radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info);
|
||||
|
||||
void radv_precompute_registers_hw_ngg(struct radv_device *device, const struct ac_shader_config *config,
|
||||
struct radv_shader_info *info);
|
||||
|
||||
|
|
|
|||
|
|
@ -643,11 +643,11 @@ gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct
|
|||
}
|
||||
|
||||
void
|
||||
radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *gs_info)
|
||||
radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_legacy_gs_info *out = &gs_info->legacy_gs_info;
|
||||
const unsigned esgs_vertex_stride = out->esgs_itemsize * 4;
|
||||
const unsigned esgs_vertex_stride = es_info ? es_info->esgs_itemsize : out->esgs_itemsize * 4;
|
||||
ac_legacy_gs_subgroup_info info;
|
||||
|
||||
ac_legacy_gs_compute_subgroup_info(gs_info->gs.input_prim, gs_info->gs.vertices_out, gs_info->gs.invocations,
|
||||
|
|
|
|||
|
|
@ -334,7 +334,7 @@ void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shad
|
|||
const enum radv_pipeline_type pipeline_type, bool consider_force_vrs,
|
||||
struct radv_shader_info *info);
|
||||
|
||||
void radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *gs_info);
|
||||
void radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info);
|
||||
|
||||
void gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info,
|
||||
struct radv_shader_info *gs_info, struct gfx10_ngg_info *out);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue