diff --git a/.pick_status.json b/.pick_status.json index e37027d1f72..5f7b1b413f9 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -4414,7 +4414,7 @@ "description": "radv: recalculate legacy_gs_info on bind", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 160ff3c72ce..13b0c3495e8 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -11536,18 +11536,27 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) radv_bind_gs_copy_shader(cmd_buffer, gs_copy_shader); - /* Determine NGG GS info. */ + /* Determine GS info. */ if (cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY] && - cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.is_ngg && cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.merged_shader_compiled_separately) { struct radv_shader *es = cmd_buffer->state.shaders[MESA_SHADER_TESS_EVAL] ? cmd_buffer->state.shaders[MESA_SHADER_TESS_EVAL] : cmd_buffer->state.shaders[MESA_SHADER_VERTEX]; struct radv_shader *gs = cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]; - gfx10_ngg_set_esgs_ring_itemsize(device, &es->info, &gs->info, &gs->info.ngg_info); - gfx10_get_ngg_info(device, &es->info, &gs->info, &gs->info.ngg_info); - radv_precompute_registers_hw_ngg(device, &gs->config, &gs->info); + if (cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.is_ngg) { + gfx10_ngg_set_esgs_ring_itemsize(device, &es->info, &gs->info, &gs->info.ngg_info); + gfx10_get_ngg_info(device, &es->info, &gs->info, &gs->info.ngg_info); + radv_precompute_registers_hw_ngg(device, &gs->config, &gs->info); + } else { + radv_get_legacy_gs_info(device, &es->info, &gs->info); + radv_precompute_registers_hw_gs(device, &es->info, &gs->info); + + cmd_buffer->esgs_ring_size_needed = + MAX2(cmd_buffer->esgs_ring_size_needed, gs->info.legacy_gs_info.esgs_ring_size); + cmd_buffer->gsvs_ring_size_needed = + MAX2(cmd_buffer->gsvs_ring_size_needed, gs->info.legacy_gs_info.gsvs_ring_size); + } } /* Determine the rasterized primitive. */ diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 5b65590b53e..25a63f33b7e 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -2803,7 +2803,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac } if (stages[MESA_SHADER_GEOMETRY].nir && !stages[MESA_SHADER_GEOMETRY].info.is_ngg) - radv_get_legacy_gs_info(device, &stages[MESA_SHADER_GEOMETRY].info); + radv_get_legacy_gs_info(device, NULL, &stages[MESA_SHADER_GEOMETRY].info); /* Compile NIR shaders to AMD assembly. */ radv_graphics_shaders_nir_to_asm(device, cache, stages, gfx_state, keep_executable_info, keep_statistic_info, diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 65bc4a31123..ce45a43b403 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1494,60 +1494,59 @@ radv_precompute_registers_hw_vs(struct radv_device *device, struct radv_shader_b } } -static void -radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_binary *binary) +void +radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info) { const struct radv_physical_device *pdev = radv_device_physical(device); - struct radv_shader_info *info = &binary->info; - info->regs.gs.vgt_esgs_ring_itemsize = info->legacy_gs_info.esgs_itemsize; + gs_info->regs.gs.vgt_esgs_ring_itemsize = es_info ? es_info->esgs_itemsize / 4 : gs_info->legacy_gs_info.esgs_itemsize; - info->regs.gs.vgt_gs_max_prims_per_subgroup = - S_028A94_MAX_PRIMS_PER_SUBGROUP(info->legacy_gs_info.gs_inst_prims_in_subgroup); + gs_info->regs.gs.vgt_gs_max_prims_per_subgroup = + S_028A94_MAX_PRIMS_PER_SUBGROUP(gs_info->legacy_gs_info.gs_inst_prims_in_subgroup); - info->regs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(info->legacy_gs_info.es_verts_per_subgroup) | - S_028A44_GS_PRIMS_PER_SUBGRP(info->legacy_gs_info.gs_prims_per_subgroup) | - S_028A44_GS_INST_PRIMS_IN_SUBGRP(info->legacy_gs_info.gs_inst_prims_in_subgroup); + gs_info->regs.vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(gs_info->legacy_gs_info.es_verts_per_subgroup) | + S_028A44_GS_PRIMS_PER_SUBGRP(gs_info->legacy_gs_info.gs_prims_per_subgroup) | + S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_info->legacy_gs_info.gs_inst_prims_in_subgroup); - const uint32_t gs_max_out_vertices = info->gs.vertices_out; - const uint8_t max_stream = info->gs.num_components_per_stream[3] ? 3 - : info->gs.num_components_per_stream[2] ? 2 - : info->gs.num_components_per_stream[1] ? 1 + const uint32_t gs_max_out_vertices = gs_info->gs.vertices_out; + const uint8_t max_stream = gs_info->gs.num_components_per_stream[3] ? 3 + : gs_info->gs.num_components_per_stream[2] ? 2 + : gs_info->gs.num_components_per_stream[1] ? 1 : 0; - const uint8_t *num_components = info->gs.num_components_per_stream; + const uint8_t *num_components = gs_info->gs.num_components_per_stream; uint32_t offset = num_components[0] * gs_max_out_vertices; - info->regs.gs.vgt_gsvs_ring_offset[0] = offset; + gs_info->regs.gs.vgt_gsvs_ring_offset[0] = offset; if (max_stream >= 1) offset += num_components[1] * gs_max_out_vertices; - info->regs.gs.vgt_gsvs_ring_offset[1] = offset; + gs_info->regs.gs.vgt_gsvs_ring_offset[1] = offset; if (max_stream >= 2) offset += num_components[2] * gs_max_out_vertices; - info->regs.gs.vgt_gsvs_ring_offset[2] = offset; + gs_info->regs.gs.vgt_gsvs_ring_offset[2] = offset; if (max_stream >= 3) offset += num_components[3] * gs_max_out_vertices; - info->regs.gs.vgt_gsvs_ring_itemsize = offset; + gs_info->regs.gs.vgt_gsvs_ring_itemsize = offset; for (uint32_t i = 0; i < 4; i++) - info->regs.gs.vgt_gs_vert_itemsize[i] = (max_stream >= i) ? num_components[i] : 0; + gs_info->regs.gs.vgt_gs_vert_itemsize[i] = (max_stream >= i) ? num_components[i] : 0; - const uint32_t gs_num_invocations = info->gs.invocations; - info->regs.gs.vgt_gs_instance_cnt = + const uint32_t gs_num_invocations = gs_info->gs.invocations; + gs_info->regs.gs.vgt_gs_instance_cnt = S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0); - info->regs.spi_shader_pgm_rsrc3_gs = + gs_info->regs.spi_shader_pgm_rsrc3_gs = ac_apply_cu_en(S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F), C_00B21C_CU_EN, 0, &pdev->info); if (pdev->info.gfx_level >= GFX10) { - info->regs.spi_shader_pgm_rsrc4_gs = + gs_info->regs.spi_shader_pgm_rsrc4_gs = ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0), C_00B204_CU_EN_GFX10, 16, &pdev->info); } - info->regs.vgt_gs_max_vert_out = info->gs.vertices_out; + gs_info->regs.vgt_gs_max_vert_out = gs_info->gs.vertices_out; } void @@ -1897,7 +1896,7 @@ radv_precompute_registers(struct radv_device *device, struct radv_shader_binary if (info->is_ngg) { radv_precompute_registers_hw_ngg(device, &binary->config, &binary->info); } else { - radv_precompute_registers_hw_gs(device, binary); + radv_precompute_registers_hw_gs(device, NULL, &binary->info); } break; case MESA_SHADER_MESH: diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index a0a8572ede6..321604ee609 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -736,6 +736,8 @@ uint32_t radv_get_user_sgpr_loc(const struct radv_shader *shader, int idx); uint32_t radv_get_user_sgpr(const struct radv_shader *shader, int idx); +void radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info); + void radv_precompute_registers_hw_ngg(struct radv_device *device, const struct ac_shader_config *config, struct radv_shader_info *info); diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index c44045ab07b..4e108632706 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -677,11 +677,11 @@ gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct } void -radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *gs_info) +radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info) { const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_legacy_gs_info *out = &gs_info->legacy_gs_info; - const unsigned esgs_vertex_stride = out->esgs_itemsize * 4; + const unsigned esgs_vertex_stride = es_info ? es_info->esgs_itemsize : out->esgs_itemsize * 4; ac_legacy_gs_subgroup_info info; ac_legacy_gs_compute_subgroup_info(gs_info->gs.input_prim, gs_info->gs.vertices_out, gs_info->gs.invocations, diff --git a/src/amd/vulkan/radv_shader_info.h b/src/amd/vulkan/radv_shader_info.h index 3ab116d193a..1e6eeab11ed 100644 --- a/src/amd/vulkan/radv_shader_info.h +++ b/src/amd/vulkan/radv_shader_info.h @@ -329,7 +329,7 @@ void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shad const enum radv_pipeline_type pipeline_type, bool consider_force_vrs, struct radv_shader_info *info); -void radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *gs_info); +void radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info); void gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info, struct gfx10_ngg_info *out);