radv: add support for emitting NGG shaders with ESO

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27724>
This commit is contained in:
Samuel Pitoiset 2024-02-21 15:46:00 +01:00 committed by Marge Bot
parent 85d682b318
commit 426d8b5216
4 changed files with 59 additions and 8 deletions

View file

@ -3970,8 +3970,16 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
if (vs_shader->info.merged_shader_compiled_separately) {
if (vs_shader->info.next_stage == MESA_SHADER_GEOMETRY) {
const struct radv_shader *gs = cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY];
unsigned lds_size;
radeon_set_sh_reg(cmd_buffer->cs, rsrc1_reg + 4, rsrc2 | S_00B22C_LDS_SIZE(gs->info.gs_ring_info.lds_size));
if (gs->info.is_ngg) {
lds_size = DIV_ROUND_UP(gs->info.ngg_info.lds_size,
cmd_buffer->device->physical_device->rad_info.lds_encode_granularity);
} else {
lds_size = gs->info.gs_ring_info.lds_size;
}
radeon_set_sh_reg(cmd_buffer->cs, rsrc1_reg + 4, rsrc2 | S_00B22C_LDS_SIZE(lds_size));
} else {
radeon_set_sh_reg(cmd_buffer->cs, rsrc1_reg + 4, rsrc2);
}
@ -9372,6 +9380,18 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer)
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->state.gs_copy_shader->bo);
}
/* Determine NGG GS info. */
if (cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY] &&
cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.is_ngg &&
cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.merged_shader_compiled_separately) {
struct radv_shader *es = cmd_buffer->state.shaders[MESA_SHADER_TESS_EVAL]
? cmd_buffer->state.shaders[MESA_SHADER_TESS_EVAL]
: cmd_buffer->state.shaders[MESA_SHADER_VERTEX];
struct radv_shader *gs = cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY];
gfx10_get_ngg_info(device, &es->info, &gs->info, &gs->info.ngg_info);
}
/* Determine the rasterized primitive. */
if (cmd_buffer->state.active_stages &
(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |

View file

@ -2968,11 +2968,13 @@ radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
es_type = shader->info.stage;
}
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
if (!shader->info.merged_shader_compiled_separately) {
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
radeon_emit(cs, shader->config.rsrc1);
radeon_emit(cs, shader->config.rsrc2);
radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
radeon_emit(cs, shader->config.rsrc1);
radeon_emit(cs, shader->config.rsrc2);
}
const struct radv_vs_output_info *outinfo = &shader->info.outinfo;
unsigned clip_dist_mask, cull_dist_mask, total_mask;
@ -3160,9 +3162,17 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *
radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, vs->va >> 8);
}
unsigned lds_size;
if (next_stage->info.is_ngg) {
lds_size = DIV_ROUND_UP(next_stage->info.ngg_info.lds_size,
device->physical_device->rad_info.lds_encode_granularity);
} else {
lds_size = next_stage->info.gs_ring_info.lds_size;
}
radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
radeon_emit(cs, rsrc1);
radeon_emit(cs, rsrc2 | S_00B22C_LDS_SIZE(next_stage->info.gs_ring_info.lds_size));
radeon_emit(cs, rsrc2 | S_00B22C_LDS_SIZE(lds_size));
}
}
@ -3208,9 +3218,16 @@ radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbu
radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, tes->va >> 8);
unsigned lds_size;
if (gs->info.is_ngg) {
lds_size = DIV_ROUND_UP(gs->info.ngg_info.lds_size, device->physical_device->rad_info.lds_encode_granularity);
} else {
lds_size = gs->info.gs_ring_info.lds_size;
}
radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
radeon_emit(cs, rsrc1);
radeon_emit(cs, rsrc2 | S_00B22C_LDS_SIZE(gs->info.gs_ring_info.lds_size));
radeon_emit(cs, rsrc2 | S_00B22C_LDS_SIZE(lds_size));
radv_emit_shader_pointer(device, cs, base_reg + loc->sgpr_idx * 4, gs->va, false);
return;
@ -3329,6 +3346,17 @@ radv_emit_geometry_shader(const struct radv_device *device, struct radeon_cmdbuf
assert(vgt_esgs_ring_itemsize->sgpr_idx != -1 && vgt_esgs_ring_itemsize->num_sgprs == 1);
radeon_set_sh_reg(cs, gs->info.user_data_0 + vgt_esgs_ring_itemsize->sgpr_idx * 4, es->info.esgs_itemsize / 4);
if (gs->info.is_ngg) {
const struct radv_userdata_info *ngg_lds_layout = radv_get_user_sgpr(gs, AC_UD_NGG_LDS_LAYOUT);
assert(ngg_lds_layout->sgpr_idx != -1 && ngg_lds_layout->num_sgprs == 1);
assert(!(gs->info.ngg_info.esgs_ring_size & 0xffff0000) && !(gs->info.ngg_info.scratch_lds_base & 0xffff0000));
radeon_set_sh_reg(cs, gs->info.user_data_0 + ngg_lds_layout->sgpr_idx * 4,
SET_SGPR_FIELD(NGG_LDS_LAYOUT_GS_OUT_VERTEX_BASE, gs->info.ngg_info.esgs_ring_size) |
SET_SGPR_FIELD(NGG_LDS_LAYOUT_SCRATCH_BASE, gs->info.ngg_info.scratch_lds_base));
}
}
}

View file

@ -1058,6 +1058,9 @@ void radv_nir_shader_info_init(gl_shader_stage stage, gl_shader_stage next_stage
void radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
struct radv_shader_stage *stages);
/* Determines the NGG info for an ES/GS shader info pair and stores it in *out.
 *
 * Exposed outside its defining file so callers can (re)compute the NGG info
 * once both shader halves are known; consumers later read fields such as
 * lds_size, esgs_ring_size and scratch_lds_base from the result.
 *
 * NOTE(review): es_info and gs_info are non-const, so the callee may also
 * update them — confirm against the definition before relying on them being
 * left untouched.
 */
void gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info,
struct radv_shader_info *gs_info, struct gfx10_ngg_info *out);
void radv_shader_combine_cfg_vs_tcs(const struct radv_shader *vs, const struct radv_shader *tcs, uint32_t *rsrc1_out,
uint32_t *rsrc2_out);

View file

@ -1395,7 +1395,7 @@ gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct ra
return scratch_lds_base;
}
static void
void
gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info,
struct gfx10_ngg_info *out)
{