radeonsi: clean up si_llvm_build_vs_exports gfx11 code

The gfx11 path is now handled entirely by
export_vertex_params_gfx11() in ac_nir_lower_ngg.c.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17109>
This commit is contained in:
Qiang Yu 2022-10-09 10:30:24 +08:00 committed by Marge Bot
parent 9b2ec290c4
commit da4f49d0ad
5 changed files with 9 additions and 66 deletions

View file

@ -119,7 +119,7 @@ void gfx10_ngg_export_vertex(struct ac_shader_abi *abi)
LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + j], "");
}
si_llvm_build_vs_exports(ctx, NULL, outputs, num_outputs);
si_llvm_build_vs_exports(ctx, outputs, num_outputs);
}
void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream,

View file

@ -222,7 +222,6 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
struct si_shader *shader, const struct pipe_stream_output_info *so,
struct util_debug_callback *debug, struct nir_shader *nir,
bool free_nir);
LLVMValueRef si_llvm_build_attr_ring_desc(struct si_shader_context *ctx);
/* si_shader_llvm_gs.c */
LLVMValueRef si_is_es_thread(struct si_shader_context *ctx);
@ -263,7 +262,7 @@ void si_llvm_streamout_store_output(struct si_shader_context *ctx, LLVMValueRef
struct si_shader_output_values *shader_out);
void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_output_values *outputs,
unsigned noutput, unsigned stream);
void si_llvm_build_vs_exports(struct si_shader_context *ctx, LLVMValueRef num_export_threads,
void si_llvm_build_vs_exports(struct si_shader_context *ctx,
struct si_shader_output_values *outputs, unsigned noutput);
void si_llvm_vs_build_end(struct si_shader_context *ctx);
void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key);

View file

@ -732,7 +732,7 @@ static LLVMValueRef si_get_num_vertices_per_prim(struct si_shader_context *ctx)
return LLVMConstInt(ctx->ac.i32, num_vertices, false);
}
LLVMValueRef si_llvm_build_attr_ring_desc(struct si_shader_context *ctx)
static LLVMValueRef si_llvm_build_attr_ring_desc(struct si_shader_context *ctx)
{
struct si_shader *shader = ctx->shader;

View file

@ -526,7 +526,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
if (stream == 0) {
si_vertex_color_clamping(&ctx, outputs, gsinfo->num_outputs);
si_llvm_build_vs_exports(&ctx, NULL, outputs, gsinfo->num_outputs);
si_llvm_build_vs_exports(&ctx, outputs, gsinfo->num_outputs);
}
LLVMBuildBr(builder, end_bb);

View file

@ -503,10 +503,8 @@ static void si_llvm_init_vs_export_args(struct si_shader_context *ctx, const LLV
/**
* Generate export instructions for hardware VS shader stage or NGG GS stage
* (position and parameter data only).
*
* \param num_export_threads The number of threads that are active for exports. Only used by gfx11.
*/
void si_llvm_build_vs_exports(struct si_shader_context *ctx, LLVMValueRef num_export_threads,
void si_llvm_build_vs_exports(struct si_shader_context *ctx,
struct si_shader_output_values *outputs, unsigned noutput)
{
struct si_shader *shader = ctx->shader;
@ -720,63 +718,9 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx, LLVMValueRef num_ex
&param_exports[offset]);
}
if (ctx->screen->info.gfx_level >= GFX11) {
/* Store primitive exports to alloca variables, so that we can read them outside this branch. */
for (unsigned i = 0; i < shader->info.nr_param_exports; i++) {
for (unsigned chan = 0; chan < 4; chan++) {
param_exports[i].out[chan] =
ac_build_alloca_init(&ctx->ac, param_exports[i].out[chan], "");
}
}
ac_build_endif(&ctx->ac, 0);
if (!num_export_threads)
num_export_threads = si_unpack_param(ctx, ctx->args.merged_wave_info, 0, 8);
/* We should always store full vec4s in groups of 8 lanes for the best performance even if
* some of them are garbage or have unused components, so align the number of export threads
* to 8.
*/
num_export_threads = LLVMBuildAdd(ctx->ac.builder, num_export_threads,
LLVMConstInt(ctx->ac.i32, 7, 0), "");
num_export_threads = LLVMBuildAnd(ctx->ac.builder, num_export_threads,
LLVMConstInt(ctx->ac.i32, ~7, 0), "");
ac_build_ifcc(&ctx->ac,
LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
ac_get_thread_id(&ctx->ac), num_export_threads, ""), 0);
LLVMValueRef attr_rsrc = si_llvm_build_attr_ring_desc(ctx);
LLVMValueRef attr_offset = LLVMBuildShl(ctx->ac.builder,
si_unpack_param(ctx, ctx->args.gs_attr_offset, 0, 15),
LLVMConstInt(ctx->ac.i32, 9, 0), ""); /* 512B increments */
LLVMValueRef vindex = gfx10_get_thread_id_in_tg(ctx);
LLVMValueRef soffset[32];
/* Compute scalar offsets first. */
for (unsigned i = 0; i < shader->info.nr_param_exports; i++) {
soffset[i] = LLVMBuildAdd(ctx->ac.builder, attr_offset,
LLVMConstInt(ctx->ac.i32, 32 * i * 16, 0), "");
}
/* Write attributes to the attribute ring buffer. */
for (unsigned i = 0; i < shader->info.nr_param_exports; i++) {
for (unsigned chan = 0; chan < 4; chan++) {
param_exports[i].out[chan] =
LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, param_exports[i].out[chan], "");
}
LLVMValueRef vdata = ac_build_gather_values_extended(&ctx->ac, param_exports[i].out,
4, 1, false);
ac_build_buffer_store_dword(&ctx->ac, attr_rsrc, vdata, vindex,
ctx->ac.i32_0, soffset[i], ac_swizzled);
}
} else {
/* Export attributes using parameter exports. */
for (unsigned i = 0; i < shader->info.nr_param_exports; i++)
ac_build_export(&ctx->ac, &param_exports[i]);
}
/* Export attributes using parameter exports. */
for (unsigned i = 0; i < shader->info.nr_param_exports; i++)
ac_build_export(&ctx->ac, &param_exports[i]);
}
void si_llvm_vs_build_end(struct si_shader_context *ctx)
@ -813,7 +757,7 @@ void si_llvm_vs_build_end(struct si_shader_context *ctx)
i++;
}
si_llvm_build_vs_exports(ctx, NULL, outputs, i);
si_llvm_build_vs_exports(ctx, outputs, i);
FREE(outputs);
}