radeonsi: extract si_get_vs_prolog_args to be shared with aco

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24713>
This commit is contained in:
Qiang Yu 2023-08-04 20:06:18 +08:00 committed by Marge Bot
parent 07b62af810
commit ec57236824
3 changed files with 82 additions and 67 deletions

View file

@ -3546,3 +3546,40 @@ void si_get_tcs_epilog_args(enum amd_gfx_level gfx_level,
for (unsigned i = 0; i < 6; i++)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &tess_factors[i]);
}
/**
 * Describe the argument layout of the vertex shader prolog.
 *
 * \p args is cleared, then one 32-bit integer argument is declared for every
 * input SGPR followed by one for every input VGPR. Finally the arguments that
 * have a known meaning (vertex/instance ID, merged wave info, internal
 * bindings, start instance, base vertex, ...) are recorded in \p args.
 *
 * \param gfx_level  chip generation; selects which VGPR holds the instance ID
 *                   and whether the relative patch ID VGPR is recorded
 * \param args       output, fully overwritten
 * \param key        shader part key; only key->vs_prolog is read
 */
void si_get_vs_prolog_args(enum amd_gfx_level gfx_level,
                           struct si_shader_args *args,
                           const union si_shader_part_key *key)
{
   memset(args, 0, sizeof(*args));

   const unsigned merged_vgpr_count = key->vs_prolog.num_merged_next_stage_vgprs;
   const unsigned sgpr_count = key->vs_prolog.num_input_sgprs;
   /* VGPRs of the merged next stage come first, then 4 VS input VGPRs. */
   const unsigned vgpr_count = merged_vgpr_count + 4;

   /* Declaration order matters: all SGPRs first, then all VGPRs. */
   struct ac_arg sgpr_args[sgpr_count];
   for (unsigned s = 0; s < sgpr_count; s++)
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &sgpr_args[s]);

   struct ac_arg vgpr_args[vgpr_count];
   for (unsigned v = 0; v < vgpr_count; v++)
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &vgpr_args[v]);

   /* Merged wave info is only recorded when this is a merged shader. */
   if (merged_vgpr_count)
      args->ac.merged_wave_info = sgpr_args[3];

   /* The VS VGPRs start right after the merged next-stage VGPRs. */
   const unsigned vs_vgpr_base = merged_vgpr_count;

   /* The instance ID position depends on the chip generation and, before
    * GFX10, on whether the shader is compiled as LS.
    */
   unsigned instance_id_offset;
   if (gfx_level >= GFX10)
      instance_id_offset = 3;
   else if (key->vs_prolog.as_ls)
      instance_id_offset = 2;
   else
      instance_id_offset = 1;

   args->ac.vertex_id = vgpr_args[vs_vgpr_base];
   args->ac.instance_id = vgpr_args[vs_vgpr_base + instance_id_offset];

   if (key->vs_prolog.as_ls && gfx_level < GFX11)
      args->ac.vs_rel_patch_id = vgpr_args[vs_vgpr_base + 1];

   /* NOTE(review): the user SGPRs are taken to start at index 8 in merged
    * shaders and at 0 otherwise - confirm against the SGPR layout.
    */
   const unsigned user_sgpr_start = merged_vgpr_count ? 8 : 0;

   args->internal_bindings = sgpr_args[user_sgpr_start + SI_SGPR_INTERNAL_BINDINGS];
   args->ac.start_instance = sgpr_args[user_sgpr_start + SI_SGPR_START_INSTANCE];
   args->ac.base_vertex = sgpr_args[user_sgpr_start + SI_SGPR_BASE_VERTEX];
}

View file

@ -142,6 +142,9 @@ void si_get_tcs_epilog_args(enum amd_gfx_level gfx_level,
struct ac_arg *invocation_id,
struct ac_arg *tf_lds_offset,
struct ac_arg tess_factors[6]);
/* Clear *args and fill it with the vertex shader prolog argument layout
 * described by key->vs_prolog for the given gfx_level: the input SGPRs are
 * declared first, followed by the input VGPRs.
 */
void si_get_vs_prolog_args(enum amd_gfx_level gfx_level,
struct si_shader_args *args,
const union si_shader_part_key *key);
/* gfx10_shader_ngg.c */
unsigned gfx10_ngg_get_vertices_per_prim(struct si_shader *shader);

View file

@ -12,13 +12,10 @@
static LLVMValueRef get_vertex_index(struct si_shader_context *ctx,
struct si_vs_prolog_bits *key, unsigned input_index,
LLVMValueRef instance_divisor_constbuf,
unsigned start_instance, unsigned base_vertex)
LLVMValueRef instance_divisor_constbuf)
{
LLVMValueRef instance_id = ctx->abi.instance_id_replaced ?
ctx->abi.instance_id_replaced : ctx->abi.instance_id;
LLVMValueRef vertex_id = ctx->abi.vertex_id_replaced ?
ctx->abi.vertex_id_replaced : ctx->abi.vertex_id;
LLVMValueRef instance_id = ctx->abi.instance_id;
LLVMValueRef vertex_id = ctx->abi.vertex_id;
bool divisor_is_one = key->instance_divisor_is_one & (1u << input_index);
bool divisor_is_fetched =key->instance_divisor_is_fetched & (1u << input_index);
@ -46,12 +43,12 @@ static LLVMValueRef get_vertex_index(struct si_shader_context *ctx,
if (divisor_is_one || divisor_is_fetched) {
/* Add StartInstance. */
index = LLVMBuildAdd(ctx->ac.builder, index,
LLVMGetParam(ctx->main_fn.value, start_instance), "");
LLVMValueRef start_instance = ac_get_arg(&ctx->ac, ctx->args->ac.start_instance);
index = LLVMBuildAdd(ctx->ac.builder, index, start_instance, "");
} else {
/* VertexID + BaseVertex */
index = LLVMBuildAdd(ctx->ac.builder, vertex_id,
LLVMGetParam(ctx->main_fn.value, base_vertex), "");
LLVMValueRef base_vertex = ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex);
index = LLVMBuildAdd(ctx->ac.builder, vertex_id, base_vertex, "");
}
return index;
@ -75,47 +72,37 @@ static LLVMValueRef get_vertex_index(struct si_shader_context *ctx,
*/
void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key)
{
LLVMTypeRef *returns;
LLVMValueRef ret, func;
int num_returns, i;
unsigned first_vs_vgpr = key->vs_prolog.num_merged_next_stage_vgprs;
unsigned num_input_vgprs =
key->vs_prolog.num_merged_next_stage_vgprs + 4;
struct ac_arg input_sgpr_param[key->vs_prolog.num_input_sgprs];
struct ac_arg input_vgpr_param[10];
LLVMValueRef input_vgprs[10];
unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs + num_input_vgprs;
unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
struct si_shader_args *args = ctx->args;
si_get_vs_prolog_args(ctx->screen->info.gfx_level, args, key);
memset(ctx->args, 0, sizeof(*ctx->args));
const unsigned num_input_sgprs = args->ac.num_sgprs_used;
const unsigned num_input_vgprs = args->ac.num_vgprs_used;
/* 4 preloaded VGPRs + vertex load indices as prolog outputs */
returns = alloca((num_all_input_regs + key->vs_prolog.num_inputs) * sizeof(LLVMTypeRef));
num_returns = 0;
const unsigned num_output_gprs =
num_input_sgprs + num_input_vgprs + key->vs_prolog.num_inputs;
LLVMTypeRef returns[num_output_gprs];
int num_returns = 0;
/* Declare input and output SGPRs. */
for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &input_sgpr_param[i]);
/* Output SGPRs. */
for (int i = 0; i < num_input_sgprs; i++)
returns[num_returns++] = ctx->ac.i32;
}
/* Preloaded VGPRs (outputs must be floats) */
for (i = 0; i < num_input_vgprs; i++) {
ac_add_arg(&ctx->args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &input_vgpr_param[i]);
/* Output VGPRs */
for (int i = 0; i < num_input_vgprs; i++)
returns[num_returns++] = ctx->ac.f32;
}
/* Vertex load indices. */
for (i = 0; i < key->vs_prolog.num_inputs; i++)
for (int i = 0; i < key->vs_prolog.num_inputs; i++)
returns[num_returns++] = ctx->ac.f32;
/* Create the function. */
si_llvm_create_func(ctx, "vs_prolog", returns, num_returns, 0);
func = ctx->main_fn.value;
LLVMValueRef func = ctx->main_fn.value;
for (i = 0; i < num_input_vgprs; i++) {
input_vgprs[i] = ac_get_arg(&ctx->ac, input_vgpr_param[i]);
}
LLVMValueRef input_vgprs[num_input_vgprs];
for (int i = 0; i < num_input_vgprs; i++)
input_vgprs[i] = LLVMGetParam(func, num_input_sgprs + i);
if (key->vs_prolog.num_merged_next_stage_vgprs) {
ac_init_exec_full_mask(&ctx->ac);
@ -125,65 +112,53 @@ void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part
* starting at VGPR 0. Shift them back to where they
* belong.
*/
LLVMValueRef hs_thread_count =
si_unpack_param(ctx, args->ac.merged_wave_info, 8, 8);
LLVMValueRef has_hs_threads =
LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
si_unpack_param(ctx, input_sgpr_param[3], 8, 8), ctx->ac.i32_0, "");
LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, hs_thread_count, ctx->ac.i32_0, "");
for (i = 4; i > 0; --i) {
for (int i = 4; i > 0; --i) {
input_vgprs[i + 1] = LLVMBuildSelect(ctx->ac.builder, has_hs_threads,
input_vgprs[i + 1], input_vgprs[i - 1], "");
}
}
}
unsigned vertex_id_vgpr = first_vs_vgpr;
unsigned instance_id_vgpr = ctx->screen->info.gfx_level >= GFX10
? first_vs_vgpr + 3
: first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
ctx->abi.vertex_id = input_vgprs[vertex_id_vgpr];
ctx->abi.instance_id = input_vgprs[instance_id_vgpr];
ctx->abi.vertex_id_replaced = NULL;
ctx->abi.instance_id_replaced = NULL;
ctx->abi.vertex_id = input_vgprs[args->ac.vertex_id.arg_index - num_input_sgprs];
ctx->abi.instance_id = input_vgprs[args->ac.instance_id.arg_index - num_input_sgprs];
/* Copy inputs to outputs. This should be no-op, as the registers match,
* but it will prevent the compiler from overwriting them unintentionally.
*/
ret = ctx->return_value;
for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
LLVMValueRef ret = ctx->return_value;
for (int i = 0; i < num_input_sgprs; i++) {
LLVMValueRef p = LLVMGetParam(func, i);
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, i, "");
}
for (i = 0; i < num_input_vgprs; i++) {
LLVMValueRef p = input_vgprs[i];
if (i == vertex_id_vgpr)
p = ctx->abi.vertex_id;
else if (i == instance_id_vgpr)
p = ctx->abi.instance_id;
p = ac_to_float(&ctx->ac, p);
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, key->vs_prolog.num_input_sgprs + i, "");
for (int i = 0; i < num_input_vgprs; i++) {
LLVMValueRef p = ac_to_float(&ctx->ac, input_vgprs[i]);
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, num_input_sgprs + i, "");
}
/* Compute vertex load indices from instance divisors. */
LLVMValueRef instance_divisor_constbuf = NULL;
if (key->vs_prolog.states.instance_divisor_is_fetched) {
LLVMValueRef list = si_prolog_get_internal_bindings(ctx);
LLVMValueRef list = ac_get_arg(&ctx->ac, args->internal_bindings);
list = LLVMBuildIntToPtr(ctx->ac.builder, list,
ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
LLVMValueRef buf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_INSTANCE_DIVISORS, 0);
instance_divisor_constbuf = ac_build_load_to_sgpr(&ctx->ac,
(struct ac_llvm_pointer) { .v = list, .t = ctx->ac.v4i32 }, buf_index);
}
for (i = 0; i < key->vs_prolog.num_inputs; i++) {
for (int i = 0; i < key->vs_prolog.num_inputs; i++) {
LLVMValueRef index = get_vertex_index(ctx, &key->vs_prolog.states, i,
instance_divisor_constbuf,
user_sgpr_base + SI_SGPR_START_INSTANCE,
user_sgpr_base + SI_SGPR_BASE_VERTEX);
instance_divisor_constbuf);
index = ac_to_float(&ctx->ac, index);
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, index, ctx->args->ac.arg_count + i, "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, index, args->ac.arg_count + i, "");
}
si_llvm_build_ret(ctx, ret);