mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 09:18:04 +02:00
radeonsi: extract si_get_vs_prolog_args to be shared with aco
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24713>
This commit is contained in:
parent
07b62af810
commit
ec57236824
3 changed files with 82 additions and 67 deletions
|
|
@@ -3546,3 +3546,40 @@ void si_get_tcs_epilog_args(enum amd_gfx_level gfx_level,
|
|||
for (unsigned i = 0; i < 6; i++)
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &tess_factors[i]);
|
||||
}
|
||||
|
||||
/**
 * Fill \p args with the argument layout of the vertex shader prolog.
 *
 * \p args is zeroed first. Then key->vs_prolog.num_input_sgprs SGPR arguments
 * and (key->vs_prolog.num_merged_next_stage_vgprs + 4) VGPR arguments are
 * declared through ac_add_arg(), each with size 1 and type AC_ARG_INT.
 * Finally the named slots of \p args that this function sets
 * (merged_wave_info, vertex_id, instance_id, vs_rel_patch_id,
 * internal_bindings, start_instance, base_vertex) are pointed at the
 * matching declared arguments.
 *
 * \param gfx_level  Selects which VGPR is recorded as the instance ID and
 *                   whether vs_rel_patch_id is recorded.
 * \param args       Output; fully overwritten.
 * \param key        Shader part key; only key->vs_prolog is read.
 *
 * NOTE(review): the fixed indices used below (input_sgprs[3],
 * user_sgpr_base + SI_SGPR_*) are not range-checked against
 * num_input_sgprs here; presumably the caller guarantees the key describes
 * enough SGPRs - confirm against the code that builds the key.
 */
void si_get_vs_prolog_args(enum amd_gfx_level gfx_level,
|
||||
struct si_shader_args *args,
|
||||
const union si_shader_part_key *key)
|
||||
{
|
||||
/* Start from an empty argument description. */
memset(args, 0, sizeof(*args));
|
||||
|
||||
unsigned num_input_sgprs = key->vs_prolog.num_input_sgprs;
|
||||
/* Merged next-stage VGPRs plus 4 further VGPRs. */
unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 4;
|
||||
|
||||
/* Declare every input SGPR as one 32-bit integer argument, keeping its
 * handle so specific registers can be looked up by position below.
 */
struct ac_arg input_sgprs[num_input_sgprs];
|
||||
for (unsigned i = 0; i < num_input_sgprs; i++)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, input_sgprs + i);
|
||||
|
||||
/* Same for the input VGPRs; they are added after all SGPRs. */
struct ac_arg input_vgprs[num_input_vgprs];
|
||||
for (unsigned i = 0; i < num_input_vgprs; i++)
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, input_vgprs + i);
|
||||
|
||||
/* Only when there are merged next-stage VGPRs: SGPR 3 is recorded as
 * merged_wave_info.
 */
if (key->vs_prolog.num_merged_next_stage_vgprs)
|
||||
args->ac.merged_wave_info = input_sgprs[3];
|
||||
|
||||
/* The VS VGPRs follow the merged next-stage VGPRs. */
unsigned first_vs_vgpr = key->vs_prolog.num_merged_next_stage_vgprs;
|
||||
unsigned vertex_id_vgpr = first_vs_vgpr;
|
||||
/* Instance ID position: offset 3 on GFX10 and newer; on older levels
 * offset 2 when as_ls is set, otherwise offset 1.
 */
unsigned instance_id_vgpr = gfx_level >= GFX10 ?
|
||||
first_vs_vgpr + 3 : first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
|
||||
|
||||
args->ac.vertex_id = input_vgprs[vertex_id_vgpr];
|
||||
args->ac.instance_id = input_vgprs[instance_id_vgpr];
|
||||
|
||||
/* vs_rel_patch_id is only recorded for as_ls below GFX11; it is the VGPR
 * right after the vertex ID.
 */
if (key->vs_prolog.as_ls && gfx_level < GFX11)
|
||||
args->ac.vs_rel_patch_id = input_vgprs[first_vs_vgpr + 1];
|
||||
|
||||
/* With merged next-stage VGPRs the SI_SGPR_* indices are offset by 8
 * input SGPRs; otherwise they start at SGPR 0.
 */
unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
|
||||
args->internal_bindings = input_sgprs[user_sgpr_base + SI_SGPR_INTERNAL_BINDINGS];
|
||||
args->ac.start_instance = input_sgprs[user_sgpr_base + SI_SGPR_START_INSTANCE];
|
||||
args->ac.base_vertex = input_sgprs[user_sgpr_base + SI_SGPR_BASE_VERTEX];
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -142,6 +142,9 @@ void si_get_tcs_epilog_args(enum amd_gfx_level gfx_level,
|
|||
struct ac_arg *invocation_id,
|
||||
struct ac_arg *tf_lds_offset,
|
||||
struct ac_arg tess_factors[6]);
|
||||
void si_get_vs_prolog_args(enum amd_gfx_level gfx_level,
|
||||
struct si_shader_args *args,
|
||||
const union si_shader_part_key *key);
|
||||
|
||||
/* gfx10_shader_ngg.c */
|
||||
unsigned gfx10_ngg_get_vertices_per_prim(struct si_shader *shader);
|
||||
|
|
|
|||
|
|
@@ -12,13 +12,10 @@
|
|||
|
||||
static LLVMValueRef get_vertex_index(struct si_shader_context *ctx,
|
||||
struct si_vs_prolog_bits *key, unsigned input_index,
|
||||
LLVMValueRef instance_divisor_constbuf,
|
||||
unsigned start_instance, unsigned base_vertex)
|
||||
LLVMValueRef instance_divisor_constbuf)
|
||||
{
|
||||
LLVMValueRef instance_id = ctx->abi.instance_id_replaced ?
|
||||
ctx->abi.instance_id_replaced : ctx->abi.instance_id;
|
||||
LLVMValueRef vertex_id = ctx->abi.vertex_id_replaced ?
|
||||
ctx->abi.vertex_id_replaced : ctx->abi.vertex_id;
|
||||
LLVMValueRef instance_id = ctx->abi.instance_id;
|
||||
LLVMValueRef vertex_id = ctx->abi.vertex_id;
|
||||
|
||||
bool divisor_is_one = key->instance_divisor_is_one & (1u << input_index);
|
||||
bool divisor_is_fetched =key->instance_divisor_is_fetched & (1u << input_index);
|
||||
|
|
@@ -46,12 +43,12 @@ static LLVMValueRef get_vertex_index(struct si_shader_context *ctx,
|
|||
|
||||
if (divisor_is_one || divisor_is_fetched) {
|
||||
/* Add StartInstance. */
|
||||
index = LLVMBuildAdd(ctx->ac.builder, index,
|
||||
LLVMGetParam(ctx->main_fn.value, start_instance), "");
|
||||
LLVMValueRef start_instance = ac_get_arg(&ctx->ac, ctx->args->ac.start_instance);
|
||||
index = LLVMBuildAdd(ctx->ac.builder, index, start_instance, "");
|
||||
} else {
|
||||
/* VertexID + BaseVertex */
|
||||
index = LLVMBuildAdd(ctx->ac.builder, vertex_id,
|
||||
LLVMGetParam(ctx->main_fn.value, base_vertex), "");
|
||||
LLVMValueRef base_vertex = ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex);
|
||||
index = LLVMBuildAdd(ctx->ac.builder, vertex_id, base_vertex, "");
|
||||
}
|
||||
|
||||
return index;
|
||||
|
|
@@ -75,47 +72,37 @@ static LLVMValueRef get_vertex_index(struct si_shader_context *ctx,
|
|||
*/
|
||||
void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key)
|
||||
{
|
||||
LLVMTypeRef *returns;
|
||||
LLVMValueRef ret, func;
|
||||
int num_returns, i;
|
||||
unsigned first_vs_vgpr = key->vs_prolog.num_merged_next_stage_vgprs;
|
||||
unsigned num_input_vgprs =
|
||||
key->vs_prolog.num_merged_next_stage_vgprs + 4;
|
||||
struct ac_arg input_sgpr_param[key->vs_prolog.num_input_sgprs];
|
||||
struct ac_arg input_vgpr_param[10];
|
||||
LLVMValueRef input_vgprs[10];
|
||||
unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs + num_input_vgprs;
|
||||
unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
|
||||
struct si_shader_args *args = ctx->args;
|
||||
si_get_vs_prolog_args(ctx->screen->info.gfx_level, args, key);
|
||||
|
||||
memset(ctx->args, 0, sizeof(*ctx->args));
|
||||
const unsigned num_input_sgprs = args->ac.num_sgprs_used;
|
||||
const unsigned num_input_vgprs = args->ac.num_vgprs_used;
|
||||
|
||||
/* 4 preloaded VGPRs + vertex load indices as prolog outputs */
|
||||
returns = alloca((num_all_input_regs + key->vs_prolog.num_inputs) * sizeof(LLVMTypeRef));
|
||||
num_returns = 0;
|
||||
const unsigned num_output_gprs =
|
||||
num_input_sgprs + num_input_vgprs + key->vs_prolog.num_inputs;
|
||||
LLVMTypeRef returns[num_output_gprs];
|
||||
int num_returns = 0;
|
||||
|
||||
/* Declare input and output SGPRs. */
|
||||
for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
|
||||
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &input_sgpr_param[i]);
|
||||
/* Output SGPRs. */
|
||||
for (int i = 0; i < num_input_sgprs; i++)
|
||||
returns[num_returns++] = ctx->ac.i32;
|
||||
}
|
||||
|
||||
/* Preloaded VGPRs (outputs must be floats) */
|
||||
for (i = 0; i < num_input_vgprs; i++) {
|
||||
ac_add_arg(&ctx->args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &input_vgpr_param[i]);
|
||||
/* Output VGPRs */
|
||||
for (int i = 0; i < num_input_vgprs; i++)
|
||||
returns[num_returns++] = ctx->ac.f32;
|
||||
}
|
||||
|
||||
/* Vertex load indices. */
|
||||
for (i = 0; i < key->vs_prolog.num_inputs; i++)
|
||||
for (int i = 0; i < key->vs_prolog.num_inputs; i++)
|
||||
returns[num_returns++] = ctx->ac.f32;
|
||||
|
||||
/* Create the function. */
|
||||
si_llvm_create_func(ctx, "vs_prolog", returns, num_returns, 0);
|
||||
func = ctx->main_fn.value;
|
||||
LLVMValueRef func = ctx->main_fn.value;
|
||||
|
||||
for (i = 0; i < num_input_vgprs; i++) {
|
||||
input_vgprs[i] = ac_get_arg(&ctx->ac, input_vgpr_param[i]);
|
||||
}
|
||||
LLVMValueRef input_vgprs[num_input_vgprs];
|
||||
for (int i = 0; i < num_input_vgprs; i++)
|
||||
input_vgprs[i] = LLVMGetParam(func, num_input_sgprs + i);
|
||||
|
||||
if (key->vs_prolog.num_merged_next_stage_vgprs) {
|
||||
ac_init_exec_full_mask(&ctx->ac);
|
||||
|
|
@@ -125,65 +112,53 @@ void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part
|
|||
* starting at VGPR 0. Shift them back to where they
|
||||
* belong.
|
||||
*/
|
||||
LLVMValueRef hs_thread_count =
|
||||
si_unpack_param(ctx, args->ac.merged_wave_info, 8, 8);
|
||||
LLVMValueRef has_hs_threads =
|
||||
LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
|
||||
si_unpack_param(ctx, input_sgpr_param[3], 8, 8), ctx->ac.i32_0, "");
|
||||
LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, hs_thread_count, ctx->ac.i32_0, "");
|
||||
|
||||
for (i = 4; i > 0; --i) {
|
||||
for (int i = 4; i > 0; --i) {
|
||||
input_vgprs[i + 1] = LLVMBuildSelect(ctx->ac.builder, has_hs_threads,
|
||||
input_vgprs[i + 1], input_vgprs[i - 1], "");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned vertex_id_vgpr = first_vs_vgpr;
|
||||
unsigned instance_id_vgpr = ctx->screen->info.gfx_level >= GFX10
|
||||
? first_vs_vgpr + 3
|
||||
: first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
|
||||
|
||||
ctx->abi.vertex_id = input_vgprs[vertex_id_vgpr];
|
||||
ctx->abi.instance_id = input_vgprs[instance_id_vgpr];
|
||||
ctx->abi.vertex_id_replaced = NULL;
|
||||
ctx->abi.instance_id_replaced = NULL;
|
||||
ctx->abi.vertex_id = input_vgprs[args->ac.vertex_id.arg_index - num_input_sgprs];
|
||||
ctx->abi.instance_id = input_vgprs[args->ac.instance_id.arg_index - num_input_sgprs];
|
||||
|
||||
/* Copy inputs to outputs. This should be no-op, as the registers match,
|
||||
* but it will prevent the compiler from overwriting them unintentionally.
|
||||
*/
|
||||
ret = ctx->return_value;
|
||||
for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
|
||||
LLVMValueRef ret = ctx->return_value;
|
||||
for (int i = 0; i < num_input_sgprs; i++) {
|
||||
LLVMValueRef p = LLVMGetParam(func, i);
|
||||
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, i, "");
|
||||
}
|
||||
for (i = 0; i < num_input_vgprs; i++) {
|
||||
LLVMValueRef p = input_vgprs[i];
|
||||
|
||||
if (i == vertex_id_vgpr)
|
||||
p = ctx->abi.vertex_id;
|
||||
else if (i == instance_id_vgpr)
|
||||
p = ctx->abi.instance_id;
|
||||
|
||||
p = ac_to_float(&ctx->ac, p);
|
||||
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, key->vs_prolog.num_input_sgprs + i, "");
|
||||
for (int i = 0; i < num_input_vgprs; i++) {
|
||||
LLVMValueRef p = ac_to_float(&ctx->ac, input_vgprs[i]);
|
||||
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, num_input_sgprs + i, "");
|
||||
}
|
||||
|
||||
/* Compute vertex load indices from instance divisors. */
|
||||
LLVMValueRef instance_divisor_constbuf = NULL;
|
||||
|
||||
if (key->vs_prolog.states.instance_divisor_is_fetched) {
|
||||
LLVMValueRef list = si_prolog_get_internal_bindings(ctx);
|
||||
LLVMValueRef list = ac_get_arg(&ctx->ac, args->internal_bindings);
|
||||
list = LLVMBuildIntToPtr(ctx->ac.builder, list,
|
||||
ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
|
||||
|
||||
LLVMValueRef buf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_INSTANCE_DIVISORS, 0);
|
||||
instance_divisor_constbuf = ac_build_load_to_sgpr(&ctx->ac,
|
||||
(struct ac_llvm_pointer) { .v = list, .t = ctx->ac.v4i32 }, buf_index);
|
||||
}
|
||||
|
||||
for (i = 0; i < key->vs_prolog.num_inputs; i++) {
|
||||
for (int i = 0; i < key->vs_prolog.num_inputs; i++) {
|
||||
LLVMValueRef index = get_vertex_index(ctx, &key->vs_prolog.states, i,
|
||||
instance_divisor_constbuf,
|
||||
user_sgpr_base + SI_SGPR_START_INSTANCE,
|
||||
user_sgpr_base + SI_SGPR_BASE_VERTEX);
|
||||
instance_divisor_constbuf);
|
||||
|
||||
index = ac_to_float(&ctx->ac, index);
|
||||
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, index, ctx->args->ac.arg_count + i, "");
|
||||
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, index, args->ac.arg_count + i, "");
|
||||
}
|
||||
|
||||
si_llvm_build_ret(ctx, ret);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue