radeonsi: separate shader args from llvm

Move shader args out of the LLVM context, so that we can initialize
them before getting the NIR. This is in preparation for creating a NIR
lower-ABI pass which loads args directly in NIR.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18010>
This commit is contained in:
Qiang Yu 2022-08-08 22:21:26 +08:00 committed by Marge Bot
parent 003cbddfee
commit 0007c10c1e
10 changed files with 518 additions and 485 deletions

View file

@ -28,7 +28,7 @@
static LLVMValueRef get_wave_id_in_tg(struct si_shader_context *ctx)
{
return si_unpack_param(ctx, ctx->args.merged_wave_info, 24, 4);
return si_unpack_param(ctx, ctx->args->ac.merged_wave_info, 24, 4);
}
LLVMValueRef gfx10_get_thread_id_in_tg(struct si_shader_context *ctx)
@ -42,13 +42,15 @@ LLVMValueRef gfx10_get_thread_id_in_tg(struct si_shader_context *ctx)
static LLVMValueRef ngg_get_query_buf(struct si_shader_context *ctx)
{
return ac_build_load_to_sgpr(&ctx->ac, ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->internal_bindings),
return ac_build_load_to_sgpr(&ctx->ac,
ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings),
LLVMConstInt(ctx->ac.i32, SI_GS_QUERY_BUF, false));
}
static LLVMValueRef ngg_get_emulated_counters_buf(struct si_shader_context *ctx)
{
return ac_build_load_to_sgpr(&ctx->ac, ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->internal_bindings),
return ac_build_load_to_sgpr(&ctx->ac,
ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings),
LLVMConstInt(ctx->ac.i32, SI_GS_QUERY_EMULATED_COUNTERS_BUF, false));
}

View file

@ -196,28 +196,30 @@ static void si_dump_streamout(struct pipe_stream_output_info *so)
}
}
static void declare_streamout_params(struct si_shader_context *ctx)
static void declare_streamout_params(struct si_shader_args *args, struct si_shader *shader)
{
if (ctx->screen->use_ngg_streamout) {
if (ctx->stage == MESA_SHADER_TESS_EVAL)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
struct si_shader_selector *sel = shader->selector;
if (sel->screen->use_ngg_streamout) {
if (sel->stage == MESA_SHADER_TESS_EVAL)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
return;
}
/* Streamout SGPRs. */
if (si_shader_uses_streamout(ctx->shader)) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.streamout_config);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.streamout_write_index);
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
if (si_shader_uses_streamout(shader)) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
} else if (sel->stage == MESA_SHADER_TESS_EVAL) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
}
/* A streamout buffer offset is loaded if the stride is non-zero. */
for (int i = 0; i < 4; i++) {
if (!ctx->shader->selector->info.base.xfb_stride[i])
if (!sel->info.base.xfb_stride[i])
continue;
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.streamout_offset[i]);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
}
}
@ -256,131 +258,135 @@ unsigned si_get_max_workgroup_size(const struct si_shader *shader)
return max_work_group_size;
}
static void declare_const_and_shader_buffers(struct si_shader_context *ctx, bool assign_params)
static void declare_const_and_shader_buffers(struct si_shader_args *args,
struct si_shader *shader,
bool assign_params)
{
enum ac_arg_type const_shader_buf_type;
if (ctx->shader->selector->info.base.num_ubos == 1 &&
ctx->shader->selector->info.base.num_ssbos == 0)
if (shader->selector->info.base.num_ubos == 1 &&
shader->selector->info.base.num_ssbos == 0)
const_shader_buf_type = AC_ARG_CONST_FLOAT_PTR;
else
const_shader_buf_type = AC_ARG_CONST_DESC_PTR;
ac_add_arg(
&ctx->args, AC_ARG_SGPR, 1, const_shader_buf_type,
assign_params ? &ctx->const_and_shader_buffers : &ctx->other_const_and_shader_buffers);
&args->ac, AC_ARG_SGPR, 1, const_shader_buf_type,
assign_params ? &args->const_and_shader_buffers : &args->other_const_and_shader_buffers);
}
static void declare_samplers_and_images(struct si_shader_context *ctx, bool assign_params)
static void declare_samplers_and_images(struct si_shader_args *args, bool assign_params)
{
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_IMAGE_PTR,
assign_params ? &ctx->samplers_and_images : &ctx->other_samplers_and_images);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_IMAGE_PTR,
assign_params ? &args->samplers_and_images : &args->other_samplers_and_images);
}
static void declare_per_stage_desc_pointers(struct si_shader_context *ctx, bool assign_params)
static void declare_per_stage_desc_pointers(struct si_shader_args *args,
struct si_shader *shader,
bool assign_params)
{
declare_const_and_shader_buffers(ctx, assign_params);
declare_samplers_and_images(ctx, assign_params);
declare_const_and_shader_buffers(args, shader, assign_params);
declare_samplers_and_images(args, assign_params);
}
static void declare_global_desc_pointers(struct si_shader_context *ctx)
static void declare_global_desc_pointers(struct si_shader_args *args)
{
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->internal_bindings);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_IMAGE_PTR,
&ctx->bindless_samplers_and_images);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->internal_bindings);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_IMAGE_PTR,
&args->bindless_samplers_and_images);
}
static void declare_vb_descriptor_input_sgprs(struct si_shader_context *ctx)
static void declare_vb_descriptor_input_sgprs(struct si_shader_args *args,
struct si_shader *shader)
{
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->args.vertex_buffers);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers);
unsigned num_vbos_in_user_sgprs = ctx->shader->selector->info.num_vbos_in_user_sgprs;
unsigned num_vbos_in_user_sgprs = shader->selector->info.num_vbos_in_user_sgprs;
if (num_vbos_in_user_sgprs) {
unsigned user_sgprs = ctx->args.num_sgprs_used;
unsigned user_sgprs = args->ac.num_sgprs_used;
if (si_is_merged_shader(ctx->shader))
if (si_is_merged_shader(shader))
user_sgprs -= 8;
assert(user_sgprs <= SI_SGPR_VS_VB_DESCRIPTOR_FIRST);
/* Declare unused SGPRs to align VB descriptors to 4 SGPRs (hw requirement). */
for (unsigned i = user_sgprs; i < SI_SGPR_VS_VB_DESCRIPTOR_FIRST; i++)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
assert(num_vbos_in_user_sgprs <= ARRAY_SIZE(ctx->vb_descriptors));
assert(num_vbos_in_user_sgprs <= ARRAY_SIZE(args->vb_descriptors));
for (unsigned i = 0; i < num_vbos_in_user_sgprs; i++)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 4, AC_ARG_INT, &ctx->vb_descriptors[i]);
ac_add_arg(&args->ac, AC_ARG_SGPR, 4, AC_ARG_INT, &args->vb_descriptors[i]);
}
}
static void declare_vs_input_vgprs(struct si_shader_context *ctx, unsigned *num_prolog_vgprs)
static void declare_vs_input_vgprs(struct si_shader_args *args, struct si_shader *shader,
unsigned *num_prolog_vgprs)
{
struct si_shader *shader = ctx->shader;
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vertex_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
if (shader->key.ge.as_ls) {
if (ctx->screen->info.gfx_level >= GFX11) {
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.instance_id);
} else if (ctx->screen->info.gfx_level >= GFX10) {
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vs_rel_patch_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.instance_id);
if (shader->selector->screen->info.gfx_level >= GFX11) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
} else if (shader->selector->screen->info.gfx_level >= GFX10) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
} else {
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vs_rel_patch_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.instance_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
}
} else if (ctx->screen->info.gfx_level >= GFX10) {
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT,
} else if (shader->selector->screen->info.gfx_level >= GFX10) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
/* user vgpr or PrimID (legacy) */
shader->key.ge.as_ngg ? NULL : &ctx->args.vs_prim_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.instance_id);
shader->key.ge.as_ngg ? NULL : &args->ac.vs_prim_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
} else {
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.instance_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vs_prim_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
}
if (!shader->is_gs_copy_shader) {
/* Vertex load indices. */
if (shader->selector->info.num_inputs) {
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->vertex_index0);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vertex_index0);
for (unsigned i = 1; i < shader->selector->info.num_inputs; i++)
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
}
*num_prolog_vgprs += shader->selector->info.num_inputs;
}
}
static void declare_vs_blit_inputs(struct si_shader_context *ctx, unsigned vs_blit_property)
static void declare_vs_blit_inputs(struct si_shader_args *args, unsigned vs_blit_property)
{
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_blit_inputs); /* i16 x1, y1 */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* i16 x1, y1 */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* depth */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_blit_inputs); /* i16 x1, y1 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* i16 x1, y1 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* depth */
if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color0 */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color1 */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color2 */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color3 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color0 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color1 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color2 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color3 */
} else if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.x1 */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.y1 */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.x2 */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.y2 */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.z */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.w */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.x1 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.y1 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.x2 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.y2 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.z */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.w */
}
}
static void declare_tes_input_vgprs(struct si_shader_context *ctx)
static void declare_tes_input_vgprs(struct si_shader_args *args)
{
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.tes_u);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.tes_v);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tes_rel_patch_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tes_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
}
enum
@ -397,138 +403,139 @@ void si_add_arg_checked(struct ac_shader_args *args, enum ac_arg_regfile file, u
ac_add_arg(args, file, registers, type, arg);
}
void si_init_shader_args(struct si_shader_context *ctx)
void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args)
{
struct si_shader *shader = ctx->shader;
unsigned i, num_returns, num_return_sgprs;
unsigned num_prolog_vgprs = 0;
unsigned stage = ctx->stage;
struct si_shader_selector *sel = shader->selector;
unsigned stage = shader->is_gs_copy_shader ? MESA_SHADER_VERTEX : sel->stage;
unsigned stage_case = stage;
memset(&ctx->args, 0, sizeof(ctx->args));
memset(args, 0, sizeof(*args));
/* Set MERGED shaders. */
if (ctx->screen->info.gfx_level >= GFX9 && stage <= MESA_SHADER_GEOMETRY) {
if (sel->screen->info.gfx_level >= GFX9 && stage <= MESA_SHADER_GEOMETRY) {
if (shader->key.ge.as_ls || stage == MESA_SHADER_TESS_CTRL)
stage = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */
stage_case = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */
else if (shader->key.ge.as_es || shader->key.ge.as_ngg || stage == MESA_SHADER_GEOMETRY)
stage = SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY;
stage_case = SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY;
}
switch (stage) {
switch (stage_case) {
case MESA_SHADER_VERTEX:
declare_global_desc_pointers(ctx);
declare_global_desc_pointers(args);
if (shader->selector->info.base.vs.blit_sgprs_amd) {
declare_vs_blit_inputs(ctx, shader->selector->info.base.vs.blit_sgprs_amd);
if (sel->info.base.vs.blit_sgprs_amd) {
declare_vs_blit_inputs(args, sel->info.base.vs.blit_sgprs_amd);
/* VGPRs */
declare_vs_input_vgprs(ctx, &num_prolog_vgprs);
declare_vs_input_vgprs(args, shader, &num_prolog_vgprs);
break;
}
declare_per_stage_desc_pointers(ctx, true);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
declare_per_stage_desc_pointers(args, shader, true);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits);
if (ctx->shader->is_gs_copy_shader) {
declare_streamout_params(ctx);
if (shader->is_gs_copy_shader) {
declare_streamout_params(args, shader);
/* VGPRs */
declare_vs_input_vgprs(ctx, &num_prolog_vgprs);
declare_vs_input_vgprs(args, shader, &num_prolog_vgprs);
break;
}
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.base_vertex);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.draw_id);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.start_instance);
declare_vb_descriptor_input_sgprs(ctx);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
declare_vb_descriptor_input_sgprs(args, shader);
if (shader->key.ge.as_es) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
} else if (shader->key.ge.as_ls) {
/* no extra parameters */
} else {
declare_streamout_params(ctx);
declare_streamout_params(args, shader);
}
/* VGPRs */
declare_vs_input_vgprs(ctx, &num_prolog_vgprs);
declare_vs_input_vgprs(args, shader, &num_prolog_vgprs);
break;
case MESA_SHADER_TESS_CTRL: /* GFX6-GFX8 */
declare_global_desc_pointers(ctx);
declare_per_stage_desc_pointers(ctx, true);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_offsets);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset);
declare_global_desc_pointers(args);
declare_per_stage_desc_pointers(args, shader, true);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_offchip_layout);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_out_lds_offsets);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_out_lds_layout);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
/* VGPRs */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_patch_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_rel_ids);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
/* param_tcs_offchip_offset and param_tcs_factor_offset are
* placed after the user SGPRs.
*/
for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
ac_add_return(&ctx->args, AC_ARG_SGPR);
ac_add_return(&args->ac, AC_ARG_SGPR);
for (i = 0; i < 11; i++)
ac_add_return(&ctx->args, AC_ARG_VGPR);
ac_add_return(&args->ac, AC_ARG_VGPR);
break;
case SI_SHADER_MERGED_VERTEX_TESSCTRL:
/* Merged stages have 8 system SGPRs at the beginning. */
/* Gfx9-10: SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */
/* Gfx11+: SPI_SHADER_PGM_LO/HI_HS */
declare_per_stage_desc_pointers(ctx, ctx->stage == MESA_SHADER_TESS_CTRL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.merged_wave_info);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset);
if (ctx->screen->info.gfx_level >= GFX11)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_wave_id);
declare_per_stage_desc_pointers(args, shader, stage == MESA_SHADER_TESS_CTRL);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
if (sel->screen->info.gfx_level >= GFX11)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_wave_id);
else
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.scratch_offset);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
declare_global_desc_pointers(ctx);
declare_per_stage_desc_pointers(ctx, ctx->stage == MESA_SHADER_VERTEX);
declare_global_desc_pointers(args);
declare_per_stage_desc_pointers(args, shader, stage == MESA_SHADER_VERTEX);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.base_vertex);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.draw_id);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.start_instance);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_offsets);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
if (ctx->stage == MESA_SHADER_VERTEX)
declare_vb_descriptor_input_sgprs(ctx);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_offchip_layout);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_out_lds_offsets);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_out_lds_layout);
if (stage == MESA_SHADER_VERTEX)
declare_vb_descriptor_input_sgprs(args, shader);
/* VGPRs (first TCS, then VS) */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_patch_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_rel_ids);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
if (ctx->stage == MESA_SHADER_VERTEX) {
declare_vs_input_vgprs(ctx, &num_prolog_vgprs);
if (stage == MESA_SHADER_VERTEX) {
declare_vs_input_vgprs(args, shader, &num_prolog_vgprs);
/* LS return values are inputs to the TCS main shader part. */
for (i = 0; i < 8 + GFX9_TCS_NUM_USER_SGPR; i++)
ac_add_return(&ctx->args, AC_ARG_SGPR);
ac_add_return(&args->ac, AC_ARG_SGPR);
for (i = 0; i < 2; i++)
ac_add_return(&ctx->args, AC_ARG_VGPR);
ac_add_return(&args->ac, AC_ARG_VGPR);
/* VS outputs passed via VGPRs to TCS. */
if (shader->key.ge.opt.same_patch_vertices) {
unsigned num_outputs = util_last_bit64(shader->selector->info.outputs_written);
for (i = 0; i < num_outputs * 4; i++)
ac_add_return(&ctx->args, AC_ARG_VGPR);
ac_add_return(&args->ac, AC_ARG_VGPR);
}
} else {
/* TCS inputs are passed via VGPRs from VS. */
if (shader->key.ge.opt.same_patch_vertices) {
unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->info.outputs_written);
for (i = 0; i < num_inputs * 4; i++)
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
}
/* TCS return values are inputs to the TCS epilog.
@ -538,9 +545,9 @@ void si_init_shader_args(struct si_shader_context *ctx)
* should be passed to the epilog.
*/
for (i = 0; i <= 8 + GFX9_SGPR_TCS_OUT_LAYOUT; i++)
ac_add_return(&ctx->args, AC_ARG_SGPR);
ac_add_return(&args->ac, AC_ARG_SGPR);
for (i = 0; i < 11; i++)
ac_add_return(&ctx->args, AC_ARG_VGPR);
ac_add_return(&args->ac, AC_ARG_VGPR);
}
break;
@ -548,157 +555,157 @@ void si_init_shader_args(struct si_shader_context *ctx)
/* Merged stages have 8 system SGPRs at the beginning. */
/* Gfx9-10: SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */
/* Gfx11+: SPI_SHADER_PGM_LO/HI_GS */
declare_per_stage_desc_pointers(ctx, ctx->stage == MESA_SHADER_GEOMETRY);
declare_per_stage_desc_pointers(args, shader, stage == MESA_SHADER_GEOMETRY);
if (ctx->shader->key.ge.as_ngg)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs_tg_info);
if (shader->key.ge.as_ngg)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
else
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs2vs_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.merged_wave_info);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
if (ctx->screen->info.gfx_level >= GFX11)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs_attr_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
if (sel->screen->info.gfx_level >= GFX11)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_attr_offset);
else
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.scratch_offset);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
declare_global_desc_pointers(ctx);
if (ctx->stage != MESA_SHADER_VERTEX || !shader->selector->info.base.vs.blit_sgprs_amd) {
declare_global_desc_pointers(args);
if (stage != MESA_SHADER_VERTEX || !sel->info.base.vs.blit_sgprs_amd) {
declare_per_stage_desc_pointers(
ctx, (ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL));
args, shader, (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL));
}
if (ctx->stage == MESA_SHADER_VERTEX && shader->selector->info.base.vs.blit_sgprs_amd) {
declare_vs_blit_inputs(ctx, shader->selector->info.base.vs.blit_sgprs_amd);
if (stage == MESA_SHADER_VERTEX && sel->info.base.vs.blit_sgprs_amd) {
declare_vs_blit_inputs(args, sel->info.base.vs.blit_sgprs_amd);
} else {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits);
if (ctx->stage == MESA_SHADER_VERTEX) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.base_vertex);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.draw_id);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.start_instance);
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tes_offchip_addr);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
if (stage == MESA_SHADER_VERTEX) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
} else if (stage == MESA_SHADER_TESS_EVAL) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_offchip_layout);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tes_offchip_addr);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
} else {
/* GS */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
}
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->small_prim_cull_info);
if (ctx->screen->info.gfx_level >= GFX11)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs_attr_address);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->small_prim_cull_info);
if (sel->screen->info.gfx_level >= GFX11)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_attr_address);
else
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
if (ctx->stage == MESA_SHADER_VERTEX)
declare_vb_descriptor_input_sgprs(ctx);
if (stage == MESA_SHADER_VERTEX)
declare_vb_descriptor_input_sgprs(args, shader);
}
/* VGPRs (first GS, then VS/TES) */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[0]);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[1]);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_prim_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_invocation_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[2]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
if (ctx->stage == MESA_SHADER_VERTEX) {
declare_vs_input_vgprs(ctx, &num_prolog_vgprs);
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
declare_tes_input_vgprs(ctx);
if (stage == MESA_SHADER_VERTEX) {
declare_vs_input_vgprs(args, shader, &num_prolog_vgprs);
} else if (stage == MESA_SHADER_TESS_EVAL) {
declare_tes_input_vgprs(args);
}
if (ctx->shader->key.ge.as_es &&
(ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL)) {
if (shader->key.ge.as_es &&
(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL)) {
/* ES return values are inputs to GS. */
for (i = 0; i < 8 + GFX9_GS_NUM_USER_SGPR; i++)
ac_add_return(&ctx->args, AC_ARG_SGPR);
ac_add_return(&args->ac, AC_ARG_SGPR);
for (i = 0; i < 5; i++)
ac_add_return(&ctx->args, AC_ARG_VGPR);
ac_add_return(&args->ac, AC_ARG_VGPR);
}
break;
case MESA_SHADER_TESS_EVAL:
declare_global_desc_pointers(ctx);
declare_per_stage_desc_pointers(ctx, true);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tes_offchip_addr);
declare_global_desc_pointers(args);
declare_per_stage_desc_pointers(args, shader, true);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_offchip_layout);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tes_offchip_addr);
if (shader->key.ge.as_es) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
} else {
declare_streamout_params(ctx);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
declare_streamout_params(args, shader);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
}
/* VGPRs */
declare_tes_input_vgprs(ctx);
declare_tes_input_vgprs(args);
break;
case MESA_SHADER_GEOMETRY:
declare_global_desc_pointers(ctx);
declare_per_stage_desc_pointers(ctx, true);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs2vs_offset);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs_wave_id);
declare_global_desc_pointers(args);
declare_per_stage_desc_pointers(args, shader, true);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
/* VGPRs */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[0]);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[1]);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_prim_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[2]);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[3]);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[4]);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[5]);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_invocation_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
break;
case MESA_SHADER_FRAGMENT:
declare_global_desc_pointers(ctx);
declare_per_stage_desc_pointers(ctx, true);
si_add_arg_checked(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL, SI_PARAM_ALPHA_REF);
si_add_arg_checked(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.prim_mask,
declare_global_desc_pointers(args);
declare_per_stage_desc_pointers(args, shader, true);
si_add_arg_checked(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL, SI_PARAM_ALPHA_REF);
si_add_arg_checked(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask,
SI_PARAM_PRIM_MASK);
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT, &ctx->args.persp_sample,
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample,
SI_PARAM_PERSP_SAMPLE);
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT, &ctx->args.persp_center,
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center,
SI_PARAM_PERSP_CENTER);
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT, &ctx->args.persp_centroid,
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid,
SI_PARAM_PERSP_CENTROID);
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 3, AC_ARG_INT, NULL, SI_PARAM_PERSP_PULL_MODEL);
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT, &ctx->args.linear_sample,
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, NULL, SI_PARAM_PERSP_PULL_MODEL);
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample,
SI_PARAM_LINEAR_SAMPLE);
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT, &ctx->args.linear_center,
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center,
SI_PARAM_LINEAR_CENTER);
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 2, AC_ARG_INT, &ctx->args.linear_centroid,
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid,
SI_PARAM_LINEAR_CENTROID);
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL, SI_PARAM_LINE_STIPPLE_TEX);
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.frag_pos[0],
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL, SI_PARAM_LINE_STIPPLE_TEX);
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0],
SI_PARAM_POS_X_FLOAT);
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.frag_pos[1],
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1],
SI_PARAM_POS_Y_FLOAT);
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.frag_pos[2],
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2],
SI_PARAM_POS_Z_FLOAT);
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.frag_pos[3],
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3],
SI_PARAM_POS_W_FLOAT);
shader->info.face_vgpr_index = ctx->args.num_vgprs_used;
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.front_face,
shader->info.face_vgpr_index = args->ac.num_vgprs_used;
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face,
SI_PARAM_FRONT_FACE);
shader->info.ancillary_vgpr_index = ctx->args.num_vgprs_used;
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.ancillary,
shader->info.ancillary_vgpr_index = args->ac.num_vgprs_used;
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary,
SI_PARAM_ANCILLARY);
shader->info.sample_coverage_vgpr_index = ctx->args.num_vgprs_used;
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.sample_coverage,
shader->info.sample_coverage_vgpr_index = args->ac.num_vgprs_used;
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.sample_coverage,
SI_PARAM_SAMPLE_COVERAGE);
si_add_arg_checked(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->pos_fixed_pt,
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->pos_fixed_pt,
SI_PARAM_POS_FIXED_PT);
/* Color inputs from the prolog. */
@ -706,7 +713,7 @@ void si_init_shader_args(struct si_shader_context *ctx)
unsigned num_color_elements = util_bitcount(shader->selector->info.colors_read);
for (i = 0; i < num_color_elements; i++)
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
num_prolog_vgprs += num_color_elements;
}
@ -718,67 +725,67 @@ void si_init_shader_args(struct si_shader_context *ctx)
shader->selector->info.writes_samplemask + 1 /* SampleMaskIn */;
for (i = 0; i < num_return_sgprs; i++)
ac_add_return(&ctx->args, AC_ARG_SGPR);
ac_add_return(&args->ac, AC_ARG_SGPR);
for (; i < num_returns; i++)
ac_add_return(&ctx->args, AC_ARG_VGPR);
ac_add_return(&args->ac, AC_ARG_VGPR);
break;
case MESA_SHADER_COMPUTE:
declare_global_desc_pointers(ctx);
declare_per_stage_desc_pointers(ctx, true);
declare_global_desc_pointers(args);
declare_per_stage_desc_pointers(args, shader, true);
if (shader->selector->info.uses_grid_size)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 3, AC_ARG_INT, &ctx->args.num_work_groups);
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
if (shader->selector->info.uses_variable_block_size)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->block_size);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->block_size);
unsigned cs_user_data_dwords =
shader->selector->info.base.cs.user_data_components_amd;
if (cs_user_data_dwords) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, cs_user_data_dwords, AC_ARG_INT, &ctx->cs_user_data);
ac_add_arg(&args->ac, AC_ARG_SGPR, cs_user_data_dwords, AC_ARG_INT, &args->cs_user_data);
}
/* Some descriptors can be in user SGPRs. */
/* Shader buffers in user SGPRs. */
for (unsigned i = 0; i < shader->selector->cs_num_shaderbufs_in_user_sgprs; i++) {
while (ctx->args.num_sgprs_used % 4 != 0)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
while (args->ac.num_sgprs_used % 4 != 0)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 4, AC_ARG_INT, &ctx->cs_shaderbuf[i]);
ac_add_arg(&args->ac, AC_ARG_SGPR, 4, AC_ARG_INT, &args->cs_shaderbuf[i]);
}
/* Images in user SGPRs. */
for (unsigned i = 0; i < shader->selector->cs_num_images_in_user_sgprs; i++) {
unsigned num_sgprs = BITSET_TEST(shader->selector->info.base.image_buffers, i) ? 4 : 8;
while (ctx->args.num_sgprs_used % num_sgprs != 0)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
while (args->ac.num_sgprs_used % num_sgprs != 0)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, num_sgprs, AC_ARG_INT, &ctx->cs_image[i]);
ac_add_arg(&args->ac, AC_ARG_SGPR, num_sgprs, AC_ARG_INT, &args->cs_image[i]);
}
/* Hardware SGPRs. */
for (i = 0; i < 3; i++) {
if (shader->selector->info.uses_block_id[i]) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.workgroup_ids[i]);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
}
}
if (shader->selector->info.uses_subgroup_info)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tg_size);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
/* Hardware VGPRs. */
/* Thread IDs are packed in VGPR0, 10 bits per component or stored in 3 separate VGPRs */
if (ctx->screen->info.gfx_level >= GFX11 ||
(!ctx->screen->info.has_graphics && ctx->screen->info.family >= CHIP_MI200))
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.local_invocation_ids);
if (sel->screen->info.gfx_level >= GFX11 ||
(!sel->screen->info.has_graphics && sel->screen->info.family >= CHIP_MI200))
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
else
ac_add_arg(&ctx->args, AC_ARG_VGPR, 3, AC_ARG_INT, &ctx->args.local_invocation_ids);
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
break;
default:
assert(0 && "unimplemented shader");
return;
}
shader->info.num_input_sgprs = ctx->args.num_sgprs_used;
shader->info.num_input_vgprs = ctx->args.num_vgprs_used;
shader->info.num_input_sgprs = args->ac.num_sgprs_used;
shader->info.num_input_vgprs = args->ac.num_vgprs_used;
assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
shader->info.num_input_vgprs -= num_prolog_vgprs;
@ -1964,6 +1971,10 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
struct si_shader *shader, struct util_debug_callback *debug)
{
struct si_shader_selector *sel = shader->selector;
struct si_shader_args args;
si_init_shader_args(shader, &args);
bool free_nir;
struct nir_shader *nir = si_get_nir_shader(shader, &free_nir, 0);
@ -2021,7 +2032,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
* with PS and NGG VS), but monolithic shaders should be compiled
* by LLVM due to more complicated compilation.
*/
if (!si_llvm_compile_shader(sscreen, compiler, shader, &so, debug, nir, free_nir))
if (!si_llvm_compile_shader(sscreen, compiler, shader, &args, &so, debug, nir, free_nir))
return false;
shader->config.float_mode = float_mode;
@ -2193,6 +2204,9 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list,
ctx.shader = &shader;
ctx.stage = stage;
struct si_shader_args args;
ctx.args = &args;
build(&ctx, key);
/* Compile. */

View file

@ -292,7 +292,7 @@ enum
} while (0)
/* This is called during shader compilation and returns LLVMValueRef. */
#define GET_FIELD(ctx, field) si_unpack_param((ctx), (ctx)->vs_state_bits, field##__SHIFT, \
#define GET_FIELD(ctx, field) si_unpack_param((ctx), (ctx)->args->vs_state_bits, field##__SHIFT, \
util_bitcount(field##__MASK))
enum

View file

@ -36,28 +36,8 @@ struct si_shader_output_values {
ubyte semantic;
};
struct si_shader_context {
struct ac_llvm_context ac;
struct si_shader *shader;
struct si_screen *screen;
struct pipe_stream_output_info so;
gl_shader_stage stage;
/* For clamping the non-constant index in resource indexing: */
unsigned num_const_buffers;
unsigned num_shader_buffers;
unsigned num_images;
unsigned num_samplers;
struct ac_shader_args args;
struct ac_shader_abi abi;
LLVMBasicBlockRef merged_wrap_if_entry_block;
int merged_wrap_if_label;
struct ac_llvm_pointer main_fn;
LLVMTypeRef return_type;
struct si_shader_args {
struct ac_shader_args ac;
struct ac_arg const_and_shader_buffers;
struct ac_arg samplers_and_images;
@ -132,6 +112,30 @@ struct si_shader_context {
struct ac_arg cs_user_data;
struct ac_arg cs_shaderbuf[3];
struct ac_arg cs_image[3];
};
struct si_shader_context {
struct ac_llvm_context ac;
struct si_shader *shader;
struct si_screen *screen;
struct pipe_stream_output_info so;
gl_shader_stage stage;
/* For clamping the non-constant index in resource indexing: */
unsigned num_const_buffers;
unsigned num_shader_buffers;
unsigned num_images;
unsigned num_samplers;
struct si_shader_args *args;
struct ac_shader_abi abi;
LLVMBasicBlockRef merged_wrap_if_entry_block;
int merged_wrap_if_label;
struct ac_llvm_pointer main_fn;
LLVMTypeRef return_type;
struct ac_llvm_compiler *compiler;
@ -158,7 +162,7 @@ bool si_is_multi_part_shader(struct si_shader *shader);
bool si_is_merged_shader(struct si_shader *shader);
void si_add_arg_checked(struct ac_shader_args *args, enum ac_arg_regfile file, unsigned registers,
enum ac_arg_type type, struct ac_arg *arg, unsigned idx);
void si_init_shader_args(struct si_shader_context *ctx);
void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args);
unsigned si_get_max_workgroup_size(const struct si_shader *shader);
bool si_vs_needs_prolog(const struct si_shader_selector *sel,
const struct si_vs_prolog_bits *prolog_key);
@ -219,7 +223,8 @@ void si_build_wrapper_function(struct si_shader_context *ctx, struct ac_llvm_poi
bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader,
struct nir_shader *nir, bool free_nir);
bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
struct si_shader *shader, const struct pipe_stream_output_info *so,
struct si_shader *shader, struct si_shader_args *args,
const struct pipe_stream_output_info *so,
struct util_debug_callback *debug, struct nir_shader *nir,
bool free_nir);

View file

@ -181,7 +181,7 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy
/* Setup the function */
ctx->return_type = ret_type;
ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name, ret_type, ctx->ac.module);
ctx->main_fn = ac_build_main(&ctx->args->ac, &ctx->ac, call_conv, name, ret_type, ctx->ac.module);
ctx->return_value = LLVMGetUndef(ctx->return_type);
if (ctx->screen->info.address32_hi) {
@ -203,14 +203,12 @@ void si_llvm_create_main_func(struct si_shader_context *ctx)
LLVMTypeRef returns[AC_MAX_ARGS];
unsigned i;
si_init_shader_args(ctx);
for (i = 0; i < ctx->args.num_sgprs_returned; i++)
for (i = 0; i < ctx->args->ac.num_sgprs_returned; i++)
returns[i] = ctx->ac.i32; /* SGPR */
for (; i < ctx->args.return_count; i++)
for (; i < ctx->args->ac.return_count; i++)
returns[i] = ctx->ac.f32; /* VGPR */
si_llvm_create_func(ctx, "main", returns, ctx->args.return_count,
si_llvm_create_func(ctx, "main", returns, ctx->args->ac.return_count,
si_get_max_workgroup_size(shader));
/* Reserve register locations for VGPR inputs the PS prolog may need. */
@ -244,11 +242,11 @@ void si_llvm_create_main_func(struct si_shader_context *ctx)
* API shader they appear as normal arguments.
*/
if (ctx->stage == MESA_SHADER_VERTEX) {
ctx->abi.vertex_id = ac_get_arg(&ctx->ac, ctx->args.vertex_id);
ctx->abi.instance_id = ac_get_arg(&ctx->ac, ctx->args.instance_id);
ctx->abi.vertex_id = ac_get_arg(&ctx->ac, ctx->args->ac.vertex_id);
ctx->abi.instance_id = ac_get_arg(&ctx->ac, ctx->args->ac.instance_id);
} else if (ctx->stage == MESA_SHADER_FRAGMENT) {
ctx->abi.persp_centroid = ac_get_arg(&ctx->ac, ctx->args.persp_centroid);
ctx->abi.linear_centroid = ac_get_arg(&ctx->ac, ctx->args.linear_centroid);
ctx->abi.persp_centroid = ac_get_arg(&ctx->ac, ctx->args->ac.persp_centroid);
ctx->abi.linear_centroid = ac_get_arg(&ctx->ac, ctx->args->ac.linear_centroid);
}
}
@ -387,15 +385,15 @@ LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, unsigned swizzle
switch (ctx->stage) {
case MESA_SHADER_VERTEX:
return ac_get_arg(&ctx->ac, ctx->args.vs_prim_id);
return ac_get_arg(&ctx->ac, ctx->args->ac.vs_prim_id);
case MESA_SHADER_TESS_CTRL:
return ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id);
return ac_get_arg(&ctx->ac, ctx->args->ac.tcs_patch_id);
case MESA_SHADER_TESS_EVAL:
return ctx->abi.tes_patch_id_replaced ?
ctx->abi.tes_patch_id_replaced :
ac_get_arg(&ctx->ac, ctx->args.tes_patch_id);
ac_get_arg(&ctx->ac, ctx->args->ac.tes_patch_id);
case MESA_SHADER_GEOMETRY:
return ac_get_arg(&ctx->ac, ctx->args.gs_prim_id);
return ac_get_arg(&ctx->ac, ctx->args->ac.gs_prim_id);
default:
assert(0);
return ctx->ac.i32_0;
@ -445,7 +443,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, struct ac_llvm_poi
unsigned num_sgprs, num_vgprs;
unsigned gprs;
memset(&ctx->args, 0, sizeof(ctx->args));
memset(ctx->args, 0, sizeof(*ctx->args));
for (unsigned i = 0; i < num_parts; ++i) {
ac_add_function_attr(ctx->ac.context, parts[i].value, -1, AC_FUNC_ATTR_ALWAYSINLINE);
@ -476,13 +474,13 @@ void si_build_wrapper_function(struct si_shader_context *ctx, struct ac_llvm_poi
gprs = 0;
while (gprs < num_sgprs + num_vgprs) {
LLVMValueRef param = LLVMGetParam(parts[main_part].value, ctx->args.arg_count);
LLVMValueRef param = LLVMGetParam(parts[main_part].value, ctx->args->ac.arg_count);
LLVMTypeRef type = LLVMTypeOf(param);
unsigned size = ac_get_type_size(type) / 4;
enum ac_arg_type arg_type = main_arg_types[ctx->args.arg_count];
enum ac_arg_type arg_type = main_arg_types[ctx->args->ac.arg_count];
assert(arg_type != AC_ARG_INVALID);
ac_add_arg(&ctx->args, gprs < num_sgprs ? AC_ARG_SGPR : AC_ARG_VGPR, size, arg_type, NULL);
ac_add_arg(&ctx->args->ac, gprs < num_sgprs ? AC_ARG_SGPR : AC_ARG_VGPR, size, arg_type, NULL);
assert(ac_is_sgpr_param(param) == (gprs < num_sgprs));
assert(gprs + size <= num_sgprs + num_vgprs &&
@ -522,10 +520,10 @@ void si_build_wrapper_function(struct si_shader_context *ctx, struct ac_llvm_poi
num_out = 0;
num_out_sgpr = 0;
for (unsigned i = 0; i < ctx->args.arg_count; ++i) {
for (unsigned i = 0; i < ctx->args->ac.arg_count; ++i) {
LLVMValueRef param = LLVMGetParam(ctx->main_fn.value, i);
LLVMTypeRef param_type = LLVMTypeOf(param);
LLVMTypeRef out_type = ctx->args.args[i].file == AC_ARG_SGPR ? ctx->ac.i32 : ctx->ac.f32;
LLVMTypeRef out_type = ctx->args->ac.args[i].file == AC_ARG_SGPR ? ctx->ac.i32 : ctx->ac.f32;
unsigned size = ac_get_type_size(param_type) / 4;
if (size == 1) {
@ -553,7 +551,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, struct ac_llvm_poi
LLVMBuildExtractElement(builder, param, LLVMConstInt(ctx->ac.i32, j, 0), "");
}
if (ctx->args.args[i].file == AC_ARG_SGPR)
if (ctx->args->ac.args[i].file == AC_ARG_SGPR)
num_out_sgpr = num_out;
}
@ -738,12 +736,13 @@ static LLVMValueRef si_llvm_build_attr_ring_desc(struct si_shader_context *ctx)
LLVMValueRef attr_address;
if (ctx->stage == MESA_SHADER_VERTEX && shader->selector->info.base.vs.blit_sgprs_amd) {
struct ac_llvm_pointer ring_ptr = ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->internal_bindings);
struct ac_llvm_pointer ring_ptr =
ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings);
ring_ptr.pointee_type = ctx->ac.i32;
attr_address = ac_build_load_to_sgpr(&ctx->ac, ring_ptr,
LLVMConstInt(ctx->ac.i32, SI_GS_ATTRIBUTE_RING * 4, 0));
} else {
attr_address = ac_get_arg(&ctx->ac, ctx->gs_attr_address);
attr_address = ac_get_arg(&ctx->ac, ctx->args->gs_attr_address);
}
unsigned stride = 16 * shader->info.nr_param_exports;
@ -770,7 +769,7 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
switch (op) {
case nir_intrinsic_load_first_vertex:
return ac_get_arg(&ctx->ac, ctx->args.base_vertex);
return ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex);
case nir_intrinsic_load_base_vertex: {
/* For non-indexed draws, the base vertex set by the driver
@ -779,7 +778,7 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
*/
LLVMValueRef indexed = GET_FIELD(ctx, VS_STATE_INDEXED);
indexed = LLVMBuildTrunc(ctx->ac.builder, indexed, ctx->ac.i1, "");
return LLVMBuildSelect(ctx->ac.builder, indexed, ac_get_arg(&ctx->ac, ctx->args.base_vertex),
return LLVMBuildSelect(ctx->ac.builder, indexed, ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex),
ctx->ac.i32_0, "");
}
@ -787,9 +786,9 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
assert(ctx->shader->selector->info.base.workgroup_size_variable &&
ctx->shader->selector->info.uses_variable_block_size);
LLVMValueRef chan[3] = {
si_unpack_param(ctx, ctx->block_size, 0, 10),
si_unpack_param(ctx, ctx->block_size, 10, 10),
si_unpack_param(ctx, ctx->block_size, 20, 10),
si_unpack_param(ctx, ctx->args->block_size, 0, 10),
si_unpack_param(ctx, ctx->args->block_size, 10, 10),
si_unpack_param(ctx, ctx->args->block_size, 20, 10),
};
return ac_build_gather_values(&ctx->ac, chan, 3);
}
@ -797,7 +796,10 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
case nir_intrinsic_load_tess_level_outer_default:
case nir_intrinsic_load_tess_level_inner_default: {
LLVMValueRef slot = LLVMConstInt(ctx->ac.i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
LLVMValueRef buf = ac_build_load_to_sgpr(&ctx->ac, ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->internal_bindings), slot);
LLVMValueRef buf =
ac_build_load_to_sgpr(&ctx->ac,
ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings),
slot);
int offset = op == nir_intrinsic_load_tess_level_inner_default ? 4 : 0;
LLVMValueRef val[4];
@ -808,14 +810,14 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
case nir_intrinsic_load_patch_vertices_in:
if (ctx->stage == MESA_SHADER_TESS_CTRL)
return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6);
return si_unpack_param(ctx, ctx->args->tcs_out_lds_layout, 13, 6);
else if (ctx->stage == MESA_SHADER_TESS_EVAL)
return si_get_num_tcs_out_vertices(ctx);
else
return NULL;
case nir_intrinsic_load_sample_mask_in:
return ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.sample_coverage));
return ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.sample_coverage));
case nir_intrinsic_load_lshs_vertex_stride_amd:
return LLVMBuildShl(ctx->ac.builder, si_get_tcs_in_vertex_dw_stride(ctx),
@ -823,17 +825,17 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
case nir_intrinsic_load_tcs_num_patches_amd:
return LLVMBuildAdd(ctx->ac.builder,
si_unpack_param(ctx, ctx->tcs_offchip_layout, 0, 6),
si_unpack_param(ctx, ctx->args->tcs_offchip_layout, 0, 6),
ctx->ac.i32_1, "");
case nir_intrinsic_load_hs_out_patch_data_offset_amd:
return si_unpack_param(ctx, ctx->tcs_offchip_layout, 11, 21);
return si_unpack_param(ctx, ctx->args->tcs_offchip_layout, 11, 21);
case nir_intrinsic_load_ring_tess_offchip_amd:
return ctx->tess_offchip_ring;
case nir_intrinsic_load_ring_tess_offchip_offset_amd:
return ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
return ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset);
case nir_intrinsic_load_tess_rel_patch_id_amd:
return si_get_rel_patch_id(ctx);
@ -842,17 +844,17 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
return ctx->esgs_ring;
case nir_intrinsic_load_ring_es2gs_offset_amd:
return ac_get_arg(&ctx->ac, ctx->args.es2gs_offset);
return ac_get_arg(&ctx->ac, ctx->args->ac.es2gs_offset);
case nir_intrinsic_load_clip_half_line_width_amd: {
LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->small_prim_cull_info);
LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->args->small_prim_cull_info);
return ac_build_load_to_sgpr(&ctx->ac,
(struct ac_llvm_pointer) { .t = ctx->ac.v2f32, .v = ptr }, LLVMConstInt(ctx->ac.i32, 4, 0));
}
case nir_intrinsic_load_viewport_xy_scale_and_offset: {
bool prim_is_lines = ctx->shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES;
struct ac_llvm_pointer ptr = ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->small_prim_cull_info);
struct ac_llvm_pointer ptr = ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->small_prim_cull_info);
LLVMValueRef terms =
ac_build_load_to_sgpr(&ctx->ac, ptr, prim_is_lines ? ctx->ac.i32_1 : ctx->ac.i32_0);
return LLVMBuildBitCast(ctx->ac.builder, terms, ctx->ac.v4f32, "");
@ -936,7 +938,7 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
static LLVMValueRef si_llvm_load_user_clip_plane(struct ac_shader_abi *abi, unsigned ucp_id)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct ac_llvm_pointer ptr = ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->internal_bindings);
struct ac_llvm_pointer ptr = ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings);
LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0);
LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index);
LLVMValueRef addr = LLVMConstInt(ctx->ac.i32, ucp_id * 16, 0);
@ -947,7 +949,7 @@ static LLVMValueRef si_llvm_load_user_clip_plane(struct ac_shader_abi *abi, unsi
static LLVMValueRef si_llvm_load_streamout_buffer(struct ac_shader_abi *abi, unsigned buffer)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct ac_llvm_pointer buf_ptr = ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->internal_bindings);
struct ac_llvm_pointer buf_ptr = ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings);
return ac_build_load_to_sgpr(
&ctx->ac, buf_ptr, LLVMConstInt(ctx->ac.i32, SI_VS_STREAMOUT_BUF0 + buffer, false));
@ -988,7 +990,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
/* preload instance_divisor_constbuf to be used for input load after culling */
if (ctx->shader->key.ge.opt.ngg_culling &&
ctx->shader->key.ge.part.vs.prolog.instance_divisor_is_fetched) {
struct ac_llvm_pointer buf = ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->internal_bindings);
struct ac_llvm_pointer buf = ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings);
ctx->instance_divisor_constbuf =
ac_build_load_to_sgpr(
&ctx->ac, buf, LLVMConstInt(ctx->ac.i32, SI_VS_CONST_INSTANCE_DIVISORS, 0));
@ -1070,7 +1072,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
case MESA_SHADER_COMPUTE:
if (nir->info.cs.user_data_components_amd) {
ctx->abi.user_data = ac_get_arg(&ctx->ac, ctx->cs_user_data);
ctx->abi.user_data = ac_get_arg(&ctx->ac, ctx->args->cs_user_data);
ctx->abi.user_data = ac_build_expand_to_vec4(&ctx->ac, ctx->abi.user_data,
nir->info.cs.user_data_components_amd);
}
@ -1224,7 +1226,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
}
}
if (!ac_nir_translate(&ctx->ac, &ctx->abi, &ctx->args, nir))
if (!ac_nir_translate(&ctx->ac, &ctx->abi, &ctx->args->ac, nir))
return false;
switch (sel->stage) {
@ -1283,7 +1285,8 @@ static bool si_should_optimize_less(struct ac_llvm_compiler *compiler,
}
bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
struct si_shader *shader, const struct pipe_stream_output_info *so,
struct si_shader *shader, struct si_shader_args *args,
const struct pipe_stream_output_info *so,
struct util_debug_callback *debug, struct nir_shader *nir,
bool free_nir)
{
@ -1292,6 +1295,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
ctx.so = *so;
ctx.args = args;
if (!si_llvm_translate_nir(&ctx, shader, nir, free_nir)) {
si_llvm_dispose(&ctx);
@ -1305,9 +1309,9 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
/* Preserve main arguments. */
enum ac_arg_type main_arg_types[AC_MAX_ARGS];
for (int i = 0; i < ctx.args.arg_count; i++)
main_arg_types[i] = ctx.args.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
for (int i = 0; i < ctx.args->ac.arg_count; i++)
main_arg_types[i] = ctx.args->ac.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args->ac.arg_count)] = AC_ARG_INVALID;
union si_shader_part_key prolog_key;
si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs,
@ -1345,6 +1349,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
shader_ls.key.ge.opt.inline_uniforms = false; /* only TCS can inline uniforms */
shader_ls.is_monolithic = true;
si_init_shader_args(&shader_ls, ctx.args);
nir = si_get_nir_shader(&shader_ls, &free_nir, sel->info.tcs_vgpr_only_inputs);
si_update_shader_binary_info(shader, nir);
@ -1355,9 +1360,9 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
shader->info.uses_instanceid |= ls->info.uses_instanceid;
parts[1] = ctx.main_fn;
for (int i = 0; i < ctx.args.arg_count; i++)
main_arg_types[i] = ctx.args.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
for (int i = 0; i < ctx.args->ac.arg_count; i++)
main_arg_types[i] = ctx.args->ac.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args->ac.arg_count)] = AC_ARG_INVALID;
/* LS prolog */
if (vs_needs_prolog) {
@ -1383,9 +1388,9 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
parts[0] = ctx.main_fn;
for (int i = 0; i < ctx.args.arg_count; i++)
main_arg_types[i] = ctx.args.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
for (int i = 0; i < ctx.args->ac.arg_count; i++)
main_arg_types[i] = ctx.args->ac.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args->ac.arg_count)] = AC_ARG_INVALID;
memset(&epilog_key, 0, sizeof(epilog_key));
epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog;
@ -1416,6 +1421,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
shader_es.key.ge.opt.kill_outputs = 0;
shader_es.is_monolithic = true;
si_init_shader_args(&shader_es, ctx.args);
nir = si_get_nir_shader(&shader_es, &free_nir, 0);
si_update_shader_binary_info(shader, nir);
@ -1427,9 +1433,9 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
es_main = ctx.main_fn;
/* Preserve main (= es_main) arguments. */
for (int i = 0; i < ctx.args.arg_count; i++)
main_arg_types[i] = ctx.args.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID;
for (int i = 0; i < ctx.args->ac.arg_count; i++)
main_arg_types[i] = ctx.args->ac.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args->ac.arg_count)] = AC_ARG_INVALID;
/* ES prolog */
if (es->stage == MESA_SHADER_VERTEX &&

View file

@ -33,14 +33,14 @@ LLVMValueRef si_is_es_thread(struct si_shader_context *ctx)
{
/* Return true if the current thread should execute an ES thread. */
return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac),
si_unpack_param(ctx, ctx->args.merged_wave_info, 0, 8), "");
si_unpack_param(ctx, ctx->args->ac.merged_wave_info, 0, 8), "");
}
LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx)
{
/* Return true if the current thread should execute a GS thread. */
return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac),
si_unpack_param(ctx, ctx->args.merged_wave_info, 8, 8), "");
si_unpack_param(ctx, ctx->args->ac.merged_wave_info, 8, 8), "");
}
/* Pass GS inputs from ES to GS on GFX9. */
@ -51,34 +51,34 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
LLVMValueRef ret = ctx->return_value;
ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
ret = si_insert_input_ptr(ctx, ret, ctx->args->other_const_and_shader_buffers, 0);
ret = si_insert_input_ptr(ctx, ret, ctx->args->other_samplers_and_images, 1);
if (ctx->shader->key.ge.as_ngg)
ret = si_insert_input_ptr(ctx, ret, ctx->args.gs_tg_info, 2);
ret = si_insert_input_ptr(ctx, ret, ctx->args->ac.gs_tg_info, 2);
else
ret = si_insert_input_ret(ctx, ret, ctx->args.gs2vs_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->args.merged_wave_info, 3);
ret = si_insert_input_ret(ctx, ret, ctx->args->ac.gs2vs_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->args->ac.merged_wave_info, 3);
if (ctx->screen->info.gfx_level >= GFX11)
ret = si_insert_input_ret(ctx, ret, ctx->args.gs_attr_offset, 5);
ret = si_insert_input_ret(ctx, ret, ctx->args->ac.gs_attr_offset, 5);
else
ret = si_insert_input_ret(ctx, ret, ctx->args.scratch_offset, 5);
ret = si_insert_input_ptr(ctx, ret, ctx->internal_bindings, 8 + SI_SGPR_INTERNAL_BINDINGS);
ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images,
ret = si_insert_input_ret(ctx, ret, ctx->args->ac.scratch_offset, 5);
ret = si_insert_input_ptr(ctx, ret, ctx->args->internal_bindings, 8 + SI_SGPR_INTERNAL_BINDINGS);
ret = si_insert_input_ptr(ctx, ret, ctx->args->bindless_samplers_and_images,
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
if (ctx->screen->use_ngg) {
ret = si_insert_input_ptr(ctx, ret, ctx->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
ret = si_insert_input_ptr(ctx, ret, ctx->small_prim_cull_info, 8 + GFX9_SGPR_SMALL_PRIM_CULL_INFO);
ret = si_insert_input_ptr(ctx, ret, ctx->args->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
ret = si_insert_input_ptr(ctx, ret, ctx->args->small_prim_cull_info, 8 + GFX9_SGPR_SMALL_PRIM_CULL_INFO);
if (ctx->screen->info.gfx_level >= GFX11)
ret = si_insert_input_ptr(ctx, ret, ctx->gs_attr_address, 8 + GFX9_SGPR_ATTRIBUTE_RING_ADDR);
ret = si_insert_input_ptr(ctx, ret, ctx->args->gs_attr_address, 8 + GFX9_SGPR_ATTRIBUTE_RING_ADDR);
}
unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_vtx_offset[0], vgpr++);
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_vtx_offset[1], vgpr++);
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_prim_id, vgpr++);
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_invocation_id, vgpr++);
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_vtx_offset[2], vgpr++);
ret = si_insert_input_ret_float(ctx, ret, ctx->args->ac.gs_vtx_offset[0], vgpr++);
ret = si_insert_input_ret_float(ctx, ret, ctx->args->ac.gs_vtx_offset[1], vgpr++);
ret = si_insert_input_ret_float(ctx, ret, ctx->args->ac.gs_prim_id, vgpr++);
ret = si_insert_input_ret_float(ctx, ret, ctx->args->ac.gs_invocation_id, vgpr++);
ret = si_insert_input_ret_float(ctx, ret, ctx->args->ac.gs_vtx_offset[2], vgpr++);
ctx->return_value = ret;
}
@ -91,14 +91,15 @@ void si_llvm_es_build_end(struct si_shader_context *ctx)
static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
{
if (ctx->screen->info.gfx_level >= GFX9)
return si_unpack_param(ctx, ctx->args.merged_wave_info, 16, 8);
return si_unpack_param(ctx, ctx->args->ac.merged_wave_info, 16, 8);
else
return ac_get_arg(&ctx->ac, ctx->args.gs_wave_id);
return ac_get_arg(&ctx->ac, ctx->args->ac.gs_wave_id);
}
static LLVMValueRef ngg_get_emulated_counters_buf(struct si_shader_context *ctx)
{
return ac_build_load_to_sgpr(&ctx->ac, ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->internal_bindings),
return ac_build_load_to_sgpr(&ctx->ac,
ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings),
LLVMConstInt(ctx->ac.i32, SI_GS_QUERY_EMULATED_COUNTERS_BUF, false));
}
@ -171,7 +172,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream,
struct si_shader_info *info = &ctx->shader->selector->info;
struct si_shader *shader = ctx->shader;
LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->args.gs2vs_offset);
LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->args->ac.gs2vs_offset);
unsigned offset = 0;
for (unsigned i = 0; i < info->num_outputs; i++) {
@ -225,7 +226,7 @@ void si_preload_esgs_ring(struct si_shader_context *ctx)
LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, SI_RING_ESGS, 0);
ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac,
ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->internal_bindings), offset);
ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings), offset);
if (ctx->stage != MESA_SHADER_GEOMETRY) {
LLVMValueRef desc1 = LLVMBuildExtractElement(builder, ctx->esgs_ring, ctx->ac.i32_1, "");
@ -267,7 +268,7 @@ void si_preload_gs_rings(struct si_shader_context *ctx)
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, SI_RING_GSVS, 0);
LLVMValueRef base_ring = ac_build_load_to_sgpr(&ctx->ac,
ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->internal_bindings), offset);
ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings), offset);
/* The conceptual layout of the GSVS ring is
* v0c0 .. vLv0 v0c1 .. vLc1 ..
@ -453,6 +454,10 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
ctx.stage = MESA_SHADER_VERTEX;
ctx.so = *so;
struct si_shader_args args;
si_init_shader_args(shader, &args);
ctx.args = &args;
builder = ctx.ac.builder;
/* Build the main function. */
@ -460,7 +465,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
ctx.gsvs_ring[0] =
ac_build_load_to_sgpr(&ctx.ac,
ac_get_ptr_arg(&ctx.ac, &ctx.args, ctx.internal_bindings), LLVMConstInt(ctx.ac.i32, SI_RING_GSVS, 0));
ac_get_ptr_arg(&ctx.ac, &ctx.args->ac, ctx.args->internal_bindings), LLVMConstInt(ctx.ac.i32, SI_RING_GSVS, 0));
LLVMValueRef voffset =
LLVMBuildMul(ctx.ac.builder, ctx.abi.vertex_id, LLVMConstInt(ctx.ac.i32, 4, 0), "");
@ -469,7 +474,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
LLVMValueRef stream_id;
if (!sscreen->use_ngg_streamout && ctx.so.num_outputs)
stream_id = si_unpack_param(&ctx, ctx.args.streamout_config, 24, 2);
stream_id = si_unpack_param(&ctx, ctx.args->ac.streamout_config, 24, 2);
else
stream_id = ctx.ac.i32_0;

View file

@ -28,7 +28,7 @@
LLVMValueRef si_get_sample_id(struct si_shader_context *ctx)
{
return si_unpack_param(ctx, ctx->args.ancillary, 8, 4);
return si_unpack_param(ctx, ctx->args->ac.ancillary, 8, 4);
}
static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, LLVMValueRef sample_id)
@ -36,7 +36,7 @@ static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, LLVMValueRef
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
LLVMValueRef buf_index = LLVMConstInt(ctx->ac.i32, SI_PS_CONST_SAMPLE_POSITIONS, 0);
LLVMValueRef resource = ac_build_load_to_sgpr(
&ctx->ac, ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->internal_bindings), buf_index);
&ctx->ac, ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings), buf_index);
/* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
LLVMValueRef offset0 =
@ -65,7 +65,7 @@ static LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0_FMASK % 2 == 0);
ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings);
ptr = ac_get_arg(&ctx->ac, ctx->args->internal_bindings);
ptr =
LLVMBuildPointerCast(ctx->ac.builder, ptr, ac_array_in_const32_addr_space(ctx->ac.v8i32), "");
struct ac_llvm_pointer desc = { .v = ptr, .t = ctx->ac.v8i32 };
@ -74,14 +74,14 @@ static LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
unsigned chan = 0;
args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16);
args.coords[chan++] = si_unpack_param(ctx, ctx->args->pos_fixed_pt, 0, 16);
if (!ctx->shader->key.ps.mono.fbfetch_is_1D)
args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16);
args.coords[chan++] = si_unpack_param(ctx, ctx->args->pos_fixed_pt, 16, 16);
/* Get the current render target layer index. */
if (ctx->shader->key.ps.mono.fbfetch_layered)
args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11);
args.coords[chan++] = si_unpack_param(ctx, ctx->args->ac.ancillary, 16, 11);
if (ctx->shader->key.ps.mono.fbfetch_msaa)
args.coords[chan++] = si_get_sample_id(ctx);
@ -575,7 +575,7 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
LLVMValueRef ret, func;
int num_returns, i, num_color_channels;
memset(&ctx->args, 0, sizeof(ctx->args));
memset(ctx->args, 0, sizeof(*ctx->args));
/* Declare inputs. */
LLVMTypeRef return_types[AC_MAX_ARGS];
@ -584,7 +584,7 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
assert(key->ps_prolog.num_input_sgprs + key->ps_prolog.num_input_vgprs + num_color_channels <=
AC_MAX_ARGS);
for (i = 0; i < key->ps_prolog.num_input_sgprs; i++) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
return_types[num_returns++] = ctx->ac.i32;
}
@ -601,7 +601,7 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
/* POS_FIXED_PT is always last. */
arg = &pos_fixed_pt;
}
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, arg);
ac_add_arg(&ctx->args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, arg);
return_types[num_returns++] = ctx->ac.f32;
}
@ -617,7 +617,7 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
* but it will prevent the compiler from overwriting them unintentionally.
*/
ret = ctx->return_value;
for (i = 0; i < ctx->args.arg_count; i++) {
for (i = 0; i < ctx->args->ac.arg_count; i++) {
LLVMValueRef p = LLVMGetParam(func, i);
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, i, "");
}
@ -769,7 +769,7 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
while (writemask) {
unsigned chan = u_bit_scan(&writemask);
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, color[chan],
ctx->args.arg_count + color_out_idx++, "");
ctx->args->ac.arg_count + color_out_idx++, "");
}
}
@ -831,22 +831,22 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
struct si_ps_exports exp = {};
LLVMValueRef color[8][4] = {};
memset(&ctx->args, 0, sizeof(ctx->args));
memset(ctx->args, 0, sizeof(*ctx->args));
/* Declare input SGPRs. */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->internal_bindings);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->bindless_samplers_and_images);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->const_and_shader_buffers);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->samplers_and_images);
si_add_arg_checked(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL, SI_PARAM_ALPHA_REF);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->internal_bindings);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->bindless_samplers_and_images);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->const_and_shader_buffers);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->samplers_and_images);
si_add_arg_checked(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL, SI_PARAM_ALPHA_REF);
/* Declare input VGPRs. */
unsigned required_num_params =
ctx->args.num_sgprs_used + util_bitcount(key->ps_epilog.colors_written) * 4 +
ctx->args->ac.num_sgprs_used + util_bitcount(key->ps_epilog.colors_written) * 4 +
key->ps_epilog.writes_z + key->ps_epilog.writes_stencil + key->ps_epilog.writes_samplemask;
while (ctx->args.arg_count < required_num_params)
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
while (ctx->args->ac.arg_count < required_num_params)
ac_add_arg(&ctx->args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
/* Create the function. */
si_llvm_create_func(ctx, "ps_epilog", NULL, 0, 0);
@ -854,7 +854,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
ac_llvm_add_target_dep_function_attr(ctx->main_fn.value, "InitialPSInputAddr", 0xffffff);
/* Prepare color. */
unsigned vgpr = ctx->args.num_sgprs_used;
unsigned vgpr = ctx->args->ac.num_sgprs_used;
unsigned colors_written = key->ps_epilog.colors_written;
while (colors_written) {
@ -886,7 +886,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part
key->ps_epilog.writes_samplemask ||
mrtz_alpha) {
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
unsigned vgpr_index = ctx->args.num_sgprs_used +
unsigned vgpr_index = ctx->args->ac.num_sgprs_used +
util_bitcount(key->ps_epilog.colors_written) * 4;
if (key->ps_epilog.writes_z)
@ -939,8 +939,8 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader
struct ac_llvm_pointer main_fn = ctx->main_fn;
/* Preserve main arguments. */
enum ac_arg_type main_arg_types[AC_MAX_ARGS];
for (int i = 0; i < ctx->args.arg_count; i++)
main_arg_types[i] = ctx->args.args[i].type;
for (int i = 0; i < ctx->args->ac.arg_count; i++)
main_arg_types[i] = ctx->args->ac.args[i].type;
union si_shader_part_key prolog_key;

View file

@ -55,7 +55,7 @@ static LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, LLVMValue
static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
{
LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->args->const_and_shader_buffers);
struct si_shader_selector *sel = ctx->shader->selector;
/* Do the bounds checking with a descriptor, because
@ -103,7 +103,7 @@ static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
LLVMBuildAdd(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, SI_NUM_SHADER_BUFFERS, 0), "");
return ac_build_load_to_sgpr(&ctx->ac,
ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->const_and_shader_buffers),
ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->const_and_shader_buffers),
index);
}
@ -114,14 +114,14 @@ static LLVMValueRef load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, boo
/* Fast path if the shader buffer is in user SGPRs. */
if (LLVMIsConstant(index) &&
LLVMConstIntGetZExtValue(index) < ctx->shader->selector->cs_num_shaderbufs_in_user_sgprs)
return ac_get_arg(&ctx->ac, ctx->cs_shaderbuf[LLVMConstIntGetZExtValue(index)]);
return ac_get_arg(&ctx->ac, ctx->args->cs_shaderbuf[LLVMConstIntGetZExtValue(index)]);
index = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers);
index = LLVMBuildSub(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, SI_NUM_SHADER_BUFFERS - 1, 0),
index, "");
return ac_build_load_to_sgpr(&ctx->ac,
ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->const_and_shader_buffers),
ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->const_and_shader_buffers),
index);
}
@ -256,7 +256,7 @@ static LLVMValueRef si_nir_load_sampler_desc(struct ac_shader_abi *abi, unsigned
assert(desc_type <= AC_DESC_BUFFER);
if (bindless) {
struct ac_llvm_pointer list = ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->bindless_samplers_and_images);
struct ac_llvm_pointer list = ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->bindless_samplers_and_images);
/* dynamic_index is the bindless handle */
if (image) {
@ -288,7 +288,7 @@ static LLVMValueRef si_nir_load_sampler_desc(struct ac_shader_abi *abi, unsigned
if (const_index >= num_slots)
const_index = base_index;
struct ac_llvm_pointer list = ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->samplers_and_images);
struct ac_llvm_pointer list = ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->samplers_and_images);
LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false);
if (dynamic_index) {
@ -311,7 +311,7 @@ static LLVMValueRef si_nir_load_sampler_desc(struct ac_shader_abi *abi, unsigned
if (!dynamic_index &&
const_index < ctx->shader->selector->cs_num_images_in_user_sgprs &&
(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER)) {
LLVMValueRef rsrc = ac_get_arg(&ctx->ac, ctx->cs_image[const_index]);
LLVMValueRef rsrc = ac_get_arg(&ctx->ac, ctx->args->cs_image[const_index]);
if (desc_type == AC_DESC_IMAGE)
rsrc = fixup_image_desc(ctx, rsrc, write);

View file

@ -30,12 +30,12 @@ LLVMValueRef si_get_rel_patch_id(struct si_shader_context *ctx)
{
switch (ctx->stage) {
case MESA_SHADER_TESS_CTRL:
return si_unpack_param(ctx, ctx->args.tcs_rel_ids, 0, 8);
return si_unpack_param(ctx, ctx->args->ac.tcs_rel_ids, 0, 8);
case MESA_SHADER_TESS_EVAL:
return ctx->abi.tes_rel_patch_id_replaced ?
ctx->abi.tes_rel_patch_id_replaced :
ac_get_arg(&ctx->ac, ctx->args.tes_rel_patch_id);
ac_get_arg(&ctx->ac, ctx->args->ac.tes_rel_patch_id);
default:
assert(0);
@ -83,7 +83,7 @@ static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
{
return si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16);
return si_unpack_param(ctx, ctx->args->tcs_out_lds_offsets, 16, 16);
}
static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
@ -106,7 +106,7 @@ LLVMValueRef si_get_num_tcs_out_vertices(struct si_shader_context *ctx)
return LLVMConstInt(ctx->ac.i32, tcs_out_vertices, 0);
return LLVMBuildAdd(ctx->ac.builder,
si_unpack_param(ctx, ctx->tcs_offchip_layout, 6, 5), ctx->ac.i32_1, "");
si_unpack_param(ctx, ctx->args->tcs_offchip_layout, 6, 5), ctx->ac.i32_1, "");
}
LLVMValueRef si_get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
@ -157,7 +157,7 @@ static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
LLVMValueRef param_stride, constant16;
vertices_per_patch = si_get_num_tcs_out_vertices(ctx);
num_patches = si_unpack_param(ctx, ctx->tcs_offchip_layout, 0, 6);
num_patches = si_unpack_param(ctx, ctx->args->tcs_offchip_layout, 0, 6);
num_patches = LLVMBuildAdd(ctx->ac.builder, num_patches, ctx->ac.i32_1, "");
total_vertices = LLVMBuildMul(ctx->ac.builder, vertices_per_patch, num_patches, "");
@ -174,7 +174,7 @@ static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");
if (!vertex_index) {
LLVMValueRef patch_data_offset = si_unpack_param(ctx, ctx->tcs_offchip_layout, 11, 21);
LLVMValueRef patch_data_offset = si_unpack_param(ctx, ctx->args->tcs_offchip_layout, 11, 21);
base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, patch_data_offset, "");
}
@ -218,7 +218,7 @@ static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum
{
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef addr = ac_get_arg(
&ctx->ac, ring == TESS_OFFCHIP_RING_TES ? ctx->tes_offchip_addr : ctx->tcs_out_lds_layout);
&ctx->ac, ring == TESS_OFFCHIP_RING_TES ? ctx->args->tes_offchip_addr : ctx->args->tcs_out_lds_layout);
/* TCS only receives high 13 bits of the address. */
if (ring == TESS_OFFCHIP_RING_TCS || ring == TCS_FACTOR_RING) {
@ -270,7 +270,7 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
ubyte semantic = info->input[driver_location].semantic;
/* Load the TCS input from a VGPR. */
unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 +
unsigned func_param = ctx->args->ac.tcs_rel_ids.arg_index + 1 +
si_shader_io_get_unique_index(semantic, false) * 4;
LLVMValueRef value[4];
@ -384,7 +384,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader
buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING);
/* Get the offset. */
tf_base = ac_get_arg(&ctx->ac, ctx->args.tcs_factor_offset);
tf_base = ac_get_arg(&ctx->ac, ctx->args->ac.tcs_factor_offset);
byteoffset =
LLVMBuildMul(ctx->ac.builder, rel_patch_id, LLVMConstInt(ctx->ac.i32, 4 * stride, 0), "");
offset = 0;
@ -418,7 +418,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader
unsigned param_outer, param_inner;
buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
base = ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset);
param_outer = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER);
tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
@ -449,7 +449,7 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx)
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
rel_patch_id = si_get_rel_patch_id(ctx);
invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
invocation_id = si_unpack_param(ctx, ctx->args->ac.tcs_rel_ids, 8, 5);
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
if (ctx->screen->info.gfx_level >= GFX9 && !ctx->shader->is_monolithic) {
@ -477,18 +477,18 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx)
if (ctx->screen->info.gfx_level >= GFX9) {
ret =
si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
/* Tess offchip and tess factor offsets are at the beginning. */
ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, 4);
ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tess_offchip_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_factor_offset, 4);
vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1;
} else {
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, GFX6_SGPR_TCS_OUT_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_out_lds_layout, GFX6_SGPR_TCS_OUT_LAYOUT);
/* Tess offchip and tess factor offsets are after user SGPRs. */
ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, GFX6_TCS_NUM_USER_SGPR);
ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1);
ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tess_offchip_offset, GFX6_TCS_NUM_USER_SGPR);
ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1);
vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
}
@ -542,30 +542,30 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
LLVMValueRef ret = ctx->return_value;
ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->args.merged_wave_info, 3);
ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, 4);
ret = si_insert_input_ptr(ctx, ret, ctx->args->other_const_and_shader_buffers, 0);
ret = si_insert_input_ptr(ctx, ret, ctx->args->other_samplers_and_images, 1);
ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tess_offchip_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->args->ac.merged_wave_info, 3);
ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_factor_offset, 4);
if (ctx->screen->info.gfx_level <= GFX10_3)
ret = si_insert_input_ret(ctx, ret, ctx->args.scratch_offset, 5);
ret = si_insert_input_ret(ctx, ret, ctx->args->ac.scratch_offset, 5);
ret = si_insert_input_ptr(ctx, ret, ctx->internal_bindings, 8 + SI_SGPR_INTERNAL_BINDINGS);
ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images,
ret = si_insert_input_ptr(ctx, ret, ctx->args->internal_bindings, 8 + SI_SGPR_INTERNAL_BINDINGS);
ret = si_insert_input_ptr(ctx, ret, ctx->args->bindless_samplers_and_images,
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
ret = si_insert_input_ret(ctx, ret, ctx->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
ret = si_insert_input_ret(ctx, ret, ctx->args->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_offsets, 8 + GFX9_SGPR_TCS_OUT_OFFSETS);
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_out_lds_offsets, 8 + GFX9_SGPR_TCS_OUT_OFFSETS);
ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id)),
ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.tcs_patch_id)),
vgpr++, "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_rel_ids)),
ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.tcs_rel_ids)),
vgpr++, "");
ctx->return_value = ret;
}
@ -604,54 +604,54 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
*/
void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key)
{
memset(&ctx->args, 0, sizeof(ctx->args));
memset(ctx->args, 0, sizeof(*ctx->args));
if (ctx->screen->info.gfx_level >= GFX9) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* wave info */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->ac.tess_offchip_offset);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* wave info */
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->ac.tcs_factor_offset);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->tcs_offchip_layout);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->tcs_out_lds_layout);
} else {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->tcs_offchip_layout);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->tcs_out_lds_layout);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->ac.tess_offchip_offset);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->ac.tcs_factor_offset);
}
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
ac_add_arg(&ctx->args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
ac_add_arg(&ctx->args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
struct ac_arg rel_patch_id; /* patch index within the wave (REL_PATCH_ID) */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &rel_patch_id);
ac_add_arg(&ctx->args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &rel_patch_id);
struct ac_arg invocation_id; /* invocation ID within the patch */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &invocation_id);
ac_add_arg(&ctx->args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &invocation_id);
struct ac_arg
tcs_out_current_patch_data_offset; /* LDS offset where tess factors should be loaded from */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tcs_out_current_patch_data_offset);
ac_add_arg(&ctx->args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &tcs_out_current_patch_data_offset);
struct ac_arg tess_factors[6];
for (unsigned i = 0; i < 6; i++)
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tess_factors[i]);
ac_add_arg(&ctx->args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &tess_factors[i]);
/* Create the function. */
si_llvm_create_func(ctx, "tcs_epilog", NULL, 0, ctx->screen->info.gfx_level >= GFX7 ? 128 : 0);

View file

@ -100,7 +100,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
*/
LLVMValueRef sel_y1 = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, vertex_id, ctx->ac.i32_1, "");
unsigned param_vs_blit_inputs = ctx->vs_blit_inputs.arg_index;
unsigned param_vs_blit_inputs = ctx->args->vs_blit_inputs.arg_index;
if (input_index == 0) {
/* Position: */
LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs);
@ -165,11 +165,11 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
LLVMValueRef tmp;
if (input_index < num_vbos_in_user_sgprs) {
vb_desc = ac_get_arg(&ctx->ac, ctx->vb_descriptors[input_index]);
vb_desc = ac_get_arg(&ctx->ac, ctx->args->vb_descriptors[input_index]);
} else {
unsigned index = input_index - num_vbos_in_user_sgprs;
vb_desc = ac_build_load_to_sgpr(
&ctx->ac, ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->args.vertex_buffers),
&ctx->ac, ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->ac.vertex_buffers),
LLVMConstInt(ctx->ac.i32, index, 0));
}
@ -182,10 +182,11 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
vertex_index = get_vertex_index(ctx, &ctx->shader->key.ge.part.vs.prolog,
input_index, ctx->instance_divisor_constbuf,
ctx->args.start_instance.arg_index,
ctx->args.base_vertex.arg_index);
ctx->args->ac.start_instance.arg_index,
ctx->args->ac.base_vertex.arg_index);
} else {
vertex_index = LLVMGetParam(ctx->main_fn.value, ctx->vertex_index0.arg_index + input_index);
vertex_index = LLVMGetParam(ctx->main_fn.value,
ctx->args->vertex_index0.arg_index + input_index);
}
/* Use the open-coded implementation for all loads of doubles and
@ -382,7 +383,7 @@ void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_outp
int i;
/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
LLVMValueRef so_vtx_count = si_unpack_param(ctx, ctx->args.streamout_config, 16, 7);
LLVMValueRef so_vtx_count = si_unpack_param(ctx, ctx->args->ac.streamout_config, 16, 7);
LLVMValueRef tid = ac_get_thread_id(&ctx->ac);
@ -400,7 +401,7 @@ void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_outp
* attrib_offset
*/
LLVMValueRef so_write_index = ac_get_arg(&ctx->ac, ctx->args.streamout_write_index);
LLVMValueRef so_write_index = ac_get_arg(&ctx->ac, ctx->args->ac.streamout_write_index);
/* Compute (streamout_write_index + thread_id). */
so_write_index = LLVMBuildAdd(builder, so_write_index, tid, "");
@ -409,7 +410,7 @@ void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_outp
* enabled buffer. */
LLVMValueRef so_write_offset[4] = {};
LLVMValueRef so_buffers[4];
struct ac_llvm_pointer arg = ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->internal_bindings);
struct ac_llvm_pointer arg = ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings);
for (i = 0; i < 4; i++) {
if (!so->stride[i])
@ -419,7 +420,7 @@ void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_outp
so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, arg, offset);
LLVMValueRef so_offset = ac_get_arg(&ctx->ac, ctx->args.streamout_offset[i]);
LLVMValueRef so_offset = ac_get_arg(&ctx->ac, ctx->args->ac.streamout_offset[i]);
so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->ac.i32, 4, 0), "");
so_write_offset[i] = ac_build_imad(
@ -452,7 +453,7 @@ void si_llvm_clipvertex_to_clipdist(struct si_shader_context *ctx,
LLVMValueRef base_elt;
LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0);
LLVMValueRef const_resource = ac_build_load_to_sgpr(
&ctx->ac, ac_get_ptr_arg(&ctx->ac, &ctx->args, ctx->internal_bindings), constbuf_index);
&ctx->ac, ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings), constbuf_index);
unsigned clipdist_mask = ctx->shader->selector->info.clipdist_mask &
~ctx->shader->key.ge.opt.kill_clip_distances;
@ -791,7 +792,7 @@ void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part
unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs + num_input_vgprs;
unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
memset(&ctx->args, 0, sizeof(ctx->args));
memset(ctx->args, 0, sizeof(*ctx->args));
/* 4 preloaded VGPRs + vertex load indices as prolog outputs */
returns = alloca((num_all_input_regs + key->vs_prolog.num_inputs) * sizeof(LLVMTypeRef));
@ -799,13 +800,13 @@ void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part
/* Declare input and output SGPRs. */
for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &input_sgpr_param[i]);
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &input_sgpr_param[i]);
returns[num_returns++] = ctx->ac.i32;
}
/* Preloaded VGPRs (outputs must be floats) */
for (i = 0; i < num_input_vgprs; i++) {
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &input_vgpr_param[i]);
ac_add_arg(&ctx->args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &input_vgpr_param[i]);
returns[num_returns++] = ctx->ac.f32;
}
@ -888,7 +889,7 @@ void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part
user_sgpr_base + SI_SGPR_BASE_VERTEX);
index = ac_to_float(&ctx->ac, index);
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, index, ctx->args.arg_count + i, "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, index, ctx->args->ac.arg_count + i, "");
}
si_llvm_build_ret(ctx, ret);