diff --git a/src/amd/common/ac_shader_args.h b/src/amd/common/ac_shader_args.h index ae9be303780..7a2026f3000 100644 --- a/src/amd/common/ac_shader_args.h +++ b/src/amd/common/ac_shader_args.h @@ -40,6 +40,7 @@ enum ac_arg_regfile enum ac_arg_type { + AC_ARG_INVALID = -1, AC_ARG_FLOAT, AC_ARG_INT, AC_ARG_CONST_PTR, /* Pointer to i8 array */ diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index 7e0e16abfaa..a3d3df8a82a 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -4320,10 +4320,10 @@ LLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx) return LLVMBuildNot(ctx->builder, LLVMBuildAnd(ctx->builder, exact, postponed, ""), ""); } -LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, LLVMValueRef *args, +LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMTypeRef fn_type, LLVMValueRef func, LLVMValueRef *args, unsigned num_args) { - LLVMValueRef ret = LLVMBuildCall(ctx->builder, func, args, num_args, ""); + LLVMValueRef ret = LLVMBuildCall2(ctx->builder, fn_type, func, args, num_args, ""); LLVMSetInstructionCallConv(ret, LLVMGetFunctionCallConv(func)); return ret; } @@ -4538,6 +4538,7 @@ static LLVMTypeRef arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac base = ctx->v8i32; break; default: + assert(false); return NULL; } @@ -4550,7 +4551,7 @@ static LLVMTypeRef arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac } } -LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx, +struct ac_llvm_pointer ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx, enum ac_llvm_calling_convention convention, const char *name, LLVMTypeRef ret_type, LLVMModuleRef module) { @@ -4583,14 +4584,17 @@ LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_con } } - ctx->main_function = main_function; + ctx->main_function = (struct ac_llvm_pointer) { + .value = main_function, + .pointee_type = main_function_type + }; /* Enable denormals for FP16 and FP64: */ LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math", "ieee,ieee"); /* Disable denormals for FP32: */ LLVMAddTargetDependentFunctionAttr(main_function, "denormal-fp-math-f32", "preserve-sign,preserve-sign"); - return main_function; + return ctx->main_function; } void ac_build_s_endpgm(struct ac_llvm_context *ctx) diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index cdc1ac7d7fb..17372cf212c 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -83,7 +83,7 @@ struct ac_llvm_context { LLVMModuleRef module; LLVMBuilderRef builder; - LLVMValueRef main_function; + struct ac_llvm_pointer main_function; LLVMTypeRef voidt; LLVMTypeRef i1; @@ -561,8 +561,8 @@ LLVMValueRef ac_build_load_helper_invocation(struct ac_llvm_context *ctx); LLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx); -LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, LLVMValueRef *args, - unsigned num_args); +LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMTypeRef fn_type, LLVMValueRef func, + LLVMValueRef *args, unsigned num_args); LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op, LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope); @@ -595,7 +595,7 @@ LLVMTypeRef ac_arg_type_to_pointee_type(struct ac_llvm_context *ctx, enum ac_arg static inline LLVMValueRef ac_get_arg(struct ac_llvm_context *ctx, struct ac_arg arg) { assert(arg.used); - return LLVMGetParam(ctx->main_function, arg.arg_index); + return LLVMGetParam(ctx->main_function.value, arg.arg_index); } static inline LLVMTypeRef ac_get_arg_pointee_type(struct ac_llvm_context *ctx, const struct ac_shader_args *args, struct ac_arg arg) @@ -613,9 +613,9 @@ enum ac_llvm_calling_convention AC_LLVM_AMDGPU_HS = 93, }; -LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx, - enum ac_llvm_calling_convention convention, const char *name, - LLVMTypeRef ret_type, LLVMModuleRef module); +struct ac_llvm_pointer ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx, + enum ac_llvm_calling_convention convention, const char *name, + LLVMTypeRef ret_type, LLVMModuleRef module); void ac_build_s_endpgm(struct ac_llvm_context *ctx); void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, LLVMValueRef is_odd, diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 08da30def76..c736926d2cb 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -52,7 +52,7 @@ struct radv_shader_context { unsigned max_workgroup_size; LLVMContextRef context; - LLVMValueRef main_function; + struct ac_llvm_pointer main_function; LLVMValueRef descriptor_sets[MAX_SETS]; @@ -83,20 +83,20 @@ radv_shader_context_from_abi(struct ac_shader_abi *abi) return container_of(abi, struct radv_shader_context, abi); } -static LLVMValueRef +static struct ac_llvm_pointer create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuilderRef builder, const struct ac_shader_args *args, enum ac_llvm_calling_convention convention, unsigned max_workgroup_size, const struct radv_nir_compiler_options *options) { - LLVMValueRef main_function = ac_build_main(args, ctx, convention, "main", ctx->voidt, module); + struct ac_llvm_pointer main_function = ac_build_main(args, ctx, convention, "main", ctx->voidt, module); if (options->address32_hi) { - ac_llvm_add_target_dep_function_attr(main_function, "amdgpu-32bit-address-high-bits", + ac_llvm_add_target_dep_function_attr(main_function.value, "amdgpu-32bit-address-high-bits", options->address32_hi); } - ac_llvm_set_workgroup_size(main_function, max_workgroup_size); - ac_llvm_set_target_features(main_function, ctx); + ac_llvm_set_workgroup_size(main_function.value, max_workgroup_size); + ac_llvm_set_target_features(main_function.value, ctx); return main_function; } @@ -170,7 +170,7 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has ctx->main_function = create_llvm_function(&ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac, - get_llvm_calling_convention(ctx->main_function, stage), + get_llvm_calling_convention(ctx->main_function.value, stage), ctx->max_workgroup_size, ctx->options); ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", @@ -1599,7 +1599,7 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx) LLVMBasicBlockRef end_bb; LLVMValueRef switch_inst; - end_bb = LLVMAppendBasicBlockInContext(ctx->ac.context, ctx->main_function, "end"); + end_bb = LLVMAppendBasicBlockInContext(ctx->ac.context, ctx->main_function.value, "end"); switch_inst = LLVMBuildSwitch(ctx->ac.builder, stream_id, end_bb, 4); for (unsigned stream = 0; stream < 4; stream++) { diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index e75de6952ba..d846c64632f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -56,7 +56,7 @@ struct si_shader_context { LLVMBasicBlockRef merged_wrap_if_entry_block; int merged_wrap_if_label; - LLVMValueRef main_fn; + struct ac_llvm_pointer main_fn; LLVMTypeRef return_type; struct ac_arg const_and_shader_buffers; @@ -218,9 +218,11 @@ void si_llvm_declare_esgs_ring(struct si_shader_context *ctx); LLVMValueRef si_unpack_param(struct si_shader_context *ctx, struct ac_arg param, unsigned rshift, unsigned bitwidth); LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, unsigned swizzle); -void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *parts, +void si_build_wrapper_function(struct si_shader_context *ctx, struct ac_llvm_pointer *parts, unsigned num_parts, unsigned main_part, - unsigned next_shader_first_part, bool same_thread_count); + unsigned next_shader_first_part, + enum ac_arg_type *main_arg_types, + bool same_thread_count); bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader, struct nir_shader *nir, bool free_nir, bool ngg_cull_shader); bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler, diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 75e669fa890..03b4f10353e 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -185,16 +185,16 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy ctx->return_value = LLVMGetUndef(ctx->return_type); if (ctx->screen->info.address32_hi) { - ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-32bit-address-high-bits", + ac_llvm_add_target_dep_function_attr(ctx->main_fn.value, "amdgpu-32bit-address-high-bits", ctx->screen->info.address32_hi); } if (ctx->stage <= MESA_SHADER_GEOMETRY && ctx->shader->key.ge.as_ngg && si_shader_uses_streamout(ctx->shader)) - ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256); + ac_llvm_add_target_dep_function_attr(ctx->main_fn.value, "amdgpu-gds-size", 256); - ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size); - ac_llvm_set_target_features(ctx->main_fn, &ctx->ac); + ac_llvm_set_workgroup_size(ctx->main_fn.value, max_workgroup_size); + ac_llvm_set_target_features(ctx->main_fn.value, &ctx->ac); } void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shader) @@ -216,7 +216,7 @@ void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shade /* Reserve register locations for VGPR inputs the PS prolog may need. */ if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->shader->is_monolithic) { ac_llvm_add_target_dep_function_attr( - ctx->main_fn, "InitialPSInputAddr", + ctx->main_fn.value, "InitialPSInputAddr", S_0286D0_PERSP_SAMPLE_ENA(1) | S_0286D0_PERSP_CENTER_ENA(1) | S_0286D0_PERSP_CENTROID_ENA(1) | S_0286D0_LINEAR_SAMPLE_ENA(1) | S_0286D0_LINEAR_CENTER_ENA(1) | S_0286D0_LINEAR_CENTROID_ENA(1) | @@ -314,7 +314,7 @@ LLVMValueRef si_prolog_get_internal_bindings(struct si_shader_context *ctx) LLVMValueRef ptr[2], list; bool merged_shader = si_is_merged_shader(ctx->shader); - ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_INTERNAL_BINDINGS); + ptr[0] = LLVMGetParam(ctx->main_fn.value, (merged_shader ? 8 : 0) + SI_SGPR_INTERNAL_BINDINGS); list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], ac_array_in_const32_addr_space(ctx->ac.v4i32), ""); return list; @@ -420,9 +420,10 @@ static void si_llvm_declare_compute_memory(struct si_shader_context *ctx) * Given a list of shader part functions, build a wrapper function that * runs them in sequence to form a monolithic shader. */ -void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *parts, +void si_build_wrapper_function(struct si_shader_context *ctx, struct ac_llvm_pointer *parts, unsigned num_parts, unsigned main_part, - unsigned next_shader_first_part, bool same_thread_count) + unsigned next_shader_first_part, + enum ac_arg_type *main_arg_types, bool same_thread_count) { LLVMBuilderRef builder = ctx->ac.builder; /* PS epilog has one arg per color component; gfx9 merged shader @@ -440,8 +441,8 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part memset(&ctx->args, 0, sizeof(ctx->args)); for (unsigned i = 0; i < num_parts; ++i) { - ac_add_function_attr(ctx->ac.context, parts[i], -1, AC_FUNC_ATTR_ALWAYSINLINE); - LLVMSetLinkage(parts[i], LLVMPrivateLinkage); + ac_add_function_attr(ctx->ac.context, parts[i].value, -1, AC_FUNC_ATTR_ALWAYSINLINE); + LLVMSetLinkage(parts[i].value, LLVMPrivateLinkage); } /* The parameters of the wrapper function correspond to those of the @@ -452,11 +453,11 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part num_sgprs = 0; num_vgprs = 0; - function_type = LLVMGetElementType(LLVMTypeOf(parts[0])); + function_type = parts[0].pointee_type; num_first_params = LLVMCountParamTypes(function_type); for (unsigned i = 0; i < num_first_params; ++i) { - LLVMValueRef param = LLVMGetParam(parts[0], i); + LLVMValueRef param = LLVMGetParam(parts[0].value, i); if (ac_is_sgpr_param(param)) { assert(num_vgprs == 0); @@ -468,31 +469,11 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part gprs = 0; while (gprs < num_sgprs + num_vgprs) { - LLVMValueRef param = LLVMGetParam(parts[main_part], ctx->args.arg_count); + LLVMValueRef param = LLVMGetParam(parts[main_part].value, ctx->args.arg_count); LLVMTypeRef type = LLVMTypeOf(param); unsigned size = ac_get_type_size(type) / 4; - - /* This is going to get casted anyways, so we don't have to - * have the exact same type. But we do have to preserve the - * pointer-ness so that LLVM knows about it. - */ - enum ac_arg_type arg_type = AC_ARG_INT; - if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) { - type = LLVMGetElementType(type); - - if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { - if (LLVMGetVectorSize(type) == 4) - arg_type = AC_ARG_CONST_DESC_PTR; - else if (LLVMGetVectorSize(type) == 8) - arg_type = AC_ARG_CONST_IMAGE_PTR; - else - assert(0); - } else if (type == ctx->ac.f32) { - arg_type = AC_ARG_CONST_FLOAT_PTR; - } else { - assert(0); - } - } + enum ac_arg_type arg_type = main_arg_types[ctx->args.arg_count]; + assert(arg_type != AC_ARG_INVALID); ac_add_arg(&ctx->args, gprs < num_sgprs ? AC_ARG_SGPR : AC_ARG_VGPR, size, arg_type, NULL); @@ -507,7 +488,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part unsigned num_returns = 0; LLVMTypeRef returns[AC_MAX_ARGS], last_func_type, return_type; - last_func_type = LLVMGetElementType(LLVMTypeOf(parts[num_parts - 1])); + last_func_type = parts[num_parts - 1].pointee_type; return_type = LLVMGetReturnType(last_func_type); switch (LLVMGetTypeKind(return_type)) { @@ -535,7 +516,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part num_out_sgpr = 0; for (unsigned i = 0; i < ctx->args.arg_count; ++i) { - LLVMValueRef param = LLVMGetParam(ctx->main_fn, i); + LLVMValueRef param = LLVMGetParam(ctx->main_fn.value, i); LLVMTypeRef param_type = LLVMTypeOf(param); LLVMTypeRef out_type = ctx->args.args[i].file == AC_ARG_SGPR ? ctx->ac.i32 : ctx->ac.f32; unsigned size = ac_get_type_size(param_type) / 4; @@ -579,7 +560,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part LLVMValueRef in[AC_MAX_ARGS]; LLVMTypeRef ret_type; unsigned out_idx = 0; - unsigned num_params = LLVMCountParams(parts[part]); + unsigned num_params = LLVMCountParams(parts[part].value); /* Merged shaders are executed conditionally depending * on the number of enabled threads passed in the input SGPRs. */ @@ -609,13 +590,13 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part unsigned param_size; LLVMValueRef arg = NULL; - param = LLVMGetParam(parts[part], param_idx); + param = LLVMGetParam(parts[part].value, param_idx); param_type = LLVMTypeOf(param); param_size = ac_get_type_size(param_type) / 4; is_sgpr = ac_is_sgpr_param(param); if (is_sgpr) { - ac_add_function_attr(ctx->ac.context, parts[part], param_idx + 1, AC_FUNC_ATTR_INREG); + ac_add_function_attr(ctx->ac.context, parts[part].value, param_idx + 1, AC_FUNC_ATTR_INREG); } else if (out_idx < num_out_sgpr) { /* Skip returned SGPRs the current part doesn't * declare on the input. */ @@ -647,7 +628,7 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part out_idx += param_size; } - ret = ac_build_call(&ctx->ac, parts[part], in, num_params); + ret = ac_build_call(&ctx->ac, parts[part].pointee_type, parts[part].value, in, num_params); if (!same_thread_count && si_is_multi_part_shader(ctx->shader) && part + 1 == next_shader_first_part) { @@ -991,7 +972,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad si_llvm_init_ps_callbacks(ctx); unsigned colors_read = ctx->shader->selector->info.colors_read; - LLVMValueRef main_fn = ctx->main_fn; + LLVMValueRef main_fn = ctx->main_fn.value; LLVMValueRef undef = LLVMGetUndef(ctx->ac.f32); @@ -1265,14 +1246,14 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size); ctx.so = *so; - LLVMValueRef ngg_cull_main_fn = NULL; + struct ac_llvm_pointer ngg_cull_main_fn = {}; if (sel->stage <= MESA_SHADER_TESS_EVAL && shader->key.ge.opt.ngg_culling) { if (!si_llvm_translate_nir(&ctx, shader, nir, false, true)) { si_llvm_dispose(&ctx); return false; } ngg_cull_main_fn = ctx.main_fn; - ctx.main_fn = NULL; + ctx.main_fn.value = NULL; } if (!si_llvm_translate_nir(&ctx, shader, nir, free_nir, false)) { @@ -1281,12 +1262,18 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * } if (shader->is_monolithic && sel->stage == MESA_SHADER_VERTEX) { - LLVMValueRef parts[4]; + struct ac_llvm_pointer parts[4]; unsigned num_parts = 0; bool first_is_prolog = false; - LLVMValueRef main_fn = ctx.main_fn; + struct ac_llvm_pointer main_fn = ctx.main_fn; - if (ngg_cull_main_fn) { + /* Preserve main arguments. */ + enum ac_arg_type main_arg_types[AC_MAX_ARGS]; + for (int i = 0; i < ctx.args.arg_count; i++) + main_arg_types[i] = ctx.args.args[i].type; + main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID; + + if (ngg_cull_main_fn.value) { if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, true, false)) { union si_shader_part_key prolog_key; si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, true, @@ -1311,9 +1298,15 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * } parts[num_parts++] = main_fn; - si_build_wrapper_function(&ctx, parts, num_parts, first_is_prolog ? 1 : 0, 0, false); - } else if (shader->is_monolithic && sel->stage == MESA_SHADER_TESS_EVAL && ngg_cull_main_fn) { - LLVMValueRef parts[3], prolog, main_fn = ctx.main_fn; + si_build_wrapper_function(&ctx, parts, num_parts, first_is_prolog ? 1 : 0, 0, main_arg_types, false); + } else if (shader->is_monolithic && sel->stage == MESA_SHADER_TESS_EVAL && ngg_cull_main_fn.value) { + struct ac_llvm_pointer parts[3], prolog, main_fn = ctx.main_fn; + + /* Preserve main arguments. */ + enum ac_arg_type main_arg_types[AC_MAX_ARGS]; + for (int i = 0; i < ctx.args.arg_count; i++) + main_arg_types[i] = ctx.args.args[i].type; + main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID; /* We reuse the VS prolog code for TES just to load the input VGPRs from LDS. */ union si_shader_part_key prolog_key; @@ -1330,11 +1323,14 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * parts[1] = prolog; parts[2] = main_fn; - si_build_wrapper_function(&ctx, parts, 3, 0, 0, false); + si_build_wrapper_function(&ctx, parts, 3, 0, 0, main_arg_types, false); } else if (shader->is_monolithic && sel->stage == MESA_SHADER_TESS_CTRL) { + /* Preserve main arguments. */ + enum ac_arg_type main_arg_types[AC_MAX_ARGS]; + if (sscreen->info.gfx_level >= GFX9) { struct si_shader_selector *ls = shader->key.ge.part.tcs.ls; - LLVMValueRef parts[4]; + struct ac_llvm_pointer parts[4]; bool vs_needs_prolog = si_vs_needs_prolog(ls, &shader->key.ge.part.tcs.ls_prolog, &shader->key, false, false); @@ -1366,6 +1362,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * shader->info.uses_instanceid |= ls->info.uses_instanceid; parts[1] = ctx.main_fn; + for (int i = 0; i < ctx.args.arg_count; i++) + main_arg_types[i] = ctx.args.args[i].type; + main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID; + /* LS prolog */ if (vs_needs_prolog) { union si_shader_part_key vs_prolog_key; @@ -1382,26 +1382,33 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * si_build_wrapper_function(&ctx, parts + !vs_needs_prolog, 4 - !vs_needs_prolog, vs_needs_prolog, vs_needs_prolog ? 2 : 1, + main_arg_types, shader->key.ge.opt.same_patch_vertices); } else { - LLVMValueRef parts[2]; + struct ac_llvm_pointer parts[2]; union si_shader_part_key epilog_key; parts[0] = ctx.main_fn; + for (int i = 0; i < ctx.args.arg_count; i++) + main_arg_types[i] = ctx.args.args[i].type; + main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID; + memset(&epilog_key, 0, sizeof(epilog_key)); epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog; si_llvm_build_tcs_epilog(&ctx, &epilog_key); parts[1] = ctx.main_fn; - si_build_wrapper_function(&ctx, parts, 2, 0, 0, false); + si_build_wrapper_function(&ctx, parts, 2, 0, 0, main_arg_types, false); } } else if (shader->is_monolithic && sel->stage == MESA_SHADER_GEOMETRY) { if (ctx.screen->info.gfx_level >= GFX9) { + enum ac_arg_type main_arg_types[AC_MAX_ARGS]; + struct si_shader_selector *es = shader->key.ge.part.gs.es; - LLVMValueRef es_prolog = NULL; - LLVMValueRef es_main = NULL; - LLVMValueRef gs_main = ctx.main_fn; + struct ac_llvm_pointer es_prolog = {}; + struct ac_llvm_pointer es_main = {}; + struct ac_llvm_pointer gs_main = ctx.main_fn; /* ES main part */ struct si_shader shader_es = {}; @@ -1426,6 +1433,11 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * shader->info.uses_instanceid |= es->info.uses_instanceid; es_main = ctx.main_fn; + /* Preserve main (= es_main) arguments. */ + for (int i = 0; i < ctx.args.arg_count; i++) + main_arg_types[i] = ctx.args.args[i].type; + main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args.arg_count)] = AC_ARG_INVALID; + /* ES prolog */ if (es->stage == MESA_SHADER_VERTEX && si_vs_needs_prolog(es, &shader->key.ge.part.gs.vs_prolog, &shader->key, false, true)) { @@ -1442,16 +1454,16 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * ctx.stage = MESA_SHADER_GEOMETRY; /* Prepare the array of shader parts. */ - LLVMValueRef parts[4]; + struct ac_llvm_pointer parts[4]; unsigned num_parts = 0, main_part; - if (es_prolog) + if (es_prolog.value) parts[num_parts++] = es_prolog; parts[main_part = num_parts++] = es_main; parts[num_parts++] = gs_main; - si_build_wrapper_function(&ctx, parts, num_parts, main_part, main_part + 1, false); + si_build_wrapper_function(&ctx, parts, num_parts, main_part, main_part + 1, main_arg_types, false); } else { /* Nothing to do for gfx6-8. The shader has only 1 part and it's ctx.main_fn. */ } @@ -1462,7 +1474,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * si_llvm_optimize_module(&ctx); /* Make sure the input is a pointer and not integer followed by inttoptr. */ - assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) == LLVMPointerTypeKind); + assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn.value, 0))) == LLVMPointerTypeKind); /* Compile to bytecode. */ if (!si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler, &ctx.ac, debug, diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index 11773844c54..9da73718b5d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -453,7 +453,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, LLVMBasicBlockRef end_bb; LLVMValueRef switch_inst; - end_bb = LLVMAppendBasicBlockInContext(ctx.ac.context, ctx.main_fn, "end"); + end_bb = LLVMAppendBasicBlockInContext(ctx.ac.context, ctx.main_fn.value, "end"); switch_inst = LLVMBuildSwitch(builder, stream_id, end_bb, 4); for (int stream = 0; stream < 4; stream++) { diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c index 069a0f58a4e..cd622a220b8 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c @@ -206,7 +206,7 @@ static void si_alpha_test(struct si_shader_context *ctx, LLVMValueRef alpha) LLVMRealPredicate cond = cond_map[ctx->shader->key.ps.part.epilog.alpha_func]; assert(cond); - LLVMValueRef alpha_ref = LLVMGetParam(ctx->main_fn, SI_PARAM_ALPHA_REF); + LLVMValueRef alpha_ref = LLVMGetParam(ctx->main_fn.value, SI_PARAM_ALPHA_REF); if (LLVMTypeOf(alpha) == ctx->ac.f16) alpha_ref = LLVMBuildFPTrunc(ctx->ac.builder, alpha_ref, ctx->ac.f16, ""); @@ -500,7 +500,7 @@ void si_llvm_ps_build_end(struct si_shader_context *ctx) /* Set SGPRs. */ ret = LLVMBuildInsertValue( - builder, ret, ac_to_integer(&ctx->ac, LLVMGetParam(ctx->main_fn, SI_PARAM_ALPHA_REF)), + builder, ret, ac_to_integer(&ctx->ac, LLVMGetParam(ctx->main_fn.value, SI_PARAM_ALPHA_REF)), SI_SGPR_ALPHA_REF, ""); /* Set VGPRs */ @@ -609,7 +609,7 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part /* Create the function. */ si_llvm_create_func(ctx, "ps_prolog", return_types, num_returns, 0); - func = ctx->main_fn; + func = ctx->main_fn.value; /* Copy inputs to outputs. This should be no-op, as the registers match, * but it will prevent the compiler from overwriting them unintentionally. @@ -849,7 +849,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part /* Create the function. */ si_llvm_create_func(ctx, "ps_epilog", NULL, 0, 0); /* Disable elimination of unused inputs. */ - ac_llvm_add_target_dep_function_attr(ctx->main_fn, "InitialPSInputAddr", 0xffffff); + ac_llvm_add_target_dep_function_attr(ctx->main_fn.value, "InitialPSInputAddr", 0xffffff); /* Prepare color. */ unsigned vgpr = ctx->args.num_sgprs_used; @@ -861,7 +861,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part if (color_type != SI_TYPE_ANY32) { for (i = 0; i < 4; i++) { - color[write_i][i] = LLVMGetParam(ctx->main_fn, vgpr + i / 2); + color[write_i][i] = LLVMGetParam(ctx->main_fn.value, vgpr + i / 2); color[write_i][i] = LLVMBuildBitCast(ctx->ac.builder, color[write_i][i], ctx->ac.v2f16, ""); color[write_i][i] = ac_llvm_extract_elem(&ctx->ac, color[write_i][i], i % 2); @@ -869,7 +869,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part vgpr += 4; } else { for (i = 0; i < 4; i++) - color[write_i][i] = LLVMGetParam(ctx->main_fn, vgpr++); + color[write_i][i] = LLVMGetParam(ctx->main_fn.value, vgpr++); } si_llvm_build_clamp_alpha_test(ctx, color[write_i], write_i); @@ -888,11 +888,11 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part util_bitcount(key->ps_epilog.colors_written) * 4; if (key->ps_epilog.writes_z) - depth = LLVMGetParam(ctx->main_fn, vgpr_index++); + depth = LLVMGetParam(ctx->main_fn.value, vgpr_index++); if (key->ps_epilog.writes_stencil) - stencil = LLVMGetParam(ctx->main_fn, vgpr_index++); + stencil = LLVMGetParam(ctx->main_fn.value, vgpr_index++); if (key->ps_epilog.writes_samplemask) - samplemask = LLVMGetParam(ctx->main_fn, vgpr_index++); + samplemask = LLVMGetParam(ctx->main_fn.value, vgpr_index++); ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, mrtz_alpha, false, &exp.args[exp.num++]); @@ -932,9 +932,14 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader *shader) { - LLVMValueRef parts[3]; + struct ac_llvm_pointer parts[3]; unsigned num_parts = 0, main_index; - LLVMValueRef main_fn = ctx->main_fn; + struct ac_llvm_pointer main_fn = ctx->main_fn; + /* Preserve main arguments. */ + enum ac_arg_type main_arg_types[AC_MAX_ARGS]; + for (int i = 0; i < ctx->args.arg_count; i++) + main_arg_types[i] = ctx->args.args[i].type; + union si_shader_part_key prolog_key; si_get_ps_prolog_key(shader, &prolog_key, false); @@ -952,7 +957,7 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader si_llvm_build_ps_epilog(ctx, &epilog_key); parts[num_parts++] = ctx->main_fn; - si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, false); + si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, main_arg_types, false); } void si_llvm_init_ps_callbacks(struct si_shader_context *ctx) diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 382ed80b8f1..5a5665a51f6 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -275,7 +275,7 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType LLVMValueRef value[4]; for (unsigned i = component; i < component + num_components; i++) { - value[i] = LLVMGetParam(ctx->main_fn, func_param + i); + value[i] = LLVMGetParam(ctx->main_fn.value, func_param + i); value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, ""); } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 34140fa6690..ce0d34b2515 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -76,11 +76,11 @@ static LLVMValueRef get_vertex_index(struct si_shader_context *ctx, if (divisor_is_one || divisor_is_fetched) { /* Add StartInstance. */ index = LLVMBuildAdd(ctx->ac.builder, index, - LLVMGetParam(ctx->main_fn, start_instance), ""); + LLVMGetParam(ctx->main_fn.value, start_instance), ""); } else { /* VertexID + BaseVertex */ index = LLVMBuildAdd(ctx->ac.builder, vertex_id, - LLVMGetParam(ctx->main_fn, base_vertex), ""); + LLVMGetParam(ctx->main_fn.value, base_vertex), ""); } return index; @@ -103,8 +103,8 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L unsigned param_vs_blit_inputs = ctx->vs_blit_inputs.arg_index; if (input_index == 0) { /* Position: */ - LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs); - LLVMValueRef x2y2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 1); + LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs); + LLVMValueRef x2y2 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 1); LLVMValueRef x1 = unpack_sint16(ctx, x1y1, 0); LLVMValueRef y1 = unpack_sint16(ctx, x1y1, 1); @@ -116,7 +116,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L out[0] = LLVMBuildSIToFP(ctx->ac.builder, x, ctx->ac.f32, ""); out[1] = LLVMBuildSIToFP(ctx->ac.builder, y, ctx->ac.f32, ""); - out[2] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 2); + out[2] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 2); out[3] = ctx->ac.f32_1; return; } @@ -126,19 +126,19 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) { for (int i = 0; i < 4; i++) { - out[i] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 3 + i); + out[i] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 3 + i); } } else { assert(vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD); - LLVMValueRef x1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 3); - LLVMValueRef y1 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 4); - LLVMValueRef x2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 5); - LLVMValueRef y2 = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 6); + LLVMValueRef x1 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 3); + LLVMValueRef y1 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 4); + LLVMValueRef x2 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 5); + LLVMValueRef y2 = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 6); out[0] = LLVMBuildSelect(ctx->ac.builder, sel_x1, x1, x2, ""); out[1] = LLVMBuildSelect(ctx->ac.builder, sel_y1, y1, y2, ""); - out[2] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 7); - out[3] = LLVMGetParam(ctx->main_fn, param_vs_blit_inputs + 8); + out[2] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 7); + out[3] = LLVMGetParam(ctx->main_fn.value, param_vs_blit_inputs + 8); } return; } @@ -183,8 +183,9 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L input_index, ctx->instance_divisor_constbuf, ctx->args.start_instance.arg_index, ctx->args.base_vertex.arg_index); - } else - vertex_index = LLVMGetParam(ctx->main_fn, ctx->vertex_index0.arg_index + input_index); + } else { + vertex_index = LLVMGetParam(ctx->main_fn.value, ctx->vertex_index0.arg_index + input_index); + } /* Use the open-coded implementation for all loads of doubles and * of dword-sized data that needs fixups. We need to insert conversion @@ -961,7 +962,7 @@ void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part /* Create the function. */ si_llvm_create_func(ctx, "vs_prolog", returns, num_returns, 0); - func = ctx->main_fn; + func = ctx->main_fn.value; for (i = 0; i < num_input_vgprs; i++) { input_vgprs[i] = ac_get_arg(&ctx->ac, input_vgpr_param[i]);