radeonsi: extract si_get_ps_prolog_args to be shared with aco

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24989>
This commit is contained in:
Qiang Yu 2023-08-07 14:28:50 +08:00
parent 9594a579b2
commit 1728111c29
4 changed files with 117 additions and 147 deletions

View file

@ -3662,3 +3662,37 @@ void si_get_vs_prolog_args(enum amd_gfx_level gfx_level,
args->ac.start_instance = input_sgprs[user_sgpr_base + SI_SGPR_START_INSTANCE];
args->ac.base_vertex = input_sgprs[user_sgpr_base + SI_SGPR_BASE_VERTEX];
}
/**
 * Describe the input arguments of the pixel shader prolog.
 *
 * \param args  argument description to fill in; it is zeroed before use
 * \param key   shader part key; ps_prolog.num_input_sgprs and
 *              ps_prolog.face_vgpr_index are read from it
 */
void si_get_ps_prolog_args(struct si_shader_args *args,
                           const union si_shader_part_key *key)
{
   memset(args, 0, sizeof(*args));

   /* Declare every input SGPR as a 1-dword integer argument and remember
    * each handle so that individual SGPRs can be looked up by index below.
    */
   const unsigned sgpr_count = key->ps_prolog.num_input_sgprs;
   struct ac_arg sgprs[sgpr_count];

   for (unsigned s = 0; s < sgpr_count; s++)
      ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &sgprs[s]);

   args->internal_bindings = sgprs[SI_SGPR_INTERNAL_BINDINGS];
   /* Use the absolute location of the input. */
   args->ac.prim_mask = sgprs[SI_PS_NUM_USER_SGPR];

   /* Interpolation inputs, two VGPRs each, declared in this order. */
   struct ac_arg *const interp_pairs[6] = {
      &args->ac.persp_sample,
      &args->ac.persp_center,
      &args->ac.persp_centroid,
      /* skip PERSP_PULL_MODEL */
      &args->ac.linear_sample,
      &args->ac.linear_center,
      &args->ac.linear_centroid,
      /* skip LINE_STIPPLE_TEX */
   };

   for (unsigned p = 0; p < 6; p++)
      ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_FLOAT, interp_pairs[p]);

   /* POS_X|Y|Z|W_FLOAT: anonymous single VGPRs declared up to the
    * front-face VGPR index taken from the key.
    */
   for (unsigned v = args->ac.num_vgprs_used; v < key->ps_prolog.face_vgpr_index; v++)
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);

   /* Remaining inputs, one VGPR each, declared in this order. */
   struct ac_arg *const tail_vgprs[4] = {
      &args->ac.front_face,
      &args->ac.ancillary,
      &args->ac.sample_coverage,
      &args->ac.pos_fixed_pt,
   };

   for (unsigned t = 0; t < 4; t++)
      ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, tail_vgprs[t]);
}

View file

@ -156,6 +156,8 @@ void si_get_tcs_epilog_args(enum amd_gfx_level gfx_level,
void si_get_vs_prolog_args(enum amd_gfx_level gfx_level,
struct si_shader_args *args,
const union si_shader_part_key *key);
void si_get_ps_prolog_args(struct si_shader_args *args,
const union si_shader_part_key *key);
/* gfx10_shader_ngg.c */
unsigned gfx10_ngg_get_vertices_per_prim(struct si_shader *shader);
@ -199,7 +201,6 @@ LLVMValueRef si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueR
struct ac_arg param, unsigned return_index);
LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
struct ac_arg param, unsigned return_index);
LLVMValueRef si_prolog_get_internal_bindings(struct si_shader_context *ctx);
LLVMValueRef si_prolog_get_internal_binding_slot(struct si_shader_context *ctx, unsigned slot);
LLVMValueRef si_unpack_param(struct si_shader_context *ctx, struct ac_arg param, unsigned rshift,
unsigned bitwidth);

View file

@ -290,17 +290,6 @@ LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret
return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
}
/**
 * Return the internal bindings list of a prolog as a pointer value.
 *
 * The function parameter at SI_SGPR_INTERNAL_BINDINGS (shifted by 8 when
 * the shader is a merged shader) is converted from an integer to a pointer
 * to an array of v4i32 in the const32 address space.
 */
LLVMValueRef si_prolog_get_internal_bindings(struct si_shader_context *ctx)
{
   const unsigned param_base = si_is_merged_shader(ctx->shader) ? 8 : 0;
   LLVMValueRef bindings_addr =
      LLVMGetParam(ctx->main_fn.value, param_base + SI_SGPR_INTERNAL_BINDINGS);

   return LLVMBuildIntToPtr(ctx->ac.builder, bindings_addr,
                            ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
}
LLVMValueRef si_prolog_get_internal_binding_slot(struct si_shader_context *ctx, unsigned slot)
{
LLVMValueRef list = LLVMBuildIntToPtr(

View file

@ -58,9 +58,6 @@ static void interp_fs_color(struct si_shader_context *ctx, unsigned input_index,
bool interp = interp_param != NULL;
if (interp) {
interp_param =
LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2f32, "");
i = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
j = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
}
@ -428,24 +425,20 @@ void si_llvm_ps_build_end(struct si_shader_context *ctx)
ctx->return_value = ret;
}
static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
LLVMValueRef param_internal_bindings,
struct ac_arg param_pos_fixed_pt)
static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx)
{
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef slot, desc, offset, row, bit, address[2];
LLVMValueRef desc, offset, row, bit, address[2];
/* Use the fixed-point gl_FragCoord input.
* Since the stipple pattern is 32x32 and it repeats, just get 5 bits
* per coordinate to get the repeating effect.
*/
address[0] = si_unpack_param(ctx, param_pos_fixed_pt, 0, 5);
address[1] = si_unpack_param(ctx, param_pos_fixed_pt, 16, 5);
address[0] = si_unpack_param(ctx, ctx->args->ac.pos_fixed_pt, 0, 5);
address[1] = si_unpack_param(ctx, ctx->args->ac.pos_fixed_pt, 16, 5);
/* Load the buffer descriptor. */
slot = LLVMConstInt(ctx->ac.i32, SI_PS_CONST_POLY_STIPPLE, 0);
desc = ac_build_load_to_sgpr(&ctx->ac, (struct ac_llvm_pointer) { .t = ctx->ac.v4i32, .v = param_internal_bindings }, slot);
desc = si_prolog_get_internal_binding_slot(ctx, SI_PS_CONST_POLY_STIPPLE);
/* The stipple pattern is 32x32, each row has 32 bits. */
offset = LLVMBuildMul(builder, address[1], LLVMConstInt(ctx->ac.i32, 4, 0), "");
@ -456,6 +449,25 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
ac_build_kill_if_false(&ctx->ac, bit);
}
/**
 * Store the value of one declared argument into the return aggregate.
 *
 * \param ctx        shader context; ctx->args describes the arguments
 * \param ret        return aggregate built so far
 * \param data       value to store: a scalar for a 1-register argument,
 *                   a 2-element vector for a 2-register argument
 * \param arg_index  index of the argument in ctx->args->ac.args
 * \return the updated return aggregate
 */
static LLVMValueRef insert_ret_of_arg(struct si_shader_context *ctx, LLVMValueRef ret,
                                      LLVMValueRef data, unsigned arg_index)
{
   LLVMBuilderRef builder = ctx->ac.builder;
   const unsigned num_regs = ctx->args->ac.args[arg_index].size;
   unsigned slot = ctx->args->ac.args[arg_index].offset;

   /* VGPR arguments are indexed after all used SGPRs. */
   if (ctx->args->ac.args[arg_index].file == AC_ARG_VGPR)
      slot += ctx->args->ac.num_sgprs_used;

   if (num_regs == 1)
      return LLVMBuildInsertValue(builder, ret, data, slot, "");

   /* Only 1- and 2-register arguments are supported; split the pair into
    * two consecutive return slots.
    */
   assert(num_regs == 2);

   LLVMValueRef elem0 = LLVMBuildExtractElement(builder, data, ctx->ac.i32_0, "");
   ret = LLVMBuildInsertValue(builder, ret, elem0, slot, "");

   LLVMValueRef elem1 = LLVMBuildExtractElement(builder, data, ctx->ac.i32_1, "");
   return LLVMBuildInsertValue(builder, ret, elem1, slot + 1, "");
}
/**
* Build the pixel shader prolog function. This handles:
* - two-side color selection and interpolation
@ -468,204 +480,138 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
*/
void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part_key *key)
{
LLVMValueRef ret, func;
int num_returns, i, num_color_channels;
memset(ctx->args, 0, sizeof(*ctx->args));
/* Declare inputs. */
LLVMTypeRef return_types[AC_MAX_ARGS];
num_returns = 0;
num_color_channels = util_bitcount(key->ps_prolog.colors_read);
assert(key->ps_prolog.num_input_sgprs + key->ps_prolog.num_input_vgprs + num_color_channels <=
AC_MAX_ARGS);
for (i = 0; i < key->ps_prolog.num_input_sgprs; i++) {
ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
return_types[num_returns++] = ctx->ac.i32;
}
struct ac_arg pos_fixed_pt;
struct ac_arg ancillary;
struct ac_arg param_sample_mask;
for (i = 0; i < key->ps_prolog.num_input_vgprs; i++) {
struct ac_arg *arg = NULL;
if (i == key->ps_prolog.ancillary_vgpr_index) {
arg = &ancillary;
} else if (i == key->ps_prolog.sample_coverage_vgpr_index) {
arg = &param_sample_mask;
} else if (i == key->ps_prolog.num_input_vgprs - 1) {
/* POS_FIXED_PT is always last. */
arg = &pos_fixed_pt;
}
ac_add_arg(&ctx->args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, arg);
return_types[num_returns++] = ctx->ac.f32;
}
struct si_shader_args *args = ctx->args;
si_get_ps_prolog_args(args, key);
/* Declare outputs (same as inputs + add colors if needed) */
for (i = 0; i < num_color_channels; i++)
LLVMTypeRef return_types[AC_MAX_ARGS];
int num_returns = 0;
for (int i = 0; i < args->ac.num_sgprs_used; i++)
return_types[num_returns++] = ctx->ac.i32;
unsigned num_color_channels = util_bitcount(key->ps_prolog.colors_read);
unsigned num_output_vgprs = args->ac.num_vgprs_used + num_color_channels;
for (int i = 0; i < num_output_vgprs; i++)
return_types[num_returns++] = ctx->ac.f32;
/* Create the function. */
si_llvm_create_func(ctx, "ps_prolog", return_types, num_returns, 0);
func = ctx->main_fn.value;
LLVMValueRef func = ctx->main_fn.value;
/* Copy inputs to outputs. This should be no-op, as the registers match,
* but it will prevent the compiler from overwriting them unintentionally.
*/
ret = ctx->return_value;
for (i = 0; i < ctx->args->ac.arg_count; i++) {
LLVMValueRef ret = ctx->return_value;
for (int i = 0; i < args->ac.arg_count; i++) {
LLVMValueRef p = LLVMGetParam(func, i);
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, i, "");
ret = insert_ret_of_arg(ctx, ret, p, i);
}
/* Polygon stippling. */
if (key->ps_prolog.states.poly_stipple) {
LLVMValueRef list = si_prolog_get_internal_bindings(ctx);
si_llvm_emit_polygon_stipple(ctx, list, pos_fixed_pt);
}
if (key->ps_prolog.states.poly_stipple)
si_llvm_emit_polygon_stipple(ctx);
if (key->ps_prolog.states.bc_optimize_for_persp ||
key->ps_prolog.states.bc_optimize_for_linear) {
unsigned i, base = key->ps_prolog.num_input_sgprs;
LLVMValueRef center[2], centroid[2], tmp, bc_optimize;
LLVMValueRef center, centroid, tmp;
/* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
* The hw doesn't compute CENTROID if the whole wave only
* contains fully-covered quads.
*
* PRIM_MASK is after user SGPRs.
*/
bc_optimize = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
LLVMValueRef bc_optimize = ac_get_arg(&ctx->ac, args->ac.prim_mask);
bc_optimize =
LLVMBuildLShr(ctx->ac.builder, bc_optimize, LLVMConstInt(ctx->ac.i32, 31, 0), "");
bc_optimize = LLVMBuildTrunc(ctx->ac.builder, bc_optimize, ctx->ac.i1, "");
if (key->ps_prolog.states.bc_optimize_for_persp) {
/* Read PERSP_CENTER. */
for (i = 0; i < 2; i++)
center[i] = LLVMGetParam(func, base + 2 + i);
/* Read PERSP_CENTROID. */
for (i = 0; i < 2; i++)
centroid[i] = LLVMGetParam(func, base + 4 + i);
center = ac_get_arg(&ctx->ac, args->ac.persp_center);
centroid = ac_get_arg(&ctx->ac, args->ac.persp_centroid);
/* Select PERSP_CENTROID. */
for (i = 0; i < 2; i++) {
tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center[i], centroid[i], "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, tmp, base + 4 + i, "");
}
tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center, centroid, "");
ret = insert_ret_of_arg(ctx, ret, tmp, args->ac.persp_centroid.arg_index);
}
if (key->ps_prolog.states.bc_optimize_for_linear) {
/* Read LINEAR_CENTER. */
for (i = 0; i < 2; i++)
center[i] = LLVMGetParam(func, base + 8 + i);
/* Read LINEAR_CENTROID. */
for (i = 0; i < 2; i++)
centroid[i] = LLVMGetParam(func, base + 10 + i);
/* Select LINEAR_CENTROID. */
for (i = 0; i < 2; i++) {
tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center[i], centroid[i], "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, tmp, base + 10 + i, "");
}
center = ac_get_arg(&ctx->ac, args->ac.linear_center);
centroid = ac_get_arg(&ctx->ac, args->ac.linear_centroid);
/* Select LINEAR_CENTROID. */
tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center, centroid, "");
ret = insert_ret_of_arg(ctx, ret, tmp, args->ac.linear_centroid.arg_index);
}
}
/* Force per-sample interpolation. */
if (key->ps_prolog.states.force_persp_sample_interp) {
unsigned i, base = key->ps_prolog.num_input_sgprs;
LLVMValueRef persp_sample[2];
/* Read PERSP_SAMPLE. */
for (i = 0; i < 2; i++)
persp_sample[i] = LLVMGetParam(func, base + i);
LLVMValueRef persp_sample = ac_get_arg(&ctx->ac, args->ac.persp_sample);
/* Overwrite PERSP_CENTER. */
for (i = 0; i < 2; i++)
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, persp_sample[i], base + 2 + i, "");
ret = insert_ret_of_arg(ctx, ret, persp_sample, args->ac.persp_center.arg_index);
/* Overwrite PERSP_CENTROID. */
for (i = 0; i < 2; i++)
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, persp_sample[i], base + 4 + i, "");
ret = insert_ret_of_arg(ctx, ret, persp_sample, args->ac.persp_centroid.arg_index);
}
if (key->ps_prolog.states.force_linear_sample_interp) {
unsigned i, base = key->ps_prolog.num_input_sgprs;
LLVMValueRef linear_sample[2];
/* Read LINEAR_SAMPLE. */
for (i = 0; i < 2; i++)
linear_sample[i] = LLVMGetParam(func, base + 6 + i);
LLVMValueRef linear_sample = ac_get_arg(&ctx->ac, args->ac.linear_sample);
/* Overwrite LINEAR_CENTER. */
for (i = 0; i < 2; i++)
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], base + 8 + i, "");
ret = insert_ret_of_arg(ctx, ret, linear_sample, args->ac.linear_center.arg_index);
/* Overwrite LINEAR_CENTROID. */
for (i = 0; i < 2; i++)
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], base + 10 + i, "");
ret = insert_ret_of_arg(ctx, ret, linear_sample, args->ac.linear_centroid.arg_index);
}
/* Force center interpolation. */
if (key->ps_prolog.states.force_persp_center_interp) {
unsigned i, base = key->ps_prolog.num_input_sgprs;
LLVMValueRef persp_center[2];
/* Read PERSP_CENTER. */
for (i = 0; i < 2; i++)
persp_center[i] = LLVMGetParam(func, base + 2 + i);
LLVMValueRef persp_center = ac_get_arg(&ctx->ac, args->ac.persp_center);
/* Overwrite PERSP_SAMPLE. */
for (i = 0; i < 2; i++)
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, persp_center[i], base + i, "");
ret = insert_ret_of_arg(ctx, ret, persp_center, args->ac.persp_sample.arg_index);
/* Overwrite PERSP_CENTROID. */
for (i = 0; i < 2; i++)
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, persp_center[i], base + 4 + i, "");
ret = insert_ret_of_arg(ctx, ret, persp_center, args->ac.persp_centroid.arg_index);
}
if (key->ps_prolog.states.force_linear_center_interp) {
unsigned i, base = key->ps_prolog.num_input_sgprs;
LLVMValueRef linear_center[2];
/* Read LINEAR_CENTER. */
for (i = 0; i < 2; i++)
linear_center[i] = LLVMGetParam(func, base + 8 + i);
LLVMValueRef linear_center = ac_get_arg(&ctx->ac, args->ac.linear_center);
/* Overwrite LINEAR_SAMPLE. */
for (i = 0; i < 2; i++)
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_center[i], base + 6 + i, "");
ret = insert_ret_of_arg(ctx, ret, linear_center, args->ac.linear_sample.arg_index);
/* Overwrite LINEAR_CENTROID. */
for (i = 0; i < 2; i++)
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_center[i], base + 10 + i, "");
ret = insert_ret_of_arg(ctx, ret, linear_center, args->ac.linear_centroid.arg_index);
}
/* Interpolate colors. */
unsigned color_out_idx = 0;
for (i = 0; i < 2; i++) {
unsigned num_input_gprs = args->ac.num_sgprs_used + args->ac.num_vgprs_used;
for (int i = 0; i < 2; i++) {
unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
unsigned face_vgpr = key->ps_prolog.num_input_sgprs + key->ps_prolog.face_vgpr_index;
LLVMValueRef interp[2], color[4];
LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL;
if (!writemask)
continue;
/* If the interpolation qualifier is not CONSTANT (-1). */
LLVMValueRef interp_ij = NULL;
if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
unsigned interp_vgpr =
key->ps_prolog.num_input_sgprs + key->ps_prolog.color_interp_vgpr_index[i];
unsigned index =
args->ac.num_sgprs_used + key->ps_prolog.color_interp_vgpr_index[i];
/* Get the (i,j) updated by bc_optimize handling. */
interp[0] = LLVMBuildExtractValue(ctx->ac.builder, ret, interp_vgpr, "");
interp[1] = LLVMBuildExtractValue(ctx->ac.builder, ret, interp_vgpr + 1, "");
LLVMValueRef interp[2] = {
LLVMBuildExtractValue(ctx->ac.builder, ret, index, ""),
LLVMBuildExtractValue(ctx->ac.builder, ret, index + 1, ""),
};
interp_ij = ac_build_gather_values(&ctx->ac, interp, 2);
}
/* Use the absolute location of the input. */
prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
LLVMValueRef prim_mask = ac_get_arg(&ctx->ac, args->ac.prim_mask);
LLVMValueRef face = NULL;
if (key->ps_prolog.states.color_two_side) {
face = LLVMGetParam(func, face_vgpr);
face = ac_get_arg(&ctx->ac, args->ac.front_face);
face = ac_to_integer(&ctx->ac, face);
}
LLVMValueRef color[4];
interp_fs_color(ctx, key->ps_prolog.color_attr_index[i], i, key->ps_prolog.num_interp_inputs,
key->ps_prolog.colors_read, interp_ij, prim_mask, face, color);
while (writemask) {
unsigned chan = u_bit_scan(&writemask);
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, color[chan],
ctx->args->ac.arg_count + color_out_idx++, "");
num_input_gprs + color_out_idx++, "");
}
}
@ -688,8 +634,8 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
if (key->ps_prolog.states.samplemask_log_ps_iter) {
uint32_t ps_iter_mask =
ac_get_ps_iter_mask(1 << key->ps_prolog.states.samplemask_log_ps_iter);
LLVMValueRef sampleid = si_unpack_param(ctx, ancillary, 8, 4);
LLVMValueRef samplemask = ac_get_arg(&ctx->ac, param_sample_mask);
LLVMValueRef sampleid = si_unpack_param(ctx, args->ac.ancillary, 8, 4);
LLVMValueRef samplemask = ac_get_arg(&ctx->ac, args->ac.sample_coverage);
samplemask = ac_to_integer(&ctx->ac, samplemask);
samplemask =
@ -699,7 +645,7 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
"");
samplemask = ac_to_float(&ctx->ac, samplemask);
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, samplemask, param_sample_mask.arg_index, "");
ret = insert_ret_of_arg(ctx, ret, samplemask, args->ac.sample_coverage.arg_index);
}
/* Tell LLVM to insert WQM instruction sequence when needed. */