mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-10 14:40:13 +01:00
radeonsi: add PS epilog
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
eb10919b83
commit
e79bb746ab
4 changed files with 297 additions and 2 deletions
|
|
@ -542,6 +542,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
|
|||
sscreen->vs_prologs,
|
||||
sscreen->vs_epilogs,
|
||||
sscreen->tcs_epilogs,
|
||||
sscreen->ps_epilogs
|
||||
};
|
||||
unsigned i;
|
||||
|
||||
|
|
|
|||
|
|
@ -92,6 +92,7 @@ struct si_screen {
|
|||
struct si_shader_part *vs_prologs;
|
||||
struct si_shader_part *vs_epilogs;
|
||||
struct si_shader_part *tcs_epilogs;
|
||||
struct si_shader_part *ps_epilogs;
|
||||
};
|
||||
|
||||
struct si_blend_color {
|
||||
|
|
|
|||
|
|
@ -131,6 +131,11 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
|
|||
LLVMTargetMachineRef tm,
|
||||
struct tgsi_shader_info *info);
|
||||
|
||||
/* Ideally pass the sample mask input to the PS epilog as v13, which
|
||||
* is its usual location, so that the shader doesn't have to add v_mov.
|
||||
*/
|
||||
#define PS_EPILOG_SAMPLEMASK_MIN_LOC 13
|
||||
|
||||
/* The VS location of the PrimitiveID input is the same in the epilog,
|
||||
* so that the main shader part doesn't have to move it.
|
||||
*/
|
||||
|
|
@ -2530,6 +2535,100 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
|
|||
si_export_mrt_z(bld_base, depth, stencil, samplemask);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return PS outputs in this order:
|
||||
*
|
||||
* v[0:3] = color0.xyzw
|
||||
* v[4:7] = color1.xyzw
|
||||
* ...
|
||||
* vN+0 = Depth
|
||||
* vN+1 = Stencil
|
||||
* vN+2 = SampleMask
|
||||
* vN+3 = SampleMaskIn (used for OpenGL smoothing)
|
||||
*
|
||||
* The alpha-ref SGPR is returned via its original location.
|
||||
*/
|
||||
static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||
struct si_shader *shader = ctx->shader;
|
||||
struct lp_build_context *base = &bld_base->base;
|
||||
struct tgsi_shader_info *info = &shader->selector->info;
|
||||
LLVMBuilderRef builder = base->gallivm->builder;
|
||||
unsigned i, j, first_vgpr, vgpr;
|
||||
|
||||
LLVMValueRef color[8][4] = {};
|
||||
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
|
||||
LLVMValueRef ret;
|
||||
|
||||
/* Read the output values. */
|
||||
for (i = 0; i < info->num_outputs; i++) {
|
||||
unsigned semantic_name = info->output_semantic_name[i];
|
||||
unsigned semantic_index = info->output_semantic_index[i];
|
||||
|
||||
switch (semantic_name) {
|
||||
case TGSI_SEMANTIC_COLOR:
|
||||
assert(semantic_index < 8);
|
||||
for (j = 0; j < 4; j++) {
|
||||
LLVMValueRef ptr = ctx->radeon_bld.soa.outputs[i][j];
|
||||
LLVMValueRef result = LLVMBuildLoad(builder, ptr, "");
|
||||
color[semantic_index][j] = result;
|
||||
}
|
||||
break;
|
||||
case TGSI_SEMANTIC_POSITION:
|
||||
depth = LLVMBuildLoad(builder,
|
||||
ctx->radeon_bld.soa.outputs[i][2], "");
|
||||
break;
|
||||
case TGSI_SEMANTIC_STENCIL:
|
||||
stencil = LLVMBuildLoad(builder,
|
||||
ctx->radeon_bld.soa.outputs[i][1], "");
|
||||
break;
|
||||
case TGSI_SEMANTIC_SAMPLEMASK:
|
||||
samplemask = LLVMBuildLoad(builder,
|
||||
ctx->radeon_bld.soa.outputs[i][0], "");
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Warning: SI unhandled fs output type:%d\n",
|
||||
semantic_name);
|
||||
}
|
||||
}
|
||||
|
||||
/* Fill the return structure. */
|
||||
ret = ctx->return_value;
|
||||
|
||||
/* Set SGPRs. */
|
||||
ret = LLVMBuildInsertValue(builder, ret,
|
||||
bitcast(bld_base, TGSI_TYPE_SIGNED,
|
||||
LLVMGetParam(ctx->radeon_bld.main_fn,
|
||||
SI_PARAM_ALPHA_REF)),
|
||||
SI_SGPR_ALPHA_REF, "");
|
||||
|
||||
/* Set VGPRs */
|
||||
first_vgpr = vgpr = SI_SGPR_ALPHA_REF + 1;
|
||||
for (i = 0; i < ARRAY_SIZE(color); i++) {
|
||||
if (!color[i][0])
|
||||
continue;
|
||||
|
||||
for (j = 0; j < 4; j++)
|
||||
ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr++, "");
|
||||
}
|
||||
if (depth)
|
||||
ret = LLVMBuildInsertValue(builder, ret, depth, vgpr++, "");
|
||||
if (stencil)
|
||||
ret = LLVMBuildInsertValue(builder, ret, stencil, vgpr++, "");
|
||||
if (samplemask)
|
||||
ret = LLVMBuildInsertValue(builder, ret, samplemask, vgpr++, "");
|
||||
|
||||
/* Add the input sample mask for smoothing at the end. */
|
||||
if (vgpr < first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC)
|
||||
vgpr = first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC;
|
||||
ret = LLVMBuildInsertValue(builder, ret,
|
||||
LLVMGetParam(ctx->radeon_bld.main_fn,
|
||||
SI_PARAM_SAMPLE_COVERAGE), vgpr++, "");
|
||||
|
||||
ctx->return_value = ret;
|
||||
}
|
||||
|
||||
static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
|
||||
struct lp_build_tgsi_context *bld_base,
|
||||
struct lp_build_emit_data *emit_data);
|
||||
|
|
@ -3723,7 +3822,7 @@ static void create_function(struct si_shader_context *ctx)
|
|||
struct si_shader *shader = ctx->shader;
|
||||
LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32;
|
||||
LLVMTypeRef returns[16+32*4];
|
||||
unsigned i, last_array_pointer, last_sgpr, num_params;
|
||||
unsigned i, last_array_pointer, last_sgpr, num_params, num_return_sgprs;
|
||||
unsigned num_returns = 0;
|
||||
|
||||
v3i32 = LLVMVectorType(ctx->i32, 3);
|
||||
|
|
@ -3869,6 +3968,27 @@ static void create_function(struct si_shader_context *ctx)
|
|||
params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32;
|
||||
params[SI_PARAM_POS_FIXED_PT] = ctx->f32;
|
||||
num_params = SI_PARAM_POS_FIXED_PT+1;
|
||||
|
||||
if (!ctx->is_monolithic) {
|
||||
/* Outputs for the epilog. */
|
||||
num_return_sgprs = SI_SGPR_ALPHA_REF + 1;
|
||||
num_returns =
|
||||
num_return_sgprs +
|
||||
util_bitcount(shader->selector->info.colors_written) * 4 +
|
||||
shader->selector->info.writes_z +
|
||||
shader->selector->info.writes_stencil +
|
||||
shader->selector->info.writes_samplemask +
|
||||
1 /* SampleMaskIn */;
|
||||
|
||||
num_returns = MAX2(num_returns,
|
||||
num_return_sgprs +
|
||||
PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
|
||||
|
||||
for (i = 0; i < num_return_sgprs; i++)
|
||||
returns[i] = ctx->i32;
|
||||
for (; i < num_returns; i++)
|
||||
returns[i] = ctx->f32;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
@ -4664,7 +4784,10 @@ static int si_compile_tgsi_shader(struct si_screen *sscreen,
|
|||
break;
|
||||
case TGSI_PROCESSOR_FRAGMENT:
|
||||
ctx.radeon_bld.load_input = declare_input_fs;
|
||||
bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
|
||||
if (is_monolithic)
|
||||
bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
|
||||
else
|
||||
bld_base->emit_epilogue = si_llvm_return_fs_outputs;
|
||||
break;
|
||||
default:
|
||||
assert(!"Unsupported shader type");
|
||||
|
|
@ -5181,6 +5304,159 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
|
|||
return shader->epilog != NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compile the pixel shader epilog. This handles everything that must be
|
||||
* emulated for pixel shader exports. (alpha-test, format conversions, etc)
|
||||
*/
|
||||
static bool si_compile_ps_epilog(struct si_screen *sscreen,
|
||||
LLVMTargetMachineRef tm,
|
||||
struct pipe_debug_callback *debug,
|
||||
struct si_shader_part *out)
|
||||
{
|
||||
union si_shader_part_key *key = &out->key;
|
||||
struct si_shader shader = {};
|
||||
struct si_shader_context ctx;
|
||||
struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
|
||||
struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base;
|
||||
LLVMTypeRef params[16+8*4+3];
|
||||
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
|
||||
int last_array_pointer, last_sgpr, num_params, i;
|
||||
bool status = true;
|
||||
|
||||
si_init_shader_ctx(&ctx, sscreen, &shader, tm, NULL);
|
||||
ctx.type = TGSI_PROCESSOR_FRAGMENT;
|
||||
shader.key.ps.epilog = key->ps_epilog.states;
|
||||
|
||||
/* Declare input SGPRs. */
|
||||
params[SI_PARAM_RW_BUFFERS] = ctx.i64;
|
||||
params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
|
||||
params[SI_PARAM_SAMPLERS] = ctx.i64;
|
||||
params[SI_PARAM_UNUSED] = ctx.i64;
|
||||
params[SI_PARAM_ALPHA_REF] = ctx.f32;
|
||||
last_array_pointer = -1;
|
||||
last_sgpr = SI_PARAM_ALPHA_REF;
|
||||
|
||||
/* Declare input VGPRs. */
|
||||
num_params = (last_sgpr + 1) +
|
||||
util_bitcount(key->ps_epilog.colors_written) * 4 +
|
||||
key->ps_epilog.writes_z +
|
||||
key->ps_epilog.writes_stencil +
|
||||
key->ps_epilog.writes_samplemask;
|
||||
|
||||
num_params = MAX2(num_params,
|
||||
last_sgpr + 1 + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
|
||||
|
||||
assert(num_params <= ARRAY_SIZE(params));
|
||||
|
||||
for (i = last_sgpr + 1; i < num_params; i++)
|
||||
params[i] = ctx.f32;
|
||||
|
||||
/* Create the function. */
|
||||
si_create_function(&ctx, NULL, 0, params, num_params,
|
||||
last_array_pointer, last_sgpr);
|
||||
/* Disable elimination of unused inputs. */
|
||||
radeon_llvm_add_attribute(ctx.radeon_bld.main_fn,
|
||||
"InitialPSInputAddr", 0xffffff);
|
||||
|
||||
/* Process colors. */
|
||||
unsigned vgpr = last_sgpr + 1;
|
||||
unsigned colors_written = key->ps_epilog.colors_written;
|
||||
int last_color_export = -1;
|
||||
|
||||
/* Find the last color export. */
|
||||
if (!key->ps_epilog.writes_z &&
|
||||
!key->ps_epilog.writes_stencil &&
|
||||
!key->ps_epilog.writes_samplemask) {
|
||||
unsigned spi_format = key->ps_epilog.states.spi_shader_col_format;
|
||||
|
||||
/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
|
||||
if (colors_written == 0x1 && key->ps_epilog.states.last_cbuf > 0) {
|
||||
/* Just set this if any of the colorbuffers are enabled. */
|
||||
if (spi_format &
|
||||
((1llu << (4 * (key->ps_epilog.states.last_cbuf + 1))) - 1))
|
||||
last_color_export = 0;
|
||||
} else {
|
||||
for (i = 0; i < 8; i++)
|
||||
if (colors_written & (1 << i) &&
|
||||
(spi_format >> (i * 4)) & 0xf)
|
||||
last_color_export = i;
|
||||
}
|
||||
}
|
||||
|
||||
while (colors_written) {
|
||||
LLVMValueRef color[4];
|
||||
int mrt = u_bit_scan(&colors_written);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
color[i] = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
|
||||
|
||||
si_export_mrt_color(bld_base, color, mrt,
|
||||
num_params - 1,
|
||||
mrt == last_color_export);
|
||||
}
|
||||
|
||||
/* Process depth, stencil, samplemask. */
|
||||
if (key->ps_epilog.writes_z)
|
||||
depth = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
|
||||
if (key->ps_epilog.writes_stencil)
|
||||
stencil = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
|
||||
if (key->ps_epilog.writes_samplemask)
|
||||
samplemask = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
|
||||
|
||||
if (depth || stencil || samplemask)
|
||||
si_export_mrt_z(bld_base, depth, stencil, samplemask);
|
||||
else if (last_color_export == -1)
|
||||
si_export_null(bld_base);
|
||||
|
||||
/* Compile. */
|
||||
LLVMBuildRetVoid(gallivm->builder);
|
||||
radeon_llvm_finalize_module(&ctx.radeon_bld);
|
||||
|
||||
if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
|
||||
gallivm->module, debug, ctx.type,
|
||||
"Fragment Shader Epilog"))
|
||||
status = false;
|
||||
|
||||
radeon_llvm_dispose(&ctx.radeon_bld);
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* Select and compile (or reuse) pixel shader parts (prolog & epilog).
|
||||
*/
|
||||
static bool si_shader_select_ps_parts(struct si_screen *sscreen,
|
||||
LLVMTargetMachineRef tm,
|
||||
struct si_shader *shader,
|
||||
struct pipe_debug_callback *debug)
|
||||
{
|
||||
struct tgsi_shader_info *info = &shader->selector->info;
|
||||
union si_shader_part_key epilog_key;
|
||||
|
||||
/* Get the epilog. */
|
||||
memset(&epilog_key, 0, sizeof(epilog_key));
|
||||
epilog_key.ps_epilog.colors_written = info->colors_written;
|
||||
epilog_key.ps_epilog.writes_z = info->writes_z;
|
||||
epilog_key.ps_epilog.writes_stencil = info->writes_stencil;
|
||||
epilog_key.ps_epilog.writes_samplemask = info->writes_samplemask;
|
||||
epilog_key.ps_epilog.states = shader->key.ps.epilog;
|
||||
|
||||
shader->epilog =
|
||||
si_get_shader_part(sscreen, &sscreen->ps_epilogs,
|
||||
&epilog_key, tm, debug,
|
||||
si_compile_ps_epilog);
|
||||
if (!shader->epilog)
|
||||
return false;
|
||||
|
||||
/* The sample mask input is always enabled, because the API shader always
|
||||
* passes it through to the epilog. Disable it here if it's unused.
|
||||
*/
|
||||
if (!shader->key.ps.epilog.poly_line_smoothing &&
|
||||
!shader->selector->info.reads_samplemask)
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
|
||||
struct si_shader *shader,
|
||||
struct pipe_debug_callback *debug)
|
||||
|
|
@ -5207,6 +5483,16 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
|
|||
if (!si_shader_select_tes_parts(sscreen, tm, shader, debug))
|
||||
return -1;
|
||||
break;
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
if (!si_shader_select_ps_parts(sscreen, tm, shader, debug))
|
||||
return -1;
|
||||
|
||||
/* Make sure we have at least as many VGPRs as there
|
||||
* are allocated inputs.
|
||||
*/
|
||||
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
|
||||
shader->num_input_vgprs);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Update SGPR and VGPR counts. */
|
||||
|
|
|
|||
|
|
@ -281,6 +281,13 @@ union si_shader_part_key {
|
|||
struct {
|
||||
struct si_tcs_epilog_bits states;
|
||||
} tcs_epilog;
|
||||
struct {
|
||||
struct si_ps_epilog_bits states;
|
||||
unsigned colors_written:8;
|
||||
unsigned writes_z:1;
|
||||
unsigned writes_stencil:1;
|
||||
unsigned writes_samplemask:1;
|
||||
} ps_epilog;
|
||||
};
|
||||
|
||||
union si_shader_key {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue