mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
radeonsi: use SPI_SHADER_COL_FORMAT fields instead of export_16bpc
This does change the behavior slightly:
If a shader writes COLOR[i] and that color buffer isn't bound,
the shader will export MRT_NULL instead and discard the IR tree that
calculates the output. The only exception is alpha-to-coverage, which
requires an alpha export.
v2: - update a comment about 16BPC
- account for MRTZ when when fixing alpha-test/kill
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
0446ea9d08
commit
8667a1aea2
7 changed files with 91 additions and 38 deletions
|
|
@ -252,6 +252,7 @@ struct r600_surface {
|
||||||
unsigned cb_color_fmask_slice; /* EG and later */
|
unsigned cb_color_fmask_slice; /* EG and later */
|
||||||
unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
|
unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
|
||||||
unsigned cb_color_mask; /* R600 only */
|
unsigned cb_color_mask; /* R600 only */
|
||||||
|
unsigned spi_shader_col_format; /* SI+ */
|
||||||
unsigned sx_ps_downconvert; /* Stoney only */
|
unsigned sx_ps_downconvert; /* Stoney only */
|
||||||
unsigned sx_blend_opt_epsilon; /* Stoney only */
|
unsigned sx_blend_opt_epsilon; /* Stoney only */
|
||||||
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
|
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
|
||||||
|
|
|
||||||
|
|
@ -125,7 +125,7 @@ struct si_framebuffer {
|
||||||
unsigned log_samples;
|
unsigned log_samples;
|
||||||
unsigned cb0_is_integer;
|
unsigned cb0_is_integer;
|
||||||
unsigned compressed_cb_mask;
|
unsigned compressed_cb_mask;
|
||||||
unsigned export_16bpc;
|
unsigned spi_shader_col_format;
|
||||||
unsigned dirty_cbufs;
|
unsigned dirty_cbufs;
|
||||||
bool dirty_zsbuf;
|
bool dirty_zsbuf;
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -1265,7 +1265,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
|
||||||
struct lp_build_context *uint =
|
struct lp_build_context *uint =
|
||||||
&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
|
&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
|
||||||
struct lp_build_context *base = &bld_base->base;
|
struct lp_build_context *base = &bld_base->base;
|
||||||
unsigned compressed = 0;
|
unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
|
||||||
unsigned chan;
|
unsigned chan;
|
||||||
|
|
||||||
/* XXX: This controls which components of the output
|
/* XXX: This controls which components of the output
|
||||||
|
|
@ -1286,17 +1286,29 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
|
||||||
args[3] = lp_build_const_int32(base->gallivm, target);
|
args[3] = lp_build_const_int32(base->gallivm, target);
|
||||||
|
|
||||||
if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
|
if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
|
||||||
|
unsigned col_formats =
|
||||||
|
si_shader_ctx->shader->key.ps.spi_shader_col_format;
|
||||||
int cbuf = target - V_008DFC_SQ_EXP_MRT;
|
int cbuf = target - V_008DFC_SQ_EXP_MRT;
|
||||||
|
|
||||||
if (cbuf >= 0 && cbuf < 8)
|
assert(cbuf >= 0 && cbuf < 8);
|
||||||
compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
|
spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set COMPR flag */
|
args[4] = uint->zero; /* COMPR flag */
|
||||||
args[4] = compressed ? uint->one : uint->zero;
|
args[5] = base->undef;
|
||||||
|
args[6] = base->undef;
|
||||||
|
args[7] = base->undef;
|
||||||
|
args[8] = base->undef;
|
||||||
|
|
||||||
|
switch (spi_shader_col_format) {
|
||||||
|
case V_028714_SPI_SHADER_ZERO:
|
||||||
|
args[0] = uint->zero; /* writemask */
|
||||||
|
args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case V_028714_SPI_SHADER_FP16_ABGR:
|
||||||
|
args[4] = uint->one; /* COMPR flag */
|
||||||
|
|
||||||
if (compressed) {
|
|
||||||
/* Pixel shader needs to pack output values before export */
|
|
||||||
for (chan = 0; chan < 2; chan++) {
|
for (chan = 0; chan < 2; chan++) {
|
||||||
LLVMValueRef pack_args[2] = {
|
LLVMValueRef pack_args[2] = {
|
||||||
values[2 * chan],
|
values[2 * chan],
|
||||||
|
|
@ -1314,10 +1326,13 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
|
||||||
packed,
|
packed,
|
||||||
LLVMFloatTypeInContext(base->gallivm->context),
|
LLVMFloatTypeInContext(base->gallivm->context),
|
||||||
"");
|
"");
|
||||||
args[chan + 7] = base->undef;
|
|
||||||
}
|
}
|
||||||
} else
|
break;
|
||||||
|
|
||||||
|
case V_028714_SPI_SHADER_32_ABGR:
|
||||||
memcpy(&args[5], values, sizeof(values[0]) * 4);
|
memcpy(&args[5], values, sizeof(values[0]) * 4);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
|
static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
|
||||||
|
|
@ -4031,7 +4046,7 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PIPE_SHADER_FRAGMENT:
|
case PIPE_SHADER_FRAGMENT:
|
||||||
fprintf(f, " export_16bpc = 0x%X\n", key->ps.export_16bpc);
|
fprintf(f, " spi_shader_col_format = 0x%x\n", key->ps.spi_shader_col_format);
|
||||||
fprintf(f, " last_cbuf = %u\n", key->ps.last_cbuf);
|
fprintf(f, " last_cbuf = %u\n", key->ps.last_cbuf);
|
||||||
fprintf(f, " color_two_side = %u\n", key->ps.color_two_side);
|
fprintf(f, " color_two_side = %u\n", key->ps.color_two_side);
|
||||||
fprintf(f, " alpha_func = %u\n", key->ps.alpha_func);
|
fprintf(f, " alpha_func = %u\n", key->ps.alpha_func);
|
||||||
|
|
|
||||||
|
|
@ -232,7 +232,7 @@ struct si_shader_selector {
|
||||||
|
|
||||||
union si_shader_key {
|
union si_shader_key {
|
||||||
struct {
|
struct {
|
||||||
unsigned export_16bpc:8;
|
unsigned spi_shader_col_format;
|
||||||
unsigned last_cbuf:3;
|
unsigned last_cbuf:3;
|
||||||
unsigned color_two_side:1;
|
unsigned color_two_side:1;
|
||||||
unsigned alpha_func:3;
|
unsigned alpha_func:3;
|
||||||
|
|
|
||||||
|
|
@ -403,6 +403,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
|
||||||
if (!blend)
|
if (!blend)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
blend->alpha_to_coverage = state->alpha_to_coverage;
|
||||||
blend->alpha_to_one = state->alpha_to_one;
|
blend->alpha_to_one = state->alpha_to_one;
|
||||||
blend->dual_src_blend = util_blend_state_is_dual(state, 0);
|
blend->dual_src_blend = util_blend_state_is_dual(state, 0);
|
||||||
|
|
||||||
|
|
@ -1883,6 +1884,21 @@ unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool sten
|
||||||
* framebuffer handling
|
* framebuffer handling
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
static void si_choose_spi_color_formats(struct r600_surface *surf,
|
||||||
|
unsigned format, unsigned swap,
|
||||||
|
unsigned ntype)
|
||||||
|
{
|
||||||
|
unsigned max_comp_size = si_colorformat_max_comp_size(format);
|
||||||
|
|
||||||
|
surf->spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
|
||||||
|
|
||||||
|
if (ntype == V_028C70_NUMBER_SRGB ||
|
||||||
|
((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
|
||||||
|
max_comp_size <= 10) ||
|
||||||
|
(ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16))
|
||||||
|
surf->spi_shader_col_format = V_028714_SPI_SHADER_FP16_ABGR;
|
||||||
|
}
|
||||||
|
|
||||||
static void si_initialize_color_surface(struct si_context *sctx,
|
static void si_initialize_color_surface(struct si_context *sctx,
|
||||||
struct r600_surface *surf)
|
struct r600_surface *surf)
|
||||||
{
|
{
|
||||||
|
|
@ -1896,7 +1912,6 @@ static void si_initialize_color_surface(struct si_context *sctx,
|
||||||
const struct util_format_description *desc;
|
const struct util_format_description *desc;
|
||||||
int i;
|
int i;
|
||||||
unsigned blend_clamp = 0, blend_bypass = 0;
|
unsigned blend_clamp = 0, blend_bypass = 0;
|
||||||
unsigned max_comp_size;
|
|
||||||
|
|
||||||
/* Layered rendering doesn't work with LINEAR_GENERAL.
|
/* Layered rendering doesn't work with LINEAR_GENERAL.
|
||||||
* (LINEAR_ALIGNED and others work) */
|
* (LINEAR_ALIGNED and others work) */
|
||||||
|
|
@ -2053,13 +2068,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Determine pixel shader export format */
|
/* Determine pixel shader export format */
|
||||||
max_comp_size = si_colorformat_max_comp_size(format);
|
si_choose_spi_color_formats(surf, format, swap, ntype);
|
||||||
if (ntype == V_028C70_NUMBER_SRGB ||
|
|
||||||
((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
|
|
||||||
max_comp_size <= 10) ||
|
|
||||||
(ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
|
|
||||||
surf->export_16bpc = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sctx->b.family == CHIP_STONEY &&
|
if (sctx->b.family == CHIP_STONEY &&
|
||||||
!(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
|
!(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
|
||||||
|
|
@ -2286,7 +2295,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
||||||
|
|
||||||
util_copy_framebuffer_state(&sctx->framebuffer.state, state);
|
util_copy_framebuffer_state(&sctx->framebuffer.state, state);
|
||||||
|
|
||||||
sctx->framebuffer.export_16bpc = 0;
|
sctx->framebuffer.spi_shader_col_format = 0;
|
||||||
sctx->framebuffer.compressed_cb_mask = 0;
|
sctx->framebuffer.compressed_cb_mask = 0;
|
||||||
sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
|
sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
|
||||||
sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
|
sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
|
||||||
|
|
@ -2307,22 +2316,20 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
||||||
si_initialize_color_surface(sctx, surf);
|
si_initialize_color_surface(sctx, surf);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (surf->export_16bpc) {
|
sctx->framebuffer.spi_shader_col_format |=
|
||||||
sctx->framebuffer.export_16bpc |= 1 << i;
|
surf->spi_shader_col_format << (i * 4);
|
||||||
}
|
|
||||||
|
|
||||||
if (rtex->fmask.size && rtex->cmask.size) {
|
if (rtex->fmask.size && rtex->cmask.size) {
|
||||||
sctx->framebuffer.compressed_cb_mask |= 1 << i;
|
sctx->framebuffer.compressed_cb_mask |= 1 << i;
|
||||||
}
|
}
|
||||||
r600_context_add_resource_size(ctx, surf->base.texture);
|
r600_context_add_resource_size(ctx, surf->base.texture);
|
||||||
}
|
}
|
||||||
/* Set the 16BPC export for possible dual-src blending. */
|
/* Set the second SPI format for possible dual-src blending. */
|
||||||
if (i == 1 && surf && surf->export_16bpc) {
|
if (i == 1 && surf) {
|
||||||
sctx->framebuffer.export_16bpc |= 1 << 1;
|
sctx->framebuffer.spi_shader_col_format |=
|
||||||
|
surf->spi_shader_col_format << (i * 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(!(sctx->framebuffer.export_16bpc & ~0xff));
|
|
||||||
|
|
||||||
if (state->zsbuf) {
|
if (state->zsbuf) {
|
||||||
surf = (struct r600_surface*)state->zsbuf;
|
surf = (struct r600_surface*)state->zsbuf;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,7 @@ struct si_shader;
|
||||||
struct si_state_blend {
|
struct si_state_blend {
|
||||||
struct si_pm4_state pm4;
|
struct si_pm4_state pm4;
|
||||||
uint32_t cb_target_mask;
|
uint32_t cb_target_mask;
|
||||||
|
bool alpha_to_coverage;
|
||||||
bool alpha_to_one;
|
bool alpha_to_one;
|
||||||
bool dual_src_blend;
|
bool dual_src_blend;
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -382,13 +382,28 @@ static void si_shader_vs(struct si_shader *shader)
|
||||||
si_set_tesseval_regs(shader, pm4);
|
si_set_tesseval_regs(shader, pm4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
|
||||||
|
{
|
||||||
|
unsigned value = shader->key.ps.spi_shader_col_format;
|
||||||
|
unsigned i, num_targets = (util_last_bit(value) + 3) / 4;
|
||||||
|
|
||||||
|
/* If the i-th target format is set, all previous target formats must
|
||||||
|
* be non-zero to avoid hangs.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < num_targets; i++)
|
||||||
|
if (!(value & (0xf << (i * 4))))
|
||||||
|
value |= V_028714_SPI_SHADER_32_R << (i * 4);
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
static void si_shader_ps(struct si_shader *shader)
|
static void si_shader_ps(struct si_shader *shader)
|
||||||
{
|
{
|
||||||
struct tgsi_shader_info *info = &shader->selector->info;
|
struct tgsi_shader_info *info = &shader->selector->info;
|
||||||
struct si_pm4_state *pm4;
|
struct si_pm4_state *pm4;
|
||||||
unsigned i, spi_ps_in_control;
|
unsigned i, spi_ps_in_control, spi_shader_col_format;
|
||||||
unsigned spi_shader_col_format = 0, cb_shader_mask = 0;
|
unsigned cb_shader_mask = 0;
|
||||||
unsigned colors_written, export_16bpc;
|
unsigned colors_written;
|
||||||
unsigned num_sgprs, num_user_sgprs;
|
unsigned num_sgprs, num_user_sgprs;
|
||||||
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
|
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
|
||||||
uint64_t va;
|
uint64_t va;
|
||||||
|
|
@ -425,7 +440,6 @@ static void si_shader_ps(struct si_shader *shader)
|
||||||
|
|
||||||
/* Find out what SPI_SHADER_COL_FORMAT and CB_SHADER_MASK should be. */
|
/* Find out what SPI_SHADER_COL_FORMAT and CB_SHADER_MASK should be. */
|
||||||
colors_written = info->colors_written;
|
colors_written = info->colors_written;
|
||||||
export_16bpc = shader->key.ps.export_16bpc;
|
|
||||||
|
|
||||||
if (info->colors_written == 0x1 &&
|
if (info->colors_written == 0x1 &&
|
||||||
info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
|
info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
|
||||||
|
|
@ -434,13 +448,20 @@ static void si_shader_ps(struct si_shader *shader)
|
||||||
|
|
||||||
while (colors_written) {
|
while (colors_written) {
|
||||||
i = u_bit_scan(&colors_written);
|
i = u_bit_scan(&colors_written);
|
||||||
if (export_16bpc & (1 << i))
|
|
||||||
spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR << (4 * i);
|
|
||||||
else
|
|
||||||
spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << (4 * i);
|
|
||||||
cb_shader_mask |= 0xf << (4 * i);
|
cb_shader_mask |= 0xf << (4 * i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spi_shader_col_format = si_get_spi_shader_col_format(shader);
|
||||||
|
|
||||||
|
/* This must be non-zero for alpha-test/kill to work.
|
||||||
|
* The hardware ignores the EXEC mask if no export memory is allocated.
|
||||||
|
*/
|
||||||
|
if (!spi_shader_col_format &&
|
||||||
|
!info->writes_z && !info->writes_stencil && !info->writes_samplemask &&
|
||||||
|
(shader->selector->info.uses_kill ||
|
||||||
|
shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS))
|
||||||
|
spi_shader_col_format = V_028714_SPI_SHADER_32_R;
|
||||||
|
|
||||||
/* Set interpolation controls. */
|
/* Set interpolation controls. */
|
||||||
has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||
|
has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||
|
||||||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena);
|
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena);
|
||||||
|
|
@ -571,12 +592,20 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
|
||||||
break;
|
break;
|
||||||
case PIPE_SHADER_FRAGMENT: {
|
case PIPE_SHADER_FRAGMENT: {
|
||||||
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
||||||
|
struct si_state_blend *blend = sctx->queued.named.blend;
|
||||||
|
|
||||||
if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
|
if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
|
||||||
sel->info.colors_written == 0x1)
|
sel->info.colors_written == 0x1)
|
||||||
key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
|
key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
|
||||||
|
|
||||||
key->ps.export_16bpc = sctx->framebuffer.export_16bpc;
|
key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
|
||||||
|
|
||||||
|
/* If alpha-to-coverage is enabled, we have to export alpha
|
||||||
|
* even if there is no color buffer.
|
||||||
|
*/
|
||||||
|
if (!(key->ps.spi_shader_col_format & 0xf) &&
|
||||||
|
blend && blend->alpha_to_coverage)
|
||||||
|
key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_FP16_ABGR;
|
||||||
|
|
||||||
if (rs) {
|
if (rs) {
|
||||||
bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
|
bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue