mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-14 06:30:26 +01:00
radeonsi: remove CB_RESOLVE
It's rarely used because a custom pixel or compute shader is almost always faster, and we have those already. RGB<->BGR swapping and microtile mode switching existed only for CB_RESOLVE and are removed too. RADV could also remove CB_RESOLVE, but it should probably use a pixel shader until it can use ac_nir_meta_cs_blit, which is the fastest option for gfx12.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39872>
This commit is contained in:
parent
aa92b464f3
commit
1a105e1b1f
9 changed files with 18 additions and 402 deletions
|
|
@ -20,8 +20,6 @@ enum
|
|||
SI_BLIT = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES | SI_SAVE_FRAGMENT_STATE,
|
||||
|
||||
SI_DECOMPRESS = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND,
|
||||
|
||||
SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE
|
||||
};
|
||||
|
||||
void si_blitter_begin(struct si_context *sctx, enum si_blitter_op op)
|
||||
|
|
@ -1036,163 +1034,6 @@ void si_gfx_copy_image(struct si_context *sctx, struct pipe_resource *dst,
|
|||
pipe_sampler_view_reference(&src_view, NULL);
|
||||
}
|
||||
|
||||
static void si_do_CB_resolve(struct si_context *sctx, const struct pipe_blit_info *info,
|
||||
struct pipe_resource *dst, unsigned dst_level, unsigned dst_z,
|
||||
enum pipe_format format)
|
||||
{
|
||||
/* Required before and after CB_RESOLVE. */
|
||||
si_set_barrier_flags(sctx, SI_BARRIER_SYNC_AND_INV_CB);
|
||||
|
||||
si_blitter_begin(
|
||||
sctx, SI_COLOR_RESOLVE | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
|
||||
util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z, info->src.resource,
|
||||
info->src.box.z, ~0, sctx->custom_blend_resolve, format);
|
||||
si_blitter_end(sctx);
|
||||
|
||||
/* Flush caches for possible texturing. */
|
||||
si_make_CB_shader_coherent(sctx, 1, false, true /* no DCC */);
|
||||
}
|
||||
|
||||
static bool resolve_formats_compatible(enum pipe_format src, enum pipe_format dst,
|
||||
bool src_swaps_rgb_to_bgr, bool *need_rgb_to_bgr)
|
||||
{
|
||||
*need_rgb_to_bgr = false;
|
||||
|
||||
if (src_swaps_rgb_to_bgr) {
|
||||
/* We must only check the swapped format. */
|
||||
enum pipe_format swapped_src = util_format_rgb_to_bgr(src);
|
||||
assert(swapped_src);
|
||||
return util_is_format_compatible(util_format_description(swapped_src),
|
||||
util_format_description(dst));
|
||||
}
|
||||
|
||||
if (util_is_format_compatible(util_format_description(src), util_format_description(dst)))
|
||||
return true;
|
||||
|
||||
enum pipe_format swapped_src = util_format_rgb_to_bgr(src);
|
||||
*need_rgb_to_bgr = util_is_format_compatible(util_format_description(swapped_src),
|
||||
util_format_description(dst));
|
||||
return *need_rgb_to_bgr;
|
||||
}
|
||||
|
||||
bool si_msaa_resolve_blit_via_CB(struct pipe_context *ctx, const struct pipe_blit_info *info,
|
||||
bool fail_if_slow)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
|
||||
/* Gfx11 doesn't have CB_RESOLVE. */
|
||||
if (sctx->gfx_level >= GFX11)
|
||||
return false;
|
||||
|
||||
struct si_texture *src = (struct si_texture *)info->src.resource;
|
||||
struct si_texture *dst = (struct si_texture *)info->dst.resource;
|
||||
unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level);
|
||||
unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level);
|
||||
enum pipe_format format = info->src.format;
|
||||
unsigned num_channels = util_format_description(format)->nr_channels;
|
||||
|
||||
/* Check basic requirements for hw resolve. */
|
||||
if (!(info->src.resource->nr_samples > 1 && info->dst.resource->nr_samples <= 1 &&
|
||||
!util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format) &&
|
||||
util_max_layer(info->src.resource, 0) == 0))
|
||||
return false;
|
||||
|
||||
/* Return if this is slower than alternatives. */
|
||||
if (fail_if_slow) {
|
||||
/* CB_RESOLVE is much slower without FMASK. */
|
||||
if (sctx->screen->debug_flags & DBG(NO_FMASK))
|
||||
return false;
|
||||
|
||||
/* Verified on: Tahiti, Hawaii, Tonga, Vega10, Navi10, Navi21 */
|
||||
switch (sctx->gfx_level) {
|
||||
case GFX6:
|
||||
return false;
|
||||
|
||||
case GFX7:
|
||||
if (src->surface.bpe != 16)
|
||||
return false;
|
||||
break;
|
||||
|
||||
case GFX8:
|
||||
case GFX9:
|
||||
case GFX10:
|
||||
return false;
|
||||
|
||||
case GFX10_3:
|
||||
if (!(src->surface.bpe == 8 && src->buffer.b.b.nr_samples == 8 && num_channels == 4) &&
|
||||
!(src->surface.bpe == 16 && src->buffer.b.b.nr_samples == 4))
|
||||
return false;
|
||||
break;
|
||||
|
||||
default:
|
||||
UNREACHABLE("unexpected gfx version");
|
||||
}
|
||||
}
|
||||
|
||||
/* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and
|
||||
* the format is R16G16. Use R16A16, which does work.
|
||||
*/
|
||||
if (format == PIPE_FORMAT_R16G16_UNORM)
|
||||
format = PIPE_FORMAT_R16A16_UNORM;
|
||||
if (format == PIPE_FORMAT_R16G16_SNORM)
|
||||
format = PIPE_FORMAT_R16A16_SNORM;
|
||||
|
||||
bool need_rgb_to_bgr = false;
|
||||
|
||||
/* Check the remaining requirements for hw resolve. */
|
||||
if (util_max_layer(info->dst.resource, info->dst.level) == 0 && !info->scissor_enable &&
|
||||
!info->swizzle_enable &&
|
||||
(info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA &&
|
||||
resolve_formats_compatible(info->src.format, info->dst.format,
|
||||
src->swap_rgb_to_bgr, &need_rgb_to_bgr) &&
|
||||
dst_width == info->src.resource->width0 && dst_height == info->src.resource->height0 &&
|
||||
info->dst.box.x == 0 && info->dst.box.y == 0 && info->dst.box.width == dst_width &&
|
||||
info->dst.box.height == dst_height && info->dst.box.depth == 1 && info->src.box.x == 0 &&
|
||||
info->src.box.y == 0 && info->src.box.width == dst_width &&
|
||||
info->src.box.height == dst_height && info->src.box.depth == 1 && !dst->surface.is_linear &&
|
||||
(!dst->cmask_buffer || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */
|
||||
/* Check the remaining constraints. */
|
||||
if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode ||
|
||||
need_rgb_to_bgr) {
|
||||
/* Changing the microtile mode is not possible with GFX10. */
|
||||
if (sctx->gfx_level >= GFX10)
|
||||
return false;
|
||||
|
||||
/* The next fast clear will switch to this mode to
|
||||
* get direct hw resolve next time if the mode is
|
||||
* different now.
|
||||
*/
|
||||
if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode)
|
||||
src->last_msaa_resolve_target_micro_mode = dst->surface.micro_tile_mode;
|
||||
if (need_rgb_to_bgr)
|
||||
src->swap_rgb_to_bgr_on_next_clear = true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Resolving into a surface with DCC is unsupported. Since
|
||||
* it's being overwritten anyway, clear it to uncompressed.
|
||||
*/
|
||||
if (vi_dcc_enabled(dst, info->dst.level)) {
|
||||
struct si_clear_info clear_info;
|
||||
|
||||
if (!vi_dcc_get_clear_info(sctx, dst, info->dst.level, DCC_UNCOMPRESSED, &clear_info))
|
||||
return false;
|
||||
|
||||
si_barrier_before_image_fast_clear(sctx, SI_CLEAR_TYPE_DCC);
|
||||
si_execute_clears(sctx, &clear_info, 1, info->render_condition_enable);
|
||||
si_barrier_after_image_fast_clear(sctx);
|
||||
dst->dirty_level_mask &= ~(1 << info->dst.level);
|
||||
}
|
||||
|
||||
/* Resolve directly from src to dst. */
|
||||
si_do_CB_resolve(sctx, info, info->dst.resource, info->dst.level, info->dst.box.z, format);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void si_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
|
|
@ -1233,12 +1074,6 @@ static void si_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
|
|||
simple_mtx_unlock(&sscreen->async_compute_context_lock);
|
||||
}
|
||||
|
||||
if (unlikely(sctx->sqtt_enabled))
|
||||
sctx->sqtt_next_event = EventCmdResolveImage;
|
||||
|
||||
if (si_msaa_resolve_blit_via_CB(ctx, info, true))
|
||||
return;
|
||||
|
||||
if (unlikely(sctx->sqtt_enabled))
|
||||
sctx->sqtt_next_event = EventCmdCopyImage;
|
||||
|
||||
|
|
|
|||
|
|
@ -128,9 +128,6 @@ static bool si_set_clear_color(struct si_texture *tex, enum pipe_format surface_
|
|||
uc.ui[0] = color->ui[0];
|
||||
uc.ui[1] = color->ui[3];
|
||||
} else {
|
||||
if (tex->swap_rgb_to_bgr)
|
||||
surface_format = util_format_rgb_to_bgr(surface_format);
|
||||
|
||||
util_pack_color_union(surface_format, &uc, color);
|
||||
}
|
||||
|
||||
|
|
@ -464,113 +461,6 @@ bool vi_dcc_get_clear_info(struct si_context *sctx, struct si_texture *tex, unsi
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Set the same micro tile mode as the destination of the last MSAA resolve.
|
||||
* This allows hitting the MSAA resolve fast path, which requires that both
|
||||
* src and dst micro tile modes match.
|
||||
*/
|
||||
static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, struct si_texture *tex)
|
||||
{
|
||||
if (sscreen->info.gfx_level >= GFX10 || tex->buffer.b.is_shared ||
|
||||
tex->buffer.b.b.nr_samples <= 1 ||
|
||||
tex->surface.micro_tile_mode == tex->last_msaa_resolve_target_micro_mode)
|
||||
return;
|
||||
|
||||
assert(sscreen->info.gfx_level >= GFX9 ||
|
||||
tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
|
||||
assert(tex->buffer.b.b.last_level == 0);
|
||||
|
||||
if (sscreen->info.gfx_level >= GFX9) {
|
||||
/* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */
|
||||
assert(tex->surface.u.gfx9.swizzle_mode >= 4);
|
||||
|
||||
/* If you do swizzle_mode % 4, you'll get:
|
||||
* 0 = Depth
|
||||
* 1 = Standard,
|
||||
* 2 = Displayable
|
||||
* 3 = Rotated
|
||||
*
|
||||
* Depth-sample order isn't allowed:
|
||||
*/
|
||||
assert(tex->surface.u.gfx9.swizzle_mode % 4 != 0);
|
||||
|
||||
switch (tex->last_msaa_resolve_target_micro_mode) {
|
||||
case RADEON_MICRO_MODE_DISPLAY:
|
||||
tex->surface.u.gfx9.swizzle_mode &= ~0x3;
|
||||
tex->surface.u.gfx9.swizzle_mode += 2; /* D */
|
||||
break;
|
||||
case RADEON_MICRO_MODE_STANDARD:
|
||||
tex->surface.u.gfx9.swizzle_mode &= ~0x3;
|
||||
tex->surface.u.gfx9.swizzle_mode += 1; /* S */
|
||||
break;
|
||||
case RADEON_MICRO_MODE_RENDER:
|
||||
tex->surface.u.gfx9.swizzle_mode &= ~0x3;
|
||||
tex->surface.u.gfx9.swizzle_mode += 3; /* R */
|
||||
break;
|
||||
default: /* depth */
|
||||
assert(!"unexpected micro mode");
|
||||
return;
|
||||
}
|
||||
} else if (sscreen->info.gfx_level >= GFX7) {
|
||||
/* These magic numbers were copied from addrlib. It doesn't use
|
||||
* any definitions for them either. They are all 2D_TILED_THIN1
|
||||
* modes with different bpp and micro tile mode.
|
||||
*/
|
||||
switch (tex->last_msaa_resolve_target_micro_mode) {
|
||||
case RADEON_MICRO_MODE_DISPLAY:
|
||||
tex->surface.u.legacy.tiling_index[0] = 10;
|
||||
break;
|
||||
case RADEON_MICRO_MODE_STANDARD:
|
||||
tex->surface.u.legacy.tiling_index[0] = 14;
|
||||
break;
|
||||
case RADEON_MICRO_MODE_RENDER:
|
||||
tex->surface.u.legacy.tiling_index[0] = 28;
|
||||
break;
|
||||
default: /* depth, thick */
|
||||
assert(!"unexpected micro mode");
|
||||
return;
|
||||
}
|
||||
} else { /* GFX6 */
|
||||
switch (tex->last_msaa_resolve_target_micro_mode) {
|
||||
case RADEON_MICRO_MODE_DISPLAY:
|
||||
switch (tex->surface.bpe) {
|
||||
case 1:
|
||||
tex->surface.u.legacy.tiling_index[0] = 10;
|
||||
break;
|
||||
case 2:
|
||||
tex->surface.u.legacy.tiling_index[0] = 11;
|
||||
break;
|
||||
default: /* 4, 8 */
|
||||
tex->surface.u.legacy.tiling_index[0] = 12;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case RADEON_MICRO_MODE_STANDARD:
|
||||
switch (tex->surface.bpe) {
|
||||
case 1:
|
||||
tex->surface.u.legacy.tiling_index[0] = 14;
|
||||
break;
|
||||
case 2:
|
||||
tex->surface.u.legacy.tiling_index[0] = 15;
|
||||
break;
|
||||
case 4:
|
||||
tex->surface.u.legacy.tiling_index[0] = 16;
|
||||
break;
|
||||
default: /* 8, 16 */
|
||||
tex->surface.u.legacy.tiling_index[0] = 17;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default: /* depth, thick */
|
||||
assert(!"unexpected micro mode");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
tex->surface.micro_tile_mode = tex->last_msaa_resolve_target_micro_mode;
|
||||
|
||||
p_atomic_inc(&sscreen->dirty_tex_counter);
|
||||
}
|
||||
|
||||
static uint32_t si_get_htile_clear_value(struct si_texture *tex, float depth)
|
||||
{
|
||||
if (tex->htile_stencil_disabled)
|
||||
|
|
@ -628,20 +518,6 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers,
|
|||
continue;
|
||||
}
|
||||
|
||||
/* We can change the micro tile mode before a full clear. */
|
||||
/* This is only used for MSAA textures when clearing all layers. */
|
||||
si_set_optimal_micro_tile_mode(sctx->screen, tex);
|
||||
|
||||
if (tex->swap_rgb_to_bgr_on_next_clear) {
|
||||
assert(!tex->swap_rgb_to_bgr);
|
||||
assert(tex->buffer.b.b.nr_samples >= 2);
|
||||
tex->swap_rgb_to_bgr = true;
|
||||
tex->swap_rgb_to_bgr_on_next_clear = false;
|
||||
|
||||
/* Update all sampler views and images. */
|
||||
p_atomic_inc(&sctx->screen->dirty_tex_counter);
|
||||
}
|
||||
|
||||
/* only supported on tiled surfaces */
|
||||
if (tex->surface.is_linear) {
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -325,16 +325,6 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture
|
|||
state[4] &= C_008F20_PITCH;
|
||||
state[4] |= S_008F20_PITCH(epitch);
|
||||
}
|
||||
|
||||
if (tex->swap_rgb_to_bgr) {
|
||||
unsigned swizzle_x = G_008F1C_DST_SEL_X(state[3]);
|
||||
unsigned swizzle_z = G_008F1C_DST_SEL_Z(state[3]);
|
||||
|
||||
state[3] &= C_008F1C_DST_SEL_X;
|
||||
state[3] |= S_008F1C_DST_SEL_X(swizzle_z);
|
||||
state[3] &= C_008F1C_DST_SEL_Z;
|
||||
state[3] |= S_008F1C_DST_SEL_Z(swizzle_x);
|
||||
}
|
||||
}
|
||||
|
||||
static void si_set_sampler_state_desc(struct si_sampler_state *sstate,
|
||||
|
|
|
|||
|
|
@ -145,7 +145,6 @@ static const struct debug_named_value test_options[] = {
|
|||
{"clearbuffer", DBG(TEST_CLEAR_BUFFER), "Test correctness of the clear_buffer compute shader"},
|
||||
{"copybuffer", DBG(TEST_COPY_BUFFER), "Test correctness of the copy_buffer compute shader"},
|
||||
{"imagecopy", DBG(TEST_IMAGE_COPY), "Invoke resource_copy_region tests with images and exit."},
|
||||
{"cbresolve", DBG(TEST_CB_RESOLVE), "Invoke MSAA resolve tests and exit."},
|
||||
{"computeblit", DBG(TEST_COMPUTE_BLIT), "Invoke blits tests and exit."},
|
||||
{"testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit."},
|
||||
{"testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit."},
|
||||
|
|
@ -258,8 +257,6 @@ static void si_destroy_context(struct pipe_context *context)
|
|||
|
||||
if (sctx->custom_dsa_flush)
|
||||
sctx->b.delete_depth_stencil_alpha_state(&sctx->b, sctx->custom_dsa_flush);
|
||||
if (sctx->custom_blend_resolve)
|
||||
sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_resolve);
|
||||
if (sctx->custom_blend_fmask_decompress)
|
||||
sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_fmask_decompress);
|
||||
if (sctx->custom_blend_eliminate_fastclear)
|
||||
|
|
@ -1669,7 +1666,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||
if (test_flags & DBG(TEST_IMAGE_COPY))
|
||||
si_test_image_copy_region(sscreen);
|
||||
|
||||
if (test_flags & (DBG(TEST_CB_RESOLVE) | DBG(TEST_COMPUTE_BLIT)))
|
||||
if (test_flags & DBG(TEST_COMPUTE_BLIT))
|
||||
si_test_blit(sscreen, test_flags);
|
||||
|
||||
if (test_flags & DBG(TEST_DMA_PERF))
|
||||
|
|
|
|||
|
|
@ -253,7 +253,6 @@ enum
|
|||
DBG_TEST_CLEAR_BUFFER,
|
||||
DBG_TEST_COPY_BUFFER,
|
||||
DBG_TEST_IMAGE_COPY,
|
||||
DBG_TEST_CB_RESOLVE,
|
||||
DBG_TEST_COMPUTE_BLIT,
|
||||
DBG_TEST_VMFAULT_CP,
|
||||
DBG_TEST_VMFAULT_SHADER,
|
||||
|
|
@ -359,9 +358,6 @@ struct si_texture {
|
|||
struct si_resource *cmask_buffer;
|
||||
unsigned cb_color_info; /* fast clear enable bit */
|
||||
unsigned color_clear_value[2]; /* not on gfx11 */
|
||||
unsigned last_msaa_resolve_target_micro_mode;
|
||||
bool swap_rgb_to_bgr_on_next_clear;
|
||||
bool swap_rgb_to_bgr;
|
||||
unsigned num_level0_transfers;
|
||||
unsigned plane_index; /* other planes are different pipe_resources */
|
||||
unsigned num_planes;
|
||||
|
|
@ -946,7 +942,6 @@ struct si_context {
|
|||
void *no_velems_state;
|
||||
void *discard_rasterizer_state;
|
||||
void *custom_dsa_flush;
|
||||
void *custom_blend_resolve;
|
||||
void *custom_blend_fmask_decompress;
|
||||
void *custom_blend_eliminate_fastclear;
|
||||
void *custom_blend_dcc_decompress;
|
||||
|
|
@ -1414,8 +1409,6 @@ void si_gfx_copy_image(struct si_context *sctx, struct pipe_resource *dst,
|
|||
const struct pipe_box *src_box);
|
||||
void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex);
|
||||
void si_flush_implicit_resources(struct si_context *sctx);
|
||||
bool si_msaa_resolve_blit_via_CB(struct pipe_context *ctx, const struct pipe_blit_info *info,
|
||||
bool fail_if_slow);
|
||||
void si_gfx_blit(struct pipe_context *ctx, const struct pipe_blit_info *info);
|
||||
|
||||
/* si_nir_optim.c */
|
||||
|
|
|
|||
|
|
@ -546,7 +546,8 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
|
|||
ac_pm4_set_reg(&pm4->base, R_028760_SX_MRT0_BLEND_OPT + i * 4, sx_mrt_blend_opt[i]);
|
||||
|
||||
/* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
|
||||
if (blend->dual_src_blend || logicop_enable || mode == V_028808_CB_RESOLVE ||
|
||||
assert(mode != V_028808_CB_RESOLVE); /* never used */
|
||||
if (blend->dual_src_blend || logicop_enable ||
|
||||
/* Disabling RB+ improves blending performance in synthetic tests on GFX11. */
|
||||
(sctx->gfx_level == GFX11 && blend->blend_enable_4bit))
|
||||
color_control |= S_028808_DISABLE_DUAL_QUAD(1);
|
||||
|
|
@ -2477,6 +2478,11 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
|||
return;
|
||||
}
|
||||
|
||||
ASSERTED bool is_msaa_resolve = state->nr_cbufs == 2 &&
|
||||
state->cbufs[0].texture && state->cbufs[0].texture->nr_samples > 1 &&
|
||||
state->cbufs[1].texture && state->cbufs[1].texture->nr_samples <= 1;
|
||||
assert(!is_msaa_resolve); /* CB_RESOLVE is never used (also not supported by GFX11+). */
|
||||
|
||||
si_fb_barrier_after_rendering(sctx, SI_FB_BARRIER_SYNC_ALL);
|
||||
|
||||
/* Take the maximum of the old and new count. If the new count is lower,
|
||||
|
|
@ -2564,16 +2570,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
|||
else
|
||||
sctx->framebuffer.uncompressed_cb_mask |= 1 << i;
|
||||
|
||||
/* Don't update nr_color_samples for non-AA buffers.
|
||||
* (e.g. destination of MSAA resolve)
|
||||
*/
|
||||
if (tex->buffer.b.b.nr_samples >= 2 &&
|
||||
tex->buffer.b.b.nr_storage_samples < tex->buffer.b.b.nr_samples) {
|
||||
sctx->framebuffer.nr_color_samples =
|
||||
MIN2(sctx->framebuffer.nr_color_samples, tex->buffer.b.b.nr_storage_samples);
|
||||
sctx->framebuffer.nr_color_samples = MAX2(1, sctx->framebuffer.nr_color_samples);
|
||||
}
|
||||
|
||||
if (tex->surface.is_linear)
|
||||
sctx->framebuffer.any_dst_linear = true;
|
||||
|
||||
|
|
@ -2692,12 +2688,6 @@ static void gfx6_emit_framebuffer_state(struct si_context *sctx, unsigned index)
|
|||
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
||||
struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
|
||||
unsigned i, nr_cbufs = state->nr_cbufs;
|
||||
bool is_msaa_resolve = state->nr_cbufs == 2 &&
|
||||
state->cbufs[0].texture && state->cbufs[0].texture->nr_samples > 1 &&
|
||||
state->cbufs[1].texture && state->cbufs[1].texture->nr_samples <= 1;
|
||||
|
||||
/* CB can't do MSAA resolve on gfx11. */
|
||||
assert(!is_msaa_resolve || sctx->gfx_level < GFX11);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -2752,8 +2742,7 @@ static void gfx6_emit_framebuffer_state(struct si_context *sctx, unsigned index)
|
|||
/* CMASK and fast clears are configured elsewhere. */
|
||||
.cmask_enabled = false,
|
||||
.fast_clear_enabled = false,
|
||||
.dcc_enabled = vi_dcc_enabled(tex, cb_psurf->level) &&
|
||||
(i != 1 || !is_msaa_resolve),
|
||||
.dcc_enabled = vi_dcc_enabled(tex, cb_psurf->level),
|
||||
};
|
||||
struct ac_cb_surface cb_surf;
|
||||
|
||||
|
|
@ -2762,20 +2751,6 @@ static void gfx6_emit_framebuffer_state(struct si_context *sctx, unsigned index)
|
|||
cb_surf.cb_color_info |= tex->cb_color_info;
|
||||
|
||||
if (sctx->gfx_level < GFX11) {
|
||||
if (tex->swap_rgb_to_bgr) {
|
||||
/* Swap R and B channels. */
|
||||
static unsigned rgb_to_bgr[4] = {
|
||||
[V_028C70_SWAP_STD] = V_028C70_SWAP_ALT,
|
||||
[V_028C70_SWAP_ALT] = V_028C70_SWAP_STD,
|
||||
[V_028C70_SWAP_STD_REV] = V_028C70_SWAP_ALT_REV,
|
||||
[V_028C70_SWAP_ALT_REV] = V_028C70_SWAP_STD_REV,
|
||||
};
|
||||
unsigned swap = rgb_to_bgr[G_028C70_COMP_SWAP(cb_surf.cb_color_info)];
|
||||
|
||||
cb_surf.cb_color_info &= C_028C70_COMP_SWAP;
|
||||
cb_surf.cb_color_info |= S_028C70_COMP_SWAP(swap);
|
||||
}
|
||||
|
||||
if (cb_psurf->level > 0)
|
||||
cb_surf.cb_color_info &= C_028C70_FAST_CLEAR;
|
||||
else
|
||||
|
|
@ -2990,12 +2965,6 @@ static void gfx11_dgpu_emit_framebuffer_state(struct si_context *sctx, unsigned
|
|||
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
||||
struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
|
||||
unsigned i, nr_cbufs = state->nr_cbufs;
|
||||
bool is_msaa_resolve = state->nr_cbufs == 2 &&
|
||||
state->cbufs[0].texture && state->cbufs[0].texture->nr_samples > 1 &&
|
||||
state->cbufs[1].texture && state->cbufs[1].texture->nr_samples <= 1;
|
||||
|
||||
/* CB can't do MSAA resolve on gfx11. */
|
||||
assert(!is_msaa_resolve);
|
||||
|
||||
radeon_begin(cs);
|
||||
gfx11_begin_packed_context_regs();
|
||||
|
|
@ -3137,12 +3106,6 @@ static void gfx12_emit_framebuffer_state(struct si_context *sctx, unsigned index
|
|||
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
||||
struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
|
||||
unsigned i, nr_cbufs = state->nr_cbufs;
|
||||
bool is_msaa_resolve = state->nr_cbufs == 2 &&
|
||||
state->cbufs[0].texture && state->cbufs[0].texture->nr_samples > 1 &&
|
||||
state->cbufs[1].texture && state->cbufs[1].texture->nr_samples <= 1;
|
||||
|
||||
/* CB can't do MSAA resolve. */
|
||||
assert(!is_msaa_resolve);
|
||||
|
||||
radeon_begin(cs);
|
||||
gfx12_begin_context_regs();
|
||||
|
|
@ -4799,7 +4762,6 @@ void si_init_state_functions(struct si_context *sctx)
|
|||
sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
|
||||
|
||||
if (sctx->gfx_level < GFX11) {
|
||||
sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
|
||||
sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
|
||||
sctx->custom_blend_eliminate_fastclear =
|
||||
si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
|
||||
|
|
|
|||
|
|
@ -400,8 +400,7 @@ void si_test_blit_perf(struct si_screen *sscreen)
|
|||
const char *special_op =
|
||||
test_flavor == TEST_FB_CLEAR ? "cleartex" :
|
||||
test_flavor == TEST_CLEAR && box_flavor == BOX_FULL ? "fastclear" :
|
||||
test_flavor == TEST_BLIT && !yflip ? "copy" :
|
||||
test_flavor == TEST_RESOLVE ? "cbresolve" : "n/a";
|
||||
test_flavor == TEST_BLIT && !yflip ? "copy" : "n/a";
|
||||
|
||||
printf("%-8s, %-9s, %uD, %-18s, %u, %-5s, %-11s, %-11s",
|
||||
test_strings[test_flavor], special_op, dim,
|
||||
|
|
@ -631,8 +630,6 @@ void si_test_blit_perf(struct si_screen *sscreen)
|
|||
si_resource_copy_region(ctx, dst[size_factor], 0, dst_box.x,
|
||||
dst_box.y, dst_box.z, src[size_factor],
|
||||
0, &src_box);
|
||||
} else if (test_flavor == TEST_RESOLVE) {
|
||||
success &= si_msaa_resolve_blit_via_CB(ctx, &info, false);
|
||||
} else {
|
||||
success = false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -192,7 +192,6 @@ static bool compare_gpu_textures(struct pipe_context *ctx, struct pipe_resource
|
|||
}
|
||||
|
||||
struct si_format_options {
|
||||
bool only_resolve;
|
||||
bool allow_float;
|
||||
bool allow_unorm16;
|
||||
bool allow_srgb;
|
||||
|
|
@ -274,10 +273,6 @@ static enum pipe_format get_random_format(struct si_screen *sscreen, bool render
|
|||
continue;
|
||||
}
|
||||
|
||||
if (options->only_resolve &&
|
||||
(desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS || util_format_is_pure_integer(format)))
|
||||
continue;
|
||||
|
||||
if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) {
|
||||
/* Every integer format should have an equivalent non-integer format, but 128-bit integer
|
||||
* formats don't have that if floats are disallowed, which can cause an infinite loop later
|
||||
|
|
@ -308,18 +303,9 @@ static enum pipe_format get_random_format(struct si_screen *sscreen, bool render
|
|||
|
||||
#define MAX_ALLOC_SIZE (64 * 1024 * 1024)
|
||||
|
||||
static void set_random_image_attrs(struct pipe_resource *templ, bool allow_msaa,
|
||||
bool only_cb_resolve)
|
||||
static void set_random_image_attrs(struct pipe_resource *templ, bool allow_msaa)
|
||||
{
|
||||
unsigned target_index;
|
||||
|
||||
if (only_cb_resolve) {
|
||||
target_index = 6; /* CB resolving doesn't support array textures. */
|
||||
} else {
|
||||
target_index = rand() % (allow_msaa ? 8 : 6);
|
||||
}
|
||||
|
||||
switch (target_index) {
|
||||
switch (rand() % (allow_msaa ? 8 : 6)) {
|
||||
case 0:
|
||||
templ->target = PIPE_TEXTURE_1D;
|
||||
break;
|
||||
|
|
@ -505,7 +491,6 @@ void si_test_image_copy_region(struct si_screen *sscreen)
|
|||
|
||||
/* generate a random test case */
|
||||
struct si_format_options format_options = {
|
||||
.only_resolve = false,
|
||||
.allow_float = true,
|
||||
.allow_unorm16 = true,
|
||||
.allow_x_channels = false, /* cpu_texture doesn't implement X channels */
|
||||
|
|
@ -518,8 +503,8 @@ void si_test_image_copy_region(struct si_screen *sscreen)
|
|||
/* MSAA copy testing not implemented and might be too difficult because of how
|
||||
* cpu_texture works.
|
||||
*/
|
||||
set_random_image_attrs(&tsrc, false, false);
|
||||
set_random_image_attrs(&tdst, false, false);
|
||||
set_random_image_attrs(&tsrc, false);
|
||||
set_random_image_attrs(&tdst, false);
|
||||
|
||||
/* Allocate textures (both the GPU and CPU copies).
|
||||
* The CPU will emulate what the GPU should be doing.
|
||||
|
|
@ -642,7 +627,6 @@ void si_test_blit(struct si_screen *sscreen, unsigned test_flags)
|
|||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
unsigned iterations;
|
||||
unsigned num_pass = 0, num_fail = 0;
|
||||
bool only_cb_resolve = test_flags == DBG(TEST_CB_RESOLVE);
|
||||
|
||||
bool allow_float = false;
|
||||
bool allow_unorm16_dst = false;
|
||||
|
|
@ -657,15 +641,6 @@ void si_test_blit(struct si_screen *sscreen, unsigned test_flags)
|
|||
|
||||
/* The following tests always compare the tested operation with the gfx blit (u_blitter). */
|
||||
switch (test_flags) {
|
||||
case DBG(TEST_CB_RESOLVE):
|
||||
/* This is mostly failing because the precision of CB_RESOLVE is very different
|
||||
* from the gfx blit. FP32 and FP16 are the only formats that mostly pass.
|
||||
*/
|
||||
allow_float = true;
|
||||
allow_unorm16_dst = true;
|
||||
allow_srgb_dst = true;
|
||||
break;
|
||||
|
||||
case DBG(TEST_COMPUTE_BLIT):
|
||||
//allow_float = true; /* precision difference: NaNs not preserved by CB (u_blitter) */
|
||||
allow_unorm16_dst = true;
|
||||
|
|
@ -701,7 +676,6 @@ void si_test_blit(struct si_screen *sscreen, unsigned test_flags)
|
|||
/* Generate a random test case. */
|
||||
{
|
||||
struct si_format_options format_options = {
|
||||
.only_resolve = only_cb_resolve,
|
||||
.allow_float = allow_float,
|
||||
.allow_unorm16 = true,
|
||||
.allow_srgb = true,
|
||||
|
|
@ -714,8 +688,8 @@ void si_test_blit(struct si_screen *sscreen, unsigned test_flags)
|
|||
tdst.format = get_random_format(sscreen, true, tsrc.format, 0, 0, &format_options);
|
||||
}
|
||||
|
||||
set_random_image_attrs(&tsrc, true, only_cb_resolve);
|
||||
set_random_image_attrs(&tdst, !only_cb_resolve, false);
|
||||
set_random_image_attrs(&tsrc, true);
|
||||
set_random_image_attrs(&tdst, true);
|
||||
|
||||
/* MSAA blits must have matching sample counts. */
|
||||
if (tsrc.nr_samples > 1 && tdst.nr_samples > 1)
|
||||
|
|
@ -753,7 +727,6 @@ void si_test_blit(struct si_screen *sscreen, unsigned test_flags)
|
|||
|
||||
{
|
||||
struct si_format_options format_options = {
|
||||
.only_resolve = only_cb_resolve,
|
||||
.allow_float = allow_float,
|
||||
.allow_unorm16 = true,
|
||||
.allow_srgb = true,
|
||||
|
|
@ -945,14 +918,10 @@ void si_test_blit(struct si_screen *sscreen, unsigned test_flags)
|
|||
info.src.resource = comp_src;
|
||||
info.dst.resource = comp_dst;
|
||||
|
||||
bool success;
|
||||
if (only_cb_resolve)
|
||||
success = si_msaa_resolve_blit_via_CB(ctx, &info, false);
|
||||
else
|
||||
success = si_compute_blit(sctx, &info, NULL, 0, 0, false);
|
||||
bool success = si_compute_blit(sctx, &info, NULL, 0, 0, false);
|
||||
|
||||
if (success) {
|
||||
printf(" %-7s", only_cb_resolve ? "resolve" : "comp");
|
||||
printf(" %-7s", "comp");
|
||||
} else {
|
||||
si_gfx_blit(ctx, &info);
|
||||
printf(" %-7s", "gfx");
|
||||
|
|
|
|||
|
|
@ -591,7 +591,6 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_tex
|
|||
tex->surface.meta_offset = new_tex->surface.meta_offset;
|
||||
tex->cb_color_info = new_tex->cb_color_info;
|
||||
memcpy(tex->color_clear_value, new_tex->color_clear_value, sizeof(tex->color_clear_value));
|
||||
tex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode;
|
||||
|
||||
memcpy(tex->depth_clear_value, new_tex->depth_clear_value, sizeof(tex->depth_clear_value));
|
||||
tex->dirty_level_mask = new_tex->dirty_level_mask;
|
||||
|
|
@ -1208,8 +1207,6 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
|
|||
}
|
||||
|
||||
/* Applies to GCN. */
|
||||
tex->last_msaa_resolve_target_micro_mode = tex->surface.micro_tile_mode;
|
||||
|
||||
if (tex->is_depth) {
|
||||
tex->htile_stencil_disabled = !tex->surface.has_stencil;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue