diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index bd9fe3aec7c..de032ca9554 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -853,7 +853,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->si_TA_CS_BC_BASE_ADDR_allowed = true; info->has_bo_metadata = true; info->has_gpu_reset_status_query = true; - info->has_eqaa_surface_allocator = true; + info->has_eqaa_surface_allocator = info->chip_class < GFX11; info->has_format_bc1_through_bc7 = true; /* DRM 3.1.0 doesn't flush TC for GFX8 correctly. */ info->kernel_flushes_tc_l2_after_ib = info->chip_class != GFX8 || info->drm_minor >= 2; diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 210aa392c2a..20083c1ce85 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -447,6 +447,9 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture if (!level_mask) goto expand_fmask; + /* No color decompression is needed on GFX11. */ + assert(sctx->chip_class < GFX11 || need_dcc_decompress); + if (unlikely(sctx->log)) u_log_printf(sctx->log, "\n------------------------------------------------\n" @@ -534,6 +537,7 @@ static void si_blit_decompress_color(struct si_context *sctx, struct si_texture expand_fmask: if (need_fmask_expand && tex->surface.fmask_offset && !tex->fmask_is_identity) { + assert(sctx->chip_class < GFX11); /* no FMASK on gfx11 */ si_compute_expand_fmask(&sctx->b, &tex->buffer.b.b); tex->fmask_is_identity = true; } diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index b011efc4a63..93e15450714 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -106,6 +106,8 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info, static bool si_alloc_separate_cmask(struct si_screen *sscreen, struct si_texture *tex) { + assert(sscreen->info.chip_class < GFX11); + /* CMASK for MSAA is allocated in advance or always disabled * by "nofmask" option. */ @@ -180,10 +182,10 @@ bool vi_alpha_is_on_msb(struct si_screen *sscreen, enum pipe_format format) return comp_swap != V_028C70_SWAP_STD_REV && comp_swap != V_028C70_SWAP_ALT_REV; } -static bool vi_get_fast_clear_parameters(struct si_screen *sscreen, enum pipe_format base_format, - enum pipe_format surface_format, - const union pipe_color_union *color, uint32_t *clear_value, - bool *eliminate_needed) +static bool gfx8_get_dcc_clear_parameters(struct si_screen *sscreen, enum pipe_format base_format, + enum pipe_format surface_format, + const union pipe_color_union *color, uint32_t *clear_value, + bool *eliminate_needed) { /* If we want to clear without needing a fast clear eliminate step, we * can set color and alpha independently to 0 or 1 (or 0/max for integer @@ -204,7 +206,7 @@ static bool vi_get_fast_clear_parameters(struct si_screen *sscreen, enum pipe_fo return false; *eliminate_needed = true; - *clear_value = DCC_CLEAR_COLOR_REG; + *clear_value = GFX8_DCC_CLEAR_REG; if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) return true; /* need ELIMINATE_FAST_CLEAR */ @@ -278,18 +280,143 @@ static bool vi_get_fast_clear_parameters(struct si_screen *sscreen, enum pipe_fo if (color_value) { if (alpha_value) - *clear_value = DCC_CLEAR_COLOR_1111; + *clear_value = GFX8_DCC_CLEAR_1111; else - *clear_value = DCC_CLEAR_COLOR_1110; + *clear_value = GFX8_DCC_CLEAR_1110; } else { if (alpha_value) - *clear_value = DCC_CLEAR_COLOR_0001; + *clear_value = GFX8_DCC_CLEAR_0001; else - *clear_value = DCC_CLEAR_COLOR_0000; + *clear_value = GFX8_DCC_CLEAR_0000; } return true; } +static bool gfx11_get_dcc_clear_parameters(struct si_screen *sscreen, enum pipe_format surface_format, + const union pipe_color_union *color, uint32_t *clear_value) +{ + const struct util_format_description *desc = + util_format_description(si_simplify_cb_format(surface_format)); + unsigned start_bit = UINT_MAX; + unsigned end_bit = 0; + + /* TODO: 8bpp and 16bpp fast DCC clears don't work. */ + if (desc->block.bits <= 16) + return false; + + /* Find the used bit range. */ + for (unsigned i = 0; i < 4; i++) { + unsigned swizzle = desc->swizzle[i]; + + if (swizzle >= PIPE_SWIZZLE_0) + continue; + + start_bit = MIN2(start_bit, desc->channel[swizzle].shift); + end_bit = MAX2(end_bit, desc->channel[swizzle].shift + desc->channel[swizzle].size); + } + + union { + uint8_t ub[16]; + uint16_t us[8]; + uint32_t ui[4]; + } value = {}; + util_pack_color_union(surface_format, (union util_color*)&value, color); + + /* Check the cases where all components or bits are either all 0 or all 1. */ + bool all_bits_are_0 = true; + bool all_bits_are_1 = true; + bool all_words_are_fp16_1 = false; + bool all_words_are_fp32_1 = false; + + for (unsigned i = start_bit; i < end_bit; i++) { + bool bit = value.ub[i / 8] & BITFIELD_BIT(i % 8); + + all_bits_are_0 &= !bit; + all_bits_are_1 &= bit; + } + + if (start_bit % 16 == 0 && end_bit % 16 == 0) { + all_words_are_fp16_1 = true; + for (unsigned i = start_bit / 16; i < end_bit / 16; i++) + all_words_are_fp16_1 &= value.us[i] == 0x3c00; + } + + if (start_bit % 32 == 0 && end_bit % 32 == 0) { + all_words_are_fp32_1 = true; + for (unsigned i = start_bit / 32; i < end_bit / 32; i++) + all_words_are_fp32_1 &= value.ui[i] == 0x3f800000; + } + +#if 0 /* debug code */ + int i = util_format_get_first_non_void_channel(surface_format); + if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED && desc->channel[i].pure_integer) { + printf("%i %i %i %i\n", color->i[0], color->i[1], color->i[2], color->i[3]); + } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED && desc->channel[i].pure_integer) { + printf("%u %u %u %u\n", color->ui[0], color->ui[1], color->ui[2], color->ui[3]); + } else { + printf("%f %f %f %f\n", color->f[0], color->f[1], color->f[2], color->f[3]); + } + for (unsigned i = 0; i < end_bit / 8; i++) + printf("%02x", value.ub[i]); + printf("\n"); + printf("bits=[%u..%u)%s%s%s%s\n", start_bit, end_bit, + all_bits_are_0 ? ", all 0" : "", + all_bits_are_1 ? ", all 1" : "", + all_words_are_fp16_1 ? ", all fp16 1" : "", + all_words_are_fp32_1 ? ", all fp32 1" : ""); +#endif + + *clear_value = 0; + + if (all_bits_are_0 || all_bits_are_1 || all_words_are_fp16_1 || all_words_are_fp32_1) { + if (all_bits_are_0) + *clear_value = GFX11_DCC_CLEAR_0000; + else if (all_bits_are_1) + *clear_value = GFX11_DCC_CLEAR_1111_UNORM; + else if (all_words_are_fp16_1) + *clear_value = GFX11_DCC_CLEAR_1111_FP16; + else if (all_words_are_fp32_1) + *clear_value = GFX11_DCC_CLEAR_1111_FP32; + + return true; + } + + /* Check 0001 and 1110 cases. */ + if (vi_alpha_is_on_msb(sscreen, surface_format)) { + if (desc->nr_channels == 2 && desc->channel[0].size == 8) { + if (value.ub[0] == 0x00 && value.ub[1] == 0xff) { + *clear_value = GFX11_DCC_CLEAR_0001_UNORM; + return true; + } else if (value.ub[0] == 0xff && value.ub[1] == 0x00) { + *clear_value = GFX11_DCC_CLEAR_1110_UNORM; + return true; + } + } else if (desc->nr_channels == 4 && desc->channel[0].size == 8) { + if (value.ub[0] == 0x00 && value.ub[1] == 0x00 && + value.ub[2] == 0x00 && value.ub[3] == 0xff) { + *clear_value = GFX11_DCC_CLEAR_0001_UNORM; + return true; + } else if (value.ub[0] == 0xff && value.ub[1] == 0xff && + value.ub[2] == 0xff && value.ub[3] == 0x00) { + *clear_value = GFX11_DCC_CLEAR_1110_UNORM; + return true; + } + } else if (desc->nr_channels == 4 && desc->channel[0].size == 16) { + if (value.us[0] == 0x0000 && value.us[1] == 0x0000 && + value.us[2] == 0x0000 && value.us[3] == 0xffff) { + *clear_value = GFX11_DCC_CLEAR_0001_UNORM; + return true; + } else if (value.us[0] == 0xffff && value.us[1] == 0xffff && + value.us[2] == 0xffff && value.us[3] == 0x0000) { + *clear_value = GFX11_DCC_CLEAR_1110_UNORM; + return true; + } + } + } + + return false; +} + bool vi_dcc_get_clear_info(struct si_context *sctx, struct si_texture *tex, unsigned level, unsigned clear_value, struct si_clear_info *out) { @@ -301,8 +428,9 @@ bool vi_dcc_get_clear_info(struct si_context *sctx, struct si_texture *tex, unsi if (sctx->chip_class >= GFX10) { /* 4x and 8x MSAA needs a sophisticated compute shader for - * the clear. */ - if (tex->buffer.b.b.nr_storage_samples >= 4) + * the clear. GFX11 doesn't need that. + */ + if (sctx->chip_class < GFX11 && tex->buffer.b.b.nr_storage_samples >= 4) return false; unsigned num_layers = util_num_layers(&tex->buffer.b.b, level); @@ -607,10 +735,16 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers, if (sctx->screen->debug_flags & DBG(NO_DCC_CLEAR)) continue; - if (!vi_get_fast_clear_parameters(sctx->screen, tex->buffer.b.b.format, - fb->cbufs[i]->format, color, &reset_value, - &eliminate_needed)) - continue; + if (sctx->chip_class >= GFX11) { + if (!gfx11_get_dcc_clear_parameters(sctx->screen, fb->cbufs[i]->format, color, + &reset_value)) + continue; + } else { + if (!gfx8_get_dcc_clear_parameters(sctx->screen, tex->buffer.b.b.format, + fb->cbufs[i]->format, color, &reset_value, + &eliminate_needed)) + continue; + } /* Shared textures can't use fast clear without an explicit flush * because the clear color is not exported. @@ -649,6 +783,7 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers, /* DCC fast clear with MSAA should clear CMASK to 0xC. */ if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer) { + assert(sctx->chip_class < GFX11); /* no FMASK/CMASK on GFX11 */ assert(num_clears < ARRAY_SIZE(info)); si_init_buffer_clear(&info[num_clears++], &tex->cmask_buffer->b.b, tex->surface.cmask_offset, tex->surface.cmask_size, 0xCCCCCCCC); @@ -656,6 +791,10 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers, fmask_decompress_needed = true; } } else { + /* No CMASK on GFX11. */ + if (sctx->chip_class >= GFX11) + continue; + if (level > 0) continue; @@ -740,6 +879,7 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers, if ((eliminate_needed || fmask_decompress_needed) && !(tex->dirty_level_mask & (1 << level))) { + assert(sctx->chip_class < GFX11); /* no decompression needed on GFX11 */ tex->dirty_level_mask |= 1 << level; si_set_sampler_depth_decompress_mask(sctx, tex); p_atomic_inc(&sctx->screen->compressed_colortex_counter); @@ -753,6 +893,9 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers, if (sctx->screen->info.has_dcc_constant_encode && !eliminate_needed) continue; + /* There are no clear color registers on GFX11. */ + assert(sctx->chip_class < GFX11); + if (si_set_clear_color(tex, fb->cbufs[i]->format, color)) { sctx->framebuffer.dirty_cbufs |= 1 << i; si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 83dd9616d40..2b80eeb4814 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -757,6 +757,8 @@ void gfx9_clear_dcc_msaa(struct si_context *sctx, struct pipe_resource *res, uin { struct si_texture *tex = (struct si_texture*)res; + assert(sctx->chip_class < GFX11); + /* Set the DCC buffer. */ assert(tex->surface.meta_offset && tex->surface.meta_offset <= UINT_MAX); assert(tex->buffer.bo_size <= UINT_MAX); @@ -809,6 +811,8 @@ void si_compute_expand_fmask(struct pipe_context *ctx, struct pipe_resource *tex unsigned log_samples = util_logbase2(tex->nr_samples); assert(tex->nr_samples >= 2); + assert(sctx->chip_class < GFX11); + /* EQAA FMASK expansion is unimplemented. */ if (tex->nr_samples != tex->nr_storage_samples) return; diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 5bef7a4d014..b95c05df7eb 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -154,7 +154,6 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA: case PIPE_CAP_TGSI_DIV: case PIPE_CAP_PACKED_UNIFORMS: - case PIPE_CAP_SHADER_SAMPLES_IDENTICAL: case PIPE_CAP_GL_SPIRV: case PIPE_CAP_ALPHA_TO_COVERAGE_DITHER_CONTROL: case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE: @@ -175,6 +174,9 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_DRAW_VERTEX_STATE: return !(sscreen->debug_flags & DBG(NO_FAST_DISPLAY_LIST)); + case PIPE_CAP_SHADER_SAMPLES_IDENTICAL: + return sscreen->info.chip_class < GFX11; + case PIPE_CAP_GLSL_ZERO_INIT: return 2; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index a80e1f6bb4e..9ee492b3acd 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -162,14 +162,30 @@ enum si_has_ngg { NGG_ON, }; +#define DCC_CODE(x) (((x) << 24) | ((x) << 16) | ((x) << 8) | (x)) + enum si_clear_code { - DCC_CLEAR_COLOR_0000 = 0x00000000, - DCC_CLEAR_COLOR_0001 = 0x40404040, - DCC_CLEAR_COLOR_1110 = 0x80808080, - DCC_CLEAR_COLOR_1111 = 0xC0C0C0C0, - DCC_CLEAR_COLOR_REG = 0x20202020, - DCC_UNCOMPRESSED = 0xFFFFFFFF, + /* Common clear codes. */ + DCC_CLEAR_0000 = DCC_CODE(0x00), /* all bits are 0 */ + DCC_UNCOMPRESSED = DCC_CODE(0xFF), + + GFX8_DCC_CLEAR_0000 = DCC_CLEAR_0000, + GFX8_DCC_CLEAR_0001 = DCC_CODE(0x40), + GFX8_DCC_CLEAR_1110 = DCC_CODE(0x80), + GFX8_DCC_CLEAR_1111 = DCC_CODE(0xC0), + GFX8_DCC_CLEAR_REG = DCC_CODE(0x20), + GFX9_DCC_CLEAR_SINGLE = DCC_CODE(0x10), + + GFX11_DCC_CLEAR_SINGLE = DCC_CODE(0x01), + GFX11_DCC_CLEAR_0000 = DCC_CLEAR_0000, /* all bits are 0 */ + GFX11_DCC_CLEAR_1111_UNORM = DCC_CODE(0x02), /* all bits are 1 */ + GFX11_DCC_CLEAR_1111_FP16 = DCC_CODE(0x04), /* all 16-bit words are 0x3c00, max 64bpp */ + GFX11_DCC_CLEAR_1111_FP32 = DCC_CODE(0x06), /* all 32-bit words are 0x3f800000 */ + /* Color bits are 0, alpha bits are 1; only 88, 8888, 16161616 with alpha_on_msb=1 */ + GFX11_DCC_CLEAR_0001_UNORM = DCC_CODE(0x08), + /* Color bits are 1, alpha bits are 0, only 88, 8888, 16161616 with alpha_on_msb=1 */ + GFX11_DCC_CLEAR_1110_UNORM = DCC_CODE(0x0A), }; #define SI_IMAGE_ACCESS_DCC_OFF (1 << 8) @@ -378,7 +394,7 @@ struct si_texture { uint64_t cmask_base_address_reg; struct si_resource *cmask_buffer; unsigned cb_color_info; /* fast clear enable bit */ - unsigned color_clear_value[2]; + unsigned color_clear_value[2]; /* not on gfx11 */ unsigned last_msaa_resolve_target_micro_mode; bool swap_rgb_to_bgr_on_next_clear; bool swap_rgb_to_bgr; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 150ea708f92..9c45531d3d5 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -112,12 +112,18 @@ static void si_emit_cb_render_state(struct si_context *sctx) blend->dcc_msaa_corruption_4bit & cb_target_mask && sctx->framebuffer.nr_samples >= 2; unsigned watermark = sctx->framebuffer.dcc_overwrite_combiner_watermark; - radeon_opt_set_context_reg( - sctx, R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL, - S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(sctx->chip_class <= GFX9) | - S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | - S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) | - S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->screen->info.has_dcc_constant_encode)); + if (sctx->chip_class >= GFX11) { + radeon_opt_set_context_reg(sctx, R_028424_CB_FDCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL, + S_028424_SAMPLE_MASK_TRACKER_DISABLE(oc_disable) | + S_028424_SAMPLE_MASK_TRACKER_WATERMARK(watermark)); + } else { + radeon_opt_set_context_reg( + sctx, R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL, + S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(sctx->chip_class <= GFX9) | + S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | + S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) | + S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->screen->info.has_dcc_constant_encode)); + } } /* RB+ register settings. */ @@ -486,6 +492,8 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, blend->cb_target_mask = 0; blend->cb_target_enabled_4bit = 0; + unsigned last_blend_cntl; + for (int i = 0; i < num_shader_outputs; i++) { /* state->rt entries > 0 only written if independent blending */ const int j = state->independent_blend_enable ? i : 0; @@ -505,9 +513,12 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, /* Only set dual source blending for MRT0 to avoid a hang. */ if (i >= 1 && blend->dual_src_blend) { - /* Vulkan does this for dual source blending. */ - if (i == 1) - blend_cntl |= S_028780_ENABLE(1); + if (i == 1) { + if (sctx->chip_class >= GFX11) + blend_cntl = last_blend_cntl; + else + blend_cntl = S_028780_ENABLE(1); + } si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); continue; @@ -586,6 +597,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(sctx->chip_class, dstA)); } si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); + last_blend_cntl = blend_cntl; blend->blend_enable_4bit |= 0xfu << (i * 4); @@ -2525,18 +2537,19 @@ static void si_initialize_color_surface(struct si_context *sctx, struct si_surfa unsigned log_samples = util_logbase2(tex->buffer.b.b.nr_samples); unsigned log_fragments = util_logbase2(tex->buffer.b.b.nr_storage_samples); - if (sctx->chip_class >= GFX11) + if (sctx->chip_class >= GFX11) { color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_fragments); - else + } else { color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_fragments); - if (tex->surface.fmask_offset) { - color_info |= S_028C70_COMPRESSION(1); - unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.color.fmask.bankh); + if (tex->surface.fmask_offset) { + color_info |= S_028C70_COMPRESSION(1); + unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.color.fmask.bankh); - if (sctx->chip_class == GFX6) { - /* due to a hw bug, FMASK_BANK_HEIGHT must be set on GFX6 too */ - color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); + if (sctx->chip_class == GFX6) { + /* due to a hw bug, FMASK_BANK_HEIGHT must be set on GFX6 too */ + color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); + } } } } @@ -2582,6 +2595,8 @@ static void si_initialize_color_surface(struct si_context *sctx, struct si_surfa /* GFX10 field has the same base shift as the GFX6 field */ unsigned color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | S_028C6C_SLICE_MAX_GFX10(surf->base.u.tex.last_layer); + unsigned mip0_width = surf->width0 - 1; + unsigned mip0_height = surf->height0 - 1; unsigned mip0_depth = util_max_layer(&tex->buffer.b.b, 0); if (sctx->chip_class >= GFX10) { @@ -2597,8 +2612,8 @@ static void si_initialize_color_surface(struct si_context *sctx, struct si_surfa } if (sctx->chip_class >= GFX9) { - surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(surf->width0 - 1) | - S_028C68_MIP0_HEIGHT(surf->height0 - 1) | + surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(mip0_width) | + S_028C68_MIP0_HEIGHT(mip0_height) | S_028C68_MAX_MIP(tex->buffer.b.b.last_level); } @@ -3141,7 +3156,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx) cb = (struct si_surface *)state->cbufs[i]; if (!cb) { radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, - S_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID)); + sctx->chip_class >= GFX11 ? + S_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID) : + S_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID)); continue; } @@ -3178,7 +3195,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) cb_color_info |= S_028C70_COMP_SWAP(swap); } - if (cb->base.u.tex.level > 0) + if (sctx->chip_class < GFX11 && cb->base.u.tex.level > 0) cb_color_info &= C_028C70_FAST_CLEAR; if (tex->surface.fmask_offset) { @@ -3192,6 +3209,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx) state->cbufs[1] == &cb->base && state->cbufs[1]->texture->nr_samples <= 1; + /* CB can't do MSAA resolve on gfx11. */ + assert(!is_msaa_resolve_dst || sctx->chip_class < GFX11); + if (!is_msaa_resolve_dst && sctx->chip_class < GFX11) cb_color_info |= S_028C70_DCC_ENABLE(1); @@ -3202,7 +3222,33 @@ static void si_emit_framebuffer_state(struct si_context *sctx) cb_dcc_base |= dcc_tile_swizzle; } - if (sctx->chip_class >= GFX10) { + if (sctx->chip_class >= GFX11) { + unsigned cb_color_attrib3, cb_fdcc_control; + + /* Set mutable surface parameters. */ + cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; + cb_color_base |= tex->surface.tile_swizzle; + + cb_color_attrib3 = cb->cb_color_attrib3 | + S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | + S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.color.dcc.pipe_aligned); + cb_fdcc_control = cb->cb_dcc_control | + S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) | + S_028C78_FDCC_ENABLE(vi_dcc_enabled(tex, cb->base.u.tex.level)); + + radeon_set_context_reg_seq(R_028C6C_CB_COLOR0_VIEW + i * 0x3C, 4); + radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ + radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ + radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ + radeon_emit(cb_fdcc_control); /* CB_COLOR0_FDCC_CONTROL */ + + radeon_set_context_reg(R_028C60_CB_COLOR0_BASE + i * 0x3C, cb_color_base); + radeon_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + i * 4, cb_color_base >> 32); + radeon_set_context_reg(R_028C94_CB_COLOR0_DCC_BASE + i * 0x3C, cb_dcc_base); + radeon_set_context_reg(R_028EA0_CB_COLOR0_DCC_BASE_EXT + i * 4, cb_dcc_base >> 32); + radeon_set_context_reg(R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2); + radeon_set_context_reg(R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3); + } else if (sctx->chip_class >= GFX10) { unsigned cb_color_attrib3; /* Set mutable surface parameters. */ @@ -5310,14 +5356,18 @@ void si_init_state_functions(struct si_context *sctx) sctx->b.delete_depth_stencil_alpha_state = si_delete_dsa_state; sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); - sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); - sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); - sctx->custom_blend_eliminate_fastclear = - si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); + + if (sctx->chip_class < GFX11) { + sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); + sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); + sctx->custom_blend_eliminate_fastclear = + si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); + } + sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, sctx->chip_class >= GFX11 ? - V_028808_CB_DCC_DECOMPRESS_GFX11 : - V_028808_CB_DCC_DECOMPRESS_GFX8); + V_028808_CB_DCC_DECOMPRESS_GFX11 : + V_028808_CB_DCC_DECOMPRESS_GFX8); sctx->b.set_clip_state = si_set_clip_state; sctx->b.set_stencil_ref = si_set_stencil_ref; diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 211a3be63f2..7b110eeae80 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -253,6 +253,9 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac flags |= RADEON_SURF_DISABLE_DCC; break; + case GFX11: + break; + default: assert(0); } @@ -279,6 +282,11 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac } if (ptex->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING) { + /* GFX11 shouldn't get here because the flag is only used by the CB MSAA resolving + * that GFX11 doesn't have. + */ + assert(sscreen->info.chip_class <= GFX10_3); + flags |= RADEON_SURF_FORCE_SWIZZLE_MODE; if (sscreen->info.chip_class >= GFX10) @@ -997,6 +1005,7 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, tex->db_compatible = surface->flags & RADEON_SURF_ZBUFFER; } else { if (tex->surface.cmask_offset) { + assert(sscreen->info.chip_class < GFX11); tex->cb_color_info |= S_028C70_FAST_CLEAR(1); tex->cmask_buffer = &tex->buffer; } @@ -1068,7 +1077,7 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, /* Simple case - all tiles have DCC enabled. */ assert(num_clears < ARRAY_SIZE(clears)); si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset, - tex->surface.meta_size, DCC_CLEAR_COLOR_0000); + tex->surface.meta_size, DCC_CLEAR_0000); } else if (sscreen->info.chip_class >= GFX9) { /* Clear to uncompressed. Clearing this to black is complicated. */ assert(num_clears < ARRAY_SIZE(clears)); @@ -1097,7 +1106,7 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, if (size) { assert(num_clears < ARRAY_SIZE(clears)); si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset, size, - DCC_CLEAR_COLOR_0000); + DCC_CLEAR_0000); } /* Mipmap levels without DCC. */ if (size != tex->surface.meta_size) { @@ -1115,7 +1124,9 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, * Clear to white to indicate that. */ assert(num_clears < ARRAY_SIZE(clears)); si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.display_dcc_offset, - tex->surface.u.gfx9.color.display_dcc_size, DCC_CLEAR_COLOR_1111); + tex->surface.u.gfx9.color.display_dcc_size, + sscreen->info.chip_class >= GFX11 ? GFX11_DCC_CLEAR_1111_UNORM + : GFX8_DCC_CLEAR_1111); } /* Execute the clears. */