radeonsi/gfx11: use SET_CONTEXT_REG_PAIRS_PACKED for other states

SET_CONTEXT_REG_PAIRS_PACKED is used where the registers being set are non-contiguous.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25941>
This commit is contained in:
Marek Olšák 2023-10-25 04:13:28 -04:00
parent 9579503bef
commit 2ac6816b70
5 changed files with 492 additions and 68 deletions

View file

@ -278,6 +278,28 @@
} \
} while (0)
/* Like gfx11_opt_push_reg, but for 4 consecutive 32-bit registers at byte
 * offsets (reg), (reg)+4, (reg)+8, (reg)+12.
 *
 * All 4 writes are skipped only when all 4 saved-mask bits for
 * reg_enum..reg_enum+3 are set AND all 4 tracked shadow values already equal
 * v1..v4; otherwise all 4 registers are pushed (even if only one changed)
 * and the shadow values + saved mask are updated.
 *
 * v1..v4 are evaluated exactly once each (cached in __v1..__v4) to avoid
 * the classic multiple-evaluation macro hazard.
 * NOTE(review): relies on a variable named `sctx` being in scope at every
 * expansion site.
 */
#define gfx11_opt_push_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, category, buffer, reg_count) do { \
   unsigned __v1 = (v1); \
   unsigned __v2 = (v2); \
   unsigned __v3 = (v3); \
   unsigned __v4 = (v4); \
   if (((sctx->tracked_regs.category##_reg_saved_mask >> (reg_enum)) & 0xf) != 0xf || \
       sctx->tracked_regs.category##_reg_value[(reg_enum)] != __v1 || \
       sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] != __v2 || \
       sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] != __v3 || \
       sctx->tracked_regs.category##_reg_value[(reg_enum) + 3] != __v4) { \
      gfx11_push_reg((reg), __v1, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 4, __v2, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 8, __v3, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 12, __v4, prefix_name, buffer, reg_count); \
      sctx->tracked_regs.category##_reg_saved_mask |= BITFIELD64_RANGE((reg_enum), 4); \
      sctx->tracked_regs.category##_reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.category##_reg_value[(reg_enum) + 3] = __v4; \
   } \
} while (0)
/* GFX11 packet building helpers for buffered SH registers. */
#define gfx11_push_gfx_sh_reg(reg, value) \
gfx11_push_reg(reg, value, SI_SH, sctx->gfx11.buffered_gfx_sh_regs, \
@ -295,6 +317,43 @@
gfx11_opt_push_reg(reg, reg_enum, value, SI_SH, other, sctx->gfx11.buffered_compute_sh_regs, \
sctx->num_buffered_compute_sh_regs)
/* GFX11 packet building helpers for SET_CONTEXT_REG_PAIRS_PACKED.
* Registers are buffered on the stack and then copied to the command buffer at the end.
*/
#define gfx11_begin_packed_context_regs() \
struct gfx11_reg_pair __cs_context_regs[50]; \
unsigned __cs_context_reg_count = 0;
#define gfx11_set_context_reg(reg, value) \
gfx11_push_reg(reg, value, SI_CONTEXT, __cs_context_regs, __cs_context_reg_count)
#define gfx11_opt_set_context_reg(reg, reg_enum, value) \
gfx11_opt_push_reg(reg, reg_enum, value, SI_CONTEXT, context, __cs_context_regs, \
__cs_context_reg_count)
#define gfx11_opt_set_context_reg4(reg, reg_enum, v1, v2, v3, v4) \
gfx11_opt_push_reg4(reg, reg_enum, v1, v2, v3, v4, SI_CONTEXT, context, __cs_context_regs, \
__cs_context_reg_count)
#define gfx11_end_packed_context_regs() do { \
if (__cs_context_reg_count >= 2) { \
/* Align the count to 2 by duplicating the first register. */ \
if (__cs_context_reg_count % 2 == 1) { \
gfx11_set_context_reg(__cs_context_regs[0].reg_offset[0] + SI_CONTEXT_REG_OFFSET, \
__cs_context_regs[0].reg_value[0]); \
} \
assert(__cs_context_reg_count % 2 == 0); \
unsigned __num_dw = (__cs_context_reg_count / 2) * 3; \
radeon_emit(PKT3(PKT3_SET_CONTEXT_REG_PAIRS_PACKED, __num_dw, 0) | PKT3_RESET_FILTER_CAM_S(1)); \
radeon_emit(__cs_context_reg_count); \
radeon_emit_array(__cs_context_regs, __num_dw); \
} else if (__cs_context_reg_count == 1) { \
radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \
radeon_emit(__cs_context_regs[0].reg_offset[0]); \
radeon_emit(__cs_context_regs[0].reg_value[0]); \
} \
} while (0)
#define radeon_set_or_push_gfx_sh_reg(reg, value) do { \
if (GFX_VERSION >= GFX11 && HAS_SH_PAIRS_PACKED) { \
gfx11_push_gfx_sh_reg(reg, value); \

View file

@ -249,18 +249,35 @@ static void si_emit_cb_render_state(struct si_context *sctx, unsigned index)
sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R;
}
radeon_begin(cs);
radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK,
cb_target_mask);
if (sctx->gfx_level >= GFX8) {
radeon_opt_set_context_reg(sctx, R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL,
cb_dcc_control);
if (sctx->screen->info.has_set_context_pairs_packed) {
radeon_begin(cs);
gfx11_begin_packed_context_regs();
gfx11_opt_set_context_reg(R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK,
cb_target_mask);
gfx11_opt_set_context_reg(R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL,
cb_dcc_control);
gfx11_opt_set_context_reg(R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT,
sx_ps_downconvert);
gfx11_opt_set_context_reg(R_028758_SX_BLEND_OPT_EPSILON, SI_TRACKED_SX_BLEND_OPT_EPSILON,
sx_blend_opt_epsilon);
gfx11_opt_set_context_reg(R_02875C_SX_BLEND_OPT_CONTROL, SI_TRACKED_SX_BLEND_OPT_CONTROL,
sx_blend_opt_control);
gfx11_end_packed_context_regs();
radeon_end(); /* don't track context rolls on GFX11 */
} else {
radeon_begin(cs);
radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK,
cb_target_mask);
if (sctx->gfx_level >= GFX8) {
radeon_opt_set_context_reg(sctx, R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL,
cb_dcc_control);
}
if (sctx->screen->info.rbplus_allowed) {
radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT,
sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control);
}
radeon_end_update_context_roll(sctx);
}
if (sctx->screen->info.rbplus_allowed) {
radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT,
sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control);
}
radeon_end_update_context_roll(sctx);
}
/*
@ -883,12 +900,27 @@ static void si_emit_clip_regs(struct si_context *sctx, unsigned index)
S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->gfx_level >= GFX10_3) |
clipdist_mask | (culldist_mask << 8);
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL,
pa_cl_cntl | vs->pa_cl_vs_out_cntl);
radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL,
rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space));
radeon_end_update_context_roll(sctx);
unsigned pa_cl_clip_cntl = rs->pa_cl_clip_cntl | ucp_mask |
S_028810_CLIP_DISABLE(window_space);
unsigned pa_cl_vs_out_cntl = pa_cl_cntl | vs->pa_cl_vs_out_cntl;
if (sctx->screen->info.has_set_context_pairs_packed) {
radeon_begin(&sctx->gfx_cs);
gfx11_begin_packed_context_regs();
gfx11_opt_set_context_reg(R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL,
pa_cl_clip_cntl);
gfx11_opt_set_context_reg(R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL,
pa_cl_vs_out_cntl);
gfx11_end_packed_context_regs();
radeon_end(); /* don't track context rolls on GFX11 */
} else {
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL,
pa_cl_clip_cntl);
radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL,
pa_cl_vs_out_cntl);
radeon_end_update_context_roll(sctx);
}
}
/*
@ -1660,22 +1692,39 @@ static void si_emit_db_render_state(struct si_context *sctx, unsigned index)
S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) |
S_028010_CENTROID_COMPUTATION_MODE(sctx->gfx_level >= GFX10_3 ? 1 : 0);
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL,
db_render_control, db_count_control);
radeon_opt_set_context_reg(sctx, R_028010_DB_RENDER_OVERRIDE2,
SI_TRACKED_DB_RENDER_OVERRIDE2, db_render_override2);
radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL,
db_shader_control);
if (sctx->screen->info.has_set_context_pairs_packed) {
radeon_begin(&sctx->gfx_cs);
gfx11_begin_packed_context_regs();
gfx11_opt_set_context_reg(R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL,
db_render_control);
gfx11_opt_set_context_reg(R_028004_DB_COUNT_CONTROL, SI_TRACKED_DB_COUNT_CONTROL,
db_count_control);
gfx11_opt_set_context_reg(R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2,
db_render_override2);
gfx11_opt_set_context_reg(R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL,
db_shader_control);
gfx11_opt_set_context_reg(R_0283D0_PA_SC_VRS_OVERRIDE_CNTL,
SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl);
gfx11_end_packed_context_regs();
radeon_end(); /* don't track context rolls on GFX11 */
} else {
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL,
db_render_control, db_count_control);
radeon_opt_set_context_reg(sctx, R_028010_DB_RENDER_OVERRIDE2,
SI_TRACKED_DB_RENDER_OVERRIDE2, db_render_override2);
radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL,
db_shader_control);
if (sctx->gfx_level >= GFX11) {
radeon_opt_set_context_reg(sctx, R_0283D0_PA_SC_VRS_OVERRIDE_CNTL,
SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl);
} else if (sctx->gfx_level >= GFX10_3) {
radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL,
SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl);
if (sctx->gfx_level >= GFX11) {
radeon_opt_set_context_reg(sctx, R_0283D0_PA_SC_VRS_OVERRIDE_CNTL,
SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl);
} else if (sctx->gfx_level >= GFX10_3) {
radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL,
SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl);
}
radeon_end_update_context_roll(sctx);
}
radeon_end_update_context_roll(sctx);
}
/*
@ -3583,6 +3632,189 @@ static void si_emit_framebuffer_state(struct si_context *sctx, unsigned index)
sctx->framebuffer.dirty_zsbuf = false;
}
/* GFX11 dGPU framebuffer state emission using SET_CONTEXT_REG_PAIRS_PACKED.
 *
 * Buffers all dirty colorbuffer and Z/S register writes between
 * gfx11_begin/end_packed_context_regs so they go out as one packed packet.
 * Also registers the bound textures with the winsys buffer list and clears
 * the framebuffer dirty flags at the end.
 */
static void gfx11_dgpu_emit_framebuffer_state(struct si_context *sctx, unsigned index)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
   struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
   unsigned i, nr_cbufs = state->nr_cbufs;
   struct si_texture *tex = NULL;
   struct si_surface *cb = NULL;
   bool is_msaa_resolve = state->nr_cbufs == 2 &&
                          state->cbufs[0] && state->cbufs[0]->texture->nr_samples > 1 &&
                          state->cbufs[1] && state->cbufs[1]->texture->nr_samples <= 1;

   /* CB can't do MSAA resolve on gfx11. */
   assert(!is_msaa_resolve);

   radeon_begin(cs);
   gfx11_begin_packed_context_regs();

   /* Colorbuffers. Only slots marked dirty are (re)programmed. */
   for (i = 0; i < nr_cbufs; i++) {
      if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
         continue;

      /* RB+ depth-only rendering. See the comment where we set rbplus_depth_only_opt for more
       * information.
       */
      if (i == 0 &&
          sctx->screen->info.rbplus_allowed &&
          !sctx->queued.named.blend->cb_target_mask) {
         gfx11_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C,
                               S_028C70_FORMAT_GFX11(V_028C70_COLOR_32) |
                               S_028C70_NUMBER_TYPE(V_028C70_NUMBER_FLOAT));
         continue;
      }

      cb = (struct si_surface *)state->cbufs[i];
      if (!cb) {
         /* Unbound slot: mark the CB format invalid so the hw ignores it. */
         gfx11_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C,
                               S_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID));
         continue;
      }

      tex = (struct si_texture *)cb->base.texture;
      radeon_add_to_buffer_list(
         sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC |
         (tex->buffer.b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER));

      if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) {
         radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer,
                                   RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC |
                                   RADEON_PRIO_SEPARATE_META);
      }

      /* Compute mutable surface parameters. */
      uint64_t cb_color_base = tex->buffer.gpu_address >> 8;
      uint64_t cb_dcc_base = 0;
      unsigned cb_color_info = cb->cb_color_info | tex->cb_color_info;

      /* Set up DCC. */
      if (vi_dcc_enabled(tex, cb->base.u.tex.level)) {
         cb_dcc_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8;

         /* Only the low bits of the tile swizzle that fit below the DCC
          * alignment may be ORed into the 256B-granular base address. */
         unsigned dcc_tile_swizzle = tex->surface.tile_swizzle;
         dcc_tile_swizzle &= ((1 << tex->surface.meta_alignment_log2) - 1) >> 8;
         cb_dcc_base |= dcc_tile_swizzle;
      }

      unsigned cb_color_attrib3, cb_fdcc_control;

      /* Set mutable surface parameters. */
      cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
      cb_color_base |= tex->surface.tile_swizzle;

      cb_color_attrib3 = cb->cb_color_attrib3 |
                         S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) |
                         S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.color.dcc.pipe_aligned);
      cb_fdcc_control = cb->cb_dcc_control |
                        S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) |
                        S_028C78_FDCC_ENABLE(vi_dcc_enabled(tex, cb->base.u.tex.level));

      if (sctx->family >= CHIP_GFX1103_R2) {
         cb_fdcc_control |= S_028C78_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) |
                            S_028C78_MAX_COMP_FRAGS(cb->base.texture->nr_samples >= 4);
      }

      /* Per-CB registers are strided by 0x3C bytes; the *_EXT/ATTRIB2/ATTRIB3
       * blocks are strided by 4 bytes. */
      gfx11_set_context_reg(R_028C60_CB_COLOR0_BASE + i * 0x3C, cb_color_base);
      gfx11_set_context_reg(R_028C6C_CB_COLOR0_VIEW + i * 0x3C, cb->cb_color_view);
      gfx11_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, cb_color_info);
      gfx11_set_context_reg(R_028C74_CB_COLOR0_ATTRIB + i * 0x3C, cb->cb_color_attrib);
      gfx11_set_context_reg(R_028C78_CB_COLOR0_DCC_CONTROL + i * 0x3C, cb_fdcc_control);
      gfx11_set_context_reg(R_028C94_CB_COLOR0_DCC_BASE + i * 0x3C, cb_dcc_base);
      gfx11_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + i * 4, cb_color_base >> 32);
      gfx11_set_context_reg(R_028EA0_CB_COLOR0_DCC_BASE_EXT + i * 4, cb_dcc_base >> 32);
      gfx11_set_context_reg(R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2);
      gfx11_set_context_reg(R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3);
   }
   /* Disable the remaining dirty-but-unused CB slots. */
   for (; i < 8; i++)
      if (sctx->framebuffer.dirty_cbufs & (1 << i))
         gfx11_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);

   /* ZS buffer. */
   if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
      struct si_surface *zb = (struct si_surface *)state->zsbuf;
      struct si_texture *tex = (struct si_texture *)zb->base.texture;
      unsigned db_z_info = zb->db_z_info;
      unsigned db_stencil_info = zb->db_stencil_info;
      unsigned db_htile_surface = zb->db_htile_surface;

      radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE |
                                (zb->base.texture->nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA
                                                                  : RADEON_PRIO_DEPTH_BUFFER));
      bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS);

      /* Set fields dependent on tc_compatile_htile. */
      if (tc_compat_htile) {
         unsigned max_zplanes = 4;

         if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1)
            max_zplanes = 2;

         bool iterate256 = tex->buffer.b.b.nr_samples >= 2;

         db_z_info |= S_028040_ITERATE_FLUSH(1) |
                      S_028040_ITERATE_256(iterate256);
         db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled) |
                            S_028044_ITERATE_256(iterate256);

         /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
         if (sctx->screen->info.has_two_planes_iterate256_bug && iterate256 &&
             !tex->htile_stencil_disabled && tex->buffer.b.b.nr_samples == 4)
            max_zplanes = 1;

         db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1);
      }

      unsigned level = zb->base.u.tex.level;

      gfx11_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
      gfx11_set_context_reg(R_02801C_DB_DEPTH_SIZE_XY, zb->db_depth_size);
      gfx11_set_context_reg(R_028040_DB_Z_INFO, db_z_info |
                            S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0));
      gfx11_set_context_reg(R_028044_DB_STENCIL_INFO, db_stencil_info);
      gfx11_set_context_reg(R_028048_DB_Z_READ_BASE, zb->db_depth_base);
      gfx11_set_context_reg(R_02804C_DB_STENCIL_READ_BASE, zb->db_stencil_base);
      gfx11_set_context_reg(R_028050_DB_Z_WRITE_BASE, zb->db_depth_base);
      gfx11_set_context_reg(R_028054_DB_STENCIL_WRITE_BASE, zb->db_stencil_base);
      gfx11_set_context_reg(R_028068_DB_Z_READ_BASE_HI, zb->db_depth_base >> 32);
      gfx11_set_context_reg(R_02806C_DB_STENCIL_READ_BASE_HI, zb->db_stencil_base >> 32);
      gfx11_set_context_reg(R_028070_DB_Z_WRITE_BASE_HI, zb->db_depth_base >> 32);
      gfx11_set_context_reg(R_028074_DB_STENCIL_WRITE_BASE_HI, zb->db_stencil_base >> 32);
      gfx11_set_context_reg(R_028078_DB_HTILE_DATA_BASE_HI, zb->db_htile_data_base >> 32);
      gfx11_set_context_reg(R_028028_DB_STENCIL_CLEAR, tex->stencil_clear_value[level]);
      gfx11_set_context_reg(R_02802C_DB_DEPTH_CLEAR, fui(tex->depth_clear_value[level]));
      gfx11_set_context_reg(R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
      gfx11_set_context_reg(R_028ABC_DB_HTILE_SURFACE, db_htile_surface);
   } else if (sctx->framebuffer.dirty_zsbuf) {
      /* Gfx11+: DB_Z_INFO.NUM_SAMPLES should match the framebuffer samples if no Z/S is bound.
       * It determines the sample count for VRS, primitive-ordered pixel shading, and occlusion
       * queries.
       */
      gfx11_set_context_reg(R_028040_DB_Z_INFO,
                            S_028040_FORMAT(V_028040_Z_INVALID) |
                            S_028040_NUM_SAMPLES(sctx->framebuffer.log_samples));
      gfx11_set_context_reg(R_028044_DB_STENCIL_INFO, S_028044_FORMAT(V_028044_STENCIL_INVALID));
   }

   /* Framebuffer dimensions. */
   /* PA_SC_WINDOW_SCISSOR_TL is set to 0,0 in gfx*_init_gfx_preamble_state */
   gfx11_set_context_reg(R_028208_PA_SC_WINDOW_SCISSOR_BR,
                         S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
   gfx11_end_packed_context_regs();

   /* If multiple context states may share a bin, break the batch so DPBB
    * re-bins with the new framebuffer state. */
   if (sctx->screen->dpbb_allowed &&
       sctx->screen->pbb_context_states_per_bin > 1) {
      radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
   }
   radeon_end();

   si_update_display_dcc_dirty(sctx);

   sctx->framebuffer.dirty_cbufs = 0;
   sctx->framebuffer.dirty_zsbuf = false;
}
static bool si_out_of_order_rasterization(struct si_context *sctx)
{
struct si_state_blend *blend = sctx->queued.named.blend;
@ -3753,17 +3985,27 @@ static void si_emit_msaa_config(struct si_context *sctx, unsigned index)
}
}
radeon_begin(cs);
/* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */
radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL,
sc_line_cntl, sc_aa_config);
/* R_028804_DB_EQAA */
radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa);
/* R_028A4C_PA_SC_MODE_CNTL_1 */
radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1,
sc_mode_cntl_1);
radeon_end_update_context_roll(sctx);
if (sctx->screen->info.has_set_context_pairs_packed) {
radeon_begin(cs);
gfx11_begin_packed_context_regs();
gfx11_opt_set_context_reg(R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL,
sc_line_cntl);
gfx11_opt_set_context_reg(R_028BE0_PA_SC_AA_CONFIG, SI_TRACKED_PA_SC_AA_CONFIG,
sc_aa_config);
gfx11_opt_set_context_reg(R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa);
gfx11_opt_set_context_reg(R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1,
sc_mode_cntl_1);
gfx11_end_packed_context_regs();
radeon_end(); /* don't track context rolls on GFX11 */
} else {
radeon_begin(cs);
radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL,
sc_line_cntl, sc_aa_config);
radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa);
radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1,
sc_mode_cntl_1);
radeon_end_update_context_roll(sctx);
}
}
void si_update_ps_iter_samples(struct si_context *sctx)
@ -5447,7 +5689,11 @@ void si_init_state_functions(struct si_context *sctx)
sctx->atoms.s.pm4_states[SI_STATE_IDX(vs)].emit = si_pm4_emit_shader;
sctx->atoms.s.pm4_states[SI_STATE_IDX(ps)].emit = si_pm4_emit_shader;
sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state;
if (sctx->screen->info.has_set_context_pairs_packed)
sctx->atoms.s.framebuffer.emit = gfx11_dgpu_emit_framebuffer_state;
else
sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state;
sctx->atoms.s.db_render_state.emit = si_emit_db_render_state;
sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state;
sctx->atoms.s.msaa_config.emit = si_emit_msaa_config;

View file

@ -145,15 +145,28 @@ static void si_get_sample_position(struct pipe_context *ctx, unsigned sample_cou
/* Emit MSAA sample locations for <= 4 samples.
 *
 * With <= 4 samples, all four pixel quadrants use the same packed sample
 * positions, so one 32-bit value is replicated into the four
 * PA_SC_AA_SAMPLE_LOCS_PIXEL_* registers. The 64-bit centroid priority is
 * split across the two PA_SC_CENTROID_PRIORITY registers.
 */
static void si_emit_max_4_sample_locs(struct si_context *sctx, uint64_t centroid_priority,
                                      uint32_t sample_locs)
{
   if (sctx->screen->info.has_set_context_pairs_packed) {
      /* GFX11 path: the registers are non-contiguous, so buffer them and
       * emit one SET_CONTEXT_REG_PAIRS_PACKED packet. */
      radeon_begin(&sctx->gfx_cs);
      gfx11_begin_packed_context_regs();
      gfx11_set_context_reg(R_028BD4_PA_SC_CENTROID_PRIORITY_0, centroid_priority);
      gfx11_set_context_reg(R_028BD8_PA_SC_CENTROID_PRIORITY_1, centroid_priority >> 32);
      gfx11_set_context_reg(R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs);
      gfx11_set_context_reg(R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs);
      gfx11_set_context_reg(R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs);
      gfx11_set_context_reg(R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs);
      gfx11_end_packed_context_regs();
      radeon_end();
   } else {
      /* Pre-GFX11 path: plain SET_CONTEXT_REG packets; the two centroid
       * priority registers are contiguous and use one _seq packet. */
      radeon_begin(&sctx->gfx_cs);
      radeon_set_context_reg_seq(R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
      radeon_emit(centroid_priority);
      radeon_emit(centroid_priority >> 32);
      radeon_set_context_reg(R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs);
      radeon_set_context_reg(R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs);
      radeon_set_context_reg(R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs);
      radeon_set_context_reg(R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs);
      radeon_end();
   }
}
static void si_emit_max_16_sample_locs(struct si_context *sctx, uint64_t centroid_priority,

View file

@ -1208,10 +1208,61 @@ static void gfx10_emit_shader_ngg(struct si_context *sctx, unsigned index)
/* These don't cause a context roll. */
radeon_begin_again(&sctx->gfx_cs);
if (sctx->screen->info.uses_kernel_cu_mask) {
radeon_opt_set_sh_reg_idx(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
3, shader->ngg.spi_shader_pgm_rsrc3_gs);
radeon_opt_set_sh_reg_idx(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
3, shader->ngg.spi_shader_pgm_rsrc4_gs);
} else {
radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
shader->ngg.spi_shader_pgm_rsrc3_gs);
radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
shader->ngg.spi_shader_pgm_rsrc4_gs);
}
radeon_opt_set_uconfig_reg(sctx, R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC,
shader->ngg.ge_pc_alloc);
radeon_end();
}
template <enum si_has_tess HAS_TESS>
static void gfx11_dgpu_emit_shader_ngg(struct si_context *sctx, unsigned index)
{
struct si_shader *shader = sctx->queued.named.gs;
SET_FIELD(sctx->current_gs_state, GS_STATE_ESGS_VERTEX_STRIDE,
shader->ngg.esgs_vertex_stride);
radeon_begin(&sctx->gfx_cs);
gfx11_begin_packed_context_regs();
if (HAS_TESS) {
gfx11_opt_set_context_reg(R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM,
shader->vgt_tf_param);
}
gfx11_opt_set_context_reg(R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP,
shader->ngg.ge_max_output_per_subgroup);
gfx11_opt_set_context_reg(R_028B4C_GE_NGG_SUBGRP_CNTL, SI_TRACKED_GE_NGG_SUBGRP_CNTL,
shader->ngg.ge_ngg_subgrp_cntl);
gfx11_opt_set_context_reg(R_028A84_VGT_PRIMITIVEID_EN, SI_TRACKED_VGT_PRIMITIVEID_EN,
shader->ngg.vgt_primitiveid_en);
gfx11_opt_set_context_reg(R_028B38_VGT_GS_MAX_VERT_OUT, SI_TRACKED_VGT_GS_MAX_VERT_OUT,
shader->ngg.vgt_gs_max_vert_out);
gfx11_opt_set_context_reg(R_028B90_VGT_GS_INSTANCE_CNT, SI_TRACKED_VGT_GS_INSTANCE_CNT,
shader->ngg.vgt_gs_instance_cnt);
gfx11_opt_set_context_reg(R_0286C4_SPI_VS_OUT_CONFIG, SI_TRACKED_SPI_VS_OUT_CONFIG,
shader->ngg.spi_vs_out_config);
gfx11_opt_set_context_reg(R_02870C_SPI_SHADER_POS_FORMAT, SI_TRACKED_SPI_SHADER_POS_FORMAT,
shader->ngg.spi_shader_pos_format);
gfx11_opt_set_context_reg(R_028818_PA_CL_VTE_CNTL, SI_TRACKED_PA_CL_VTE_CNTL,
shader->ngg.pa_cl_vte_cntl);
gfx11_end_packed_context_regs();
assert(!sctx->screen->info.uses_kernel_cu_mask);
if (sctx->screen->info.has_set_sh_pairs_packed) {
assert(!sctx->screen->info.uses_kernel_cu_mask);
gfx11_opt_push_gfx_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
shader->gs.spi_shader_pgm_rsrc3_gs);
@ -1235,6 +1286,9 @@ static void gfx10_emit_shader_ngg(struct si_context *sctx, unsigned index)
shader->ngg.spi_shader_pgm_rsrc4_gs);
}
}
radeon_opt_set_uconfig_reg(sctx, R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC,
shader->ngg.ge_pc_alloc);
radeon_end();
}
@ -1309,10 +1363,17 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
if (!pm4)
return;
if (es_stage == MESA_SHADER_TESS_EVAL)
pm4->atom.emit = gfx10_emit_shader_ngg<TESS_ON>;
else
pm4->atom.emit = gfx10_emit_shader_ngg<TESS_OFF>;
if (sscreen->info.has_set_context_pairs_packed) {
if (es_stage == MESA_SHADER_TESS_EVAL)
pm4->atom.emit = gfx11_dgpu_emit_shader_ngg<TESS_ON>;
else
pm4->atom.emit = gfx11_dgpu_emit_shader_ngg<TESS_OFF>;
} else {
if (es_stage == MESA_SHADER_TESS_EVAL)
pm4->atom.emit = gfx10_emit_shader_ngg<TESS_ON>;
else
pm4->atom.emit = gfx10_emit_shader_ngg<TESS_OFF>;
}
va = shader->bo->gpu_address;
@ -1735,7 +1796,7 @@ static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
return value;
}
static void si_emit_shader_ps(struct si_context *sctx, unsigned index)
static void gfx6_emit_shader_ps(struct si_context *sctx, unsigned index)
{
struct si_shader *shader = sctx->queued.named.ps;
@ -1755,6 +1816,30 @@ static void si_emit_shader_ps(struct si_context *sctx, unsigned index)
radeon_end_update_context_roll(sctx);
}
/* GFX11 dGPU pixel shader state emission using SET_CONTEXT_REG_PAIRS_PACKED.
 *
 * Emits the currently queued PS's SPI/CB context registers, skipping any
 * whose tracked value is unchanged (gfx11_opt_set_context_reg). Context
 * rolls are intentionally not tracked on this path.
 */
static void gfx11_dgpu_emit_shader_ps(struct si_context *sctx, unsigned index)
{
   struct si_shader *shader = sctx->queued.named.ps;

   radeon_begin(&sctx->gfx_cs);
   gfx11_begin_packed_context_regs();
   gfx11_opt_set_context_reg(R_0286CC_SPI_PS_INPUT_ENA, SI_TRACKED_SPI_PS_INPUT_ENA,
                             shader->ps.spi_ps_input_ena);
   gfx11_opt_set_context_reg(R_0286D0_SPI_PS_INPUT_ADDR, SI_TRACKED_SPI_PS_INPUT_ADDR,
                             shader->ps.spi_ps_input_addr);
   gfx11_opt_set_context_reg(R_0286E0_SPI_BARYC_CNTL, SI_TRACKED_SPI_BARYC_CNTL,
                             shader->ps.spi_baryc_cntl);
   gfx11_opt_set_context_reg(R_0286D8_SPI_PS_IN_CONTROL, SI_TRACKED_SPI_PS_IN_CONTROL,
                             shader->ps.spi_ps_in_control);
   gfx11_opt_set_context_reg(R_028710_SPI_SHADER_Z_FORMAT, SI_TRACKED_SPI_SHADER_Z_FORMAT,
                             shader->ps.spi_shader_z_format);
   gfx11_opt_set_context_reg(R_028714_SPI_SHADER_COL_FORMAT, SI_TRACKED_SPI_SHADER_COL_FORMAT,
                             shader->ps.spi_shader_col_format);
   gfx11_opt_set_context_reg(R_02823C_CB_SHADER_MASK, SI_TRACKED_CB_SHADER_MASK,
                             shader->ps.cb_shader_mask);
   gfx11_end_packed_context_regs();
   radeon_end(); /* don't track context rolls on GFX11 */
}
static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
{
struct si_shader_info *info = &shader->selector->info;
@ -1923,10 +2008,15 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
S_0286D8_PARAM_GEN(param_gen) |
S_0286D8_PS_W32_EN(shader->wave_size == 32);
struct si_pm4_state *pm4 = si_get_shader_pm4_state(shader, si_emit_shader_ps);
struct si_pm4_state *pm4 = si_get_shader_pm4_state(shader, NULL);
if (!pm4)
return;
if (sscreen->info.has_set_context_pairs_packed)
pm4->atom.emit = gfx11_dgpu_emit_shader_ps;
else
pm4->atom.emit = gfx6_emit_shader_ps;
/* If multiple state sets are allowed to be in a bin, break the batch on a new PS. */
if (sscreen->dpbb_allowed &&
(sscreen->pbb_context_states_per_bin > 1 ||

View file

@ -373,15 +373,31 @@ static void si_emit_guardband(struct si_context *sctx, unsigned index)
* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ
* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ
*/
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg5(sctx, R_028BE4_PA_SU_VTX_CNTL, SI_TRACKED_PA_SU_VTX_CNTL,
pa_su_vtx_cntl,
fui(guardband_y), fui(discard_y),
fui(guardband_x), fui(discard_x));
radeon_opt_set_context_reg(sctx, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
pa_su_hardware_screen_offset);
radeon_end_update_context_roll(sctx);
if (sctx->screen->info.has_set_context_pairs_packed) {
radeon_begin(&sctx->gfx_cs);
gfx11_begin_packed_context_regs();
gfx11_opt_set_context_reg(R_028BE4_PA_SU_VTX_CNTL, SI_TRACKED_PA_SU_VTX_CNTL,
pa_su_vtx_cntl);
gfx11_opt_set_context_reg4(R_028BE8_PA_CL_GB_VERT_CLIP_ADJ,
SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ,
fui(guardband_y), fui(discard_y),
fui(guardband_x), fui(discard_x));
gfx11_opt_set_context_reg(R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
pa_su_hardware_screen_offset);
gfx11_end_packed_context_regs();
radeon_end(); /* don't track context rolls on GFX11 */
} else {
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg5(sctx, R_028BE4_PA_SU_VTX_CNTL, SI_TRACKED_PA_SU_VTX_CNTL,
pa_su_vtx_cntl,
fui(guardband_y), fui(discard_y),
fui(guardband_x), fui(discard_x));
radeon_opt_set_context_reg(sctx, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
pa_su_hardware_screen_offset);
radeon_end_update_context_roll(sctx);
}
}
static void si_emit_scissors(struct si_context *ctx, unsigned index)