ac,radv,radeonsi: use PM4 for shadowed registers
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34228>
This commit is contained in:
Samuel Pitoiset 2025-03-24 18:14:20 +01:00 committed by Marge Bot
parent 1ba427c4fb
commit a7d8e5d4ca
6 changed files with 242 additions and 222 deletions

View file

@ -11,6 +11,7 @@
#include "ac_shadowed_regs.h"
#include "ac_debug.h"
#include "ac_pm4.h"
#include "sid.h"
#include "util/macros.h"
#include "util/u_debug.h"
@ -778,11 +779,21 @@ void ac_get_reg_ranges(enum amd_gfx_level gfx_level, enum radeon_family family,
}
}
/**
 * Append one SET_CONTEXT_REG packet to a PM4 state.
 *
 * The packet consists of a PKT3_SET_CONTEXT_REG header whose count field is
 * \p num, the register offset relative to SI_CONTEXT_REG_OFFSET divided by
 * four, and then the \p num payload dwords.
 *
 * \param pm4     PM4 state the dwords are appended to via ac_pm4_cmd_add().
 * \param reg     Offset of the first register to write.
 * \param num     Number of dwords read from \p values.
 * \param values  Payload dwords, emitted in array order.
 */
static void
set_context_reg_seq_array(struct ac_pm4_state *pm4, unsigned reg,
                          unsigned num, const uint32_t *values)
{
   const uint32_t header = PKT3(PKT3_SET_CONTEXT_REG, num, 0);
   const uint32_t reg_index = (reg - SI_CONTEXT_REG_OFFSET) >> 2;

   ac_pm4_cmd_add(pm4, header);
   ac_pm4_cmd_add(pm4, reg_index);

   /* Payload: walk the array front to back, one dword per call. */
   for (unsigned remaining = num; remaining != 0; remaining--)
      ac_pm4_cmd_add(pm4, *values++);
}
/**
* Emulate CLEAR_STATE.
*/
static void gfx9_emulate_clear_state(struct radeon_cmdbuf *cs,
set_context_reg_seq_array_fn set_context_reg_seq_array)
static void gfx9_emulate_clear_state(struct ac_pm4_state *pm4)
{
static const uint32_t DbRenderControlGfx9[] = {
0x0, // DB_RENDER_CONTROL
@ -1407,31 +1418,31 @@ static void gfx9_emulate_clear_state(struct radeon_cmdbuf *cs,
#define SET(array) ARRAY_SIZE(array), array
set_context_reg_seq_array(cs, R_028000_DB_RENDER_CONTROL, SET(DbRenderControlGfx9));
set_context_reg_seq_array(cs, R_0281E8_COHER_DEST_BASE_HI_0, SET(CoherDestBaseHi0Gfx9));
set_context_reg_seq_array(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
set_context_reg_seq_array(pm4, R_028000_DB_RENDER_CONTROL, SET(DbRenderControlGfx9));
set_context_reg_seq_array(pm4, R_0281E8_COHER_DEST_BASE_HI_0, SET(CoherDestBaseHi0Gfx9));
set_context_reg_seq_array(pm4, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
SET(VgtMultiPrimIbResetIndxGfx9));
set_context_reg_seq_array(cs, R_028414_CB_BLEND_RED, SET(CbBlendRedGfx9));
set_context_reg_seq_array(cs, R_028644_SPI_PS_INPUT_CNTL_0, SET(SpiPsInputCntl0Gfx9));
set_context_reg_seq_array(cs, R_028754_SX_PS_DOWNCONVERT, SET(SxPsDownconvertGfx9));
set_context_reg_seq_array(cs, R_028800_DB_DEPTH_CONTROL, SET(DbDepthControlGfx9));
set_context_reg_seq_array(cs, R_028A00_PA_SU_POINT_SIZE, SET(PaSuPointSizeGfx9));
set_context_reg_seq_array(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, SET(VgtHosMaxTessLevelGfx9));
set_context_reg_seq_array(cs, R_028A40_VGT_GS_MODE, SET(VgtGsModeGfx9));
set_context_reg_seq_array(cs, R_028A84_VGT_PRIMITIVEID_EN, SET(VgtPrimitiveidEnGfx9));
set_context_reg_seq_array(cs, R_028A8C_VGT_PRIMITIVEID_RESET, SET(VgtPrimitiveidResetGfx9));
set_context_reg_seq_array(cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
set_context_reg_seq_array(pm4, R_028414_CB_BLEND_RED, SET(CbBlendRedGfx9));
set_context_reg_seq_array(pm4, R_028644_SPI_PS_INPUT_CNTL_0, SET(SpiPsInputCntl0Gfx9));
set_context_reg_seq_array(pm4, R_028754_SX_PS_DOWNCONVERT, SET(SxPsDownconvertGfx9));
set_context_reg_seq_array(pm4, R_028800_DB_DEPTH_CONTROL, SET(DbDepthControlGfx9));
set_context_reg_seq_array(pm4, R_028A00_PA_SU_POINT_SIZE, SET(PaSuPointSizeGfx9));
set_context_reg_seq_array(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, SET(VgtHosMaxTessLevelGfx9));
set_context_reg_seq_array(pm4, R_028A40_VGT_GS_MODE, SET(VgtGsModeGfx9));
set_context_reg_seq_array(pm4, R_028A84_VGT_PRIMITIVEID_EN, SET(VgtPrimitiveidEnGfx9));
set_context_reg_seq_array(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, SET(VgtPrimitiveidResetGfx9));
set_context_reg_seq_array(pm4, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
SET(VgtGsMaxPrimsPerSubgroupGfx9));
set_context_reg_seq_array(cs, R_028AE0_VGT_STRMOUT_BUFFER_SIZE_1,
set_context_reg_seq_array(pm4, R_028AE0_VGT_STRMOUT_BUFFER_SIZE_1,
SET(VgtStrmoutBufferSize1Gfx9));
set_context_reg_seq_array(cs, R_028AF0_VGT_STRMOUT_BUFFER_SIZE_2,
set_context_reg_seq_array(pm4, R_028AF0_VGT_STRMOUT_BUFFER_SIZE_2,
SET(VgtStrmoutBufferSize2Gfx9));
set_context_reg_seq_array(cs, R_028B00_VGT_STRMOUT_BUFFER_SIZE_3,
set_context_reg_seq_array(pm4, R_028B00_VGT_STRMOUT_BUFFER_SIZE_3,
SET(VgtStrmoutBufferSize3Gfx9));
set_context_reg_seq_array(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET,
set_context_reg_seq_array(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET,
SET(VgtStrmoutDrawOpaqueOffsetGfx9));
set_context_reg_seq_array(cs, R_028B38_VGT_GS_MAX_VERT_OUT, SET(VgtGsMaxVertOutGfx9));
set_context_reg_seq_array(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0,
set_context_reg_seq_array(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, SET(VgtGsMaxVertOutGfx9));
set_context_reg_seq_array(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0,
SET(PaScCentroidPriority0Gfx9));
}
@ -1439,9 +1450,8 @@ static void gfx9_emulate_clear_state(struct radeon_cmdbuf *cs,
* Emulate CLEAR_STATE. Additionally, initialize num_reg_pairs registers specified
* via reg_offsets and reg_values.
*/
static void gfx10_emulate_clear_state(struct radeon_cmdbuf *cs, unsigned num_reg_pairs,
unsigned *reg_offsets, uint32_t *reg_values,
set_context_reg_seq_array_fn set_context_reg_seq_array)
static void gfx10_emulate_clear_state(struct ac_pm4_state *pm4, unsigned num_reg_pairs,
unsigned *reg_offsets, uint32_t *reg_values)
{
static const uint32_t DbRenderControlNv10[] = {
0x0, // DB_RENDER_CONTROL
@ -2115,35 +2125,34 @@ static void gfx10_emulate_clear_state(struct radeon_cmdbuf *cs, unsigned num_reg
0x0 // CB_COLOR7_ATTRIB3
};
set_context_reg_seq_array(cs, R_028000_DB_RENDER_CONTROL, SET(DbRenderControlNv10));
set_context_reg_seq_array(cs, R_0281E8_COHER_DEST_BASE_HI_0, SET(CoherDestBaseHi0Nv10));
set_context_reg_seq_array(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
set_context_reg_seq_array(pm4, R_028000_DB_RENDER_CONTROL, SET(DbRenderControlNv10));
set_context_reg_seq_array(pm4, R_0281E8_COHER_DEST_BASE_HI_0, SET(CoherDestBaseHi0Nv10));
set_context_reg_seq_array(pm4, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
SET(VgtMultiPrimIbResetIndxNv10));
set_context_reg_seq_array(cs, R_028644_SPI_PS_INPUT_CNTL_0, SET(SpiPsInputCntl0Nv10));
set_context_reg_seq_array(cs, R_028754_SX_PS_DOWNCONVERT, SET(SxPsDownconvertNv10));
set_context_reg_seq_array(cs, R_0287D4_PA_CL_POINT_X_RAD, SET(PaClPointXRadNv10));
set_context_reg_seq_array(cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
set_context_reg_seq_array(pm4, R_028644_SPI_PS_INPUT_CNTL_0, SET(SpiPsInputCntl0Nv10));
set_context_reg_seq_array(pm4, R_028754_SX_PS_DOWNCONVERT, SET(SxPsDownconvertNv10));
set_context_reg_seq_array(pm4, R_0287D4_PA_CL_POINT_X_RAD, SET(PaClPointXRadNv10));
set_context_reg_seq_array(pm4, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
SET(GeMaxOutputPerSubgroupNv10));
set_context_reg_seq_array(cs, R_028A00_PA_SU_POINT_SIZE, SET(PaSuPointSizeNv10));
set_context_reg_seq_array(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, SET(VgtHosMaxTessLevelNv10));
set_context_reg_seq_array(cs, R_028A40_VGT_GS_MODE, SET(VgtGsModeNv10));
set_context_reg_seq_array(cs, R_028A84_VGT_PRIMITIVEID_EN, SET(VgtPrimitiveidEnNv10));
set_context_reg_seq_array(cs, R_028A8C_VGT_PRIMITIVEID_RESET, SET(VgtPrimitiveidResetNv10));
set_context_reg_seq_array(cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, SET(VgtDrawPayloadCntlNv10));
set_context_reg_seq_array(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0,
set_context_reg_seq_array(pm4, R_028A00_PA_SU_POINT_SIZE, SET(PaSuPointSizeNv10));
set_context_reg_seq_array(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, SET(VgtHosMaxTessLevelNv10));
set_context_reg_seq_array(pm4, R_028A40_VGT_GS_MODE, SET(VgtGsModeNv10));
set_context_reg_seq_array(pm4, R_028A84_VGT_PRIMITIVEID_EN, SET(VgtPrimitiveidEnNv10));
set_context_reg_seq_array(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, SET(VgtPrimitiveidResetNv10));
set_context_reg_seq_array(pm4, R_028A98_VGT_DRAW_PAYLOAD_CNTL, SET(VgtDrawPayloadCntlNv10));
set_context_reg_seq_array(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0,
SET(PaScCentroidPriority0Nv10));
for (unsigned i = 0; i < num_reg_pairs; i++)
set_context_reg_seq_array(cs, reg_offsets[i], 1, &reg_values[i]);
set_context_reg_seq_array(pm4, reg_offsets[i], 1, &reg_values[i]);
}
/**
* Emulate CLEAR_STATE. Additionally, initialize num_reg_pairs registers specified
* via reg_offsets and reg_values.
*/
static void gfx103_emulate_clear_state(struct radeon_cmdbuf *cs, unsigned num_reg_pairs,
unsigned *reg_offsets, uint32_t *reg_values,
set_context_reg_seq_array_fn set_context_reg_seq_array)
static void gfx103_emulate_clear_state(struct ac_pm4_state *pm4, unsigned num_reg_pairs,
unsigned *reg_offsets, uint32_t *reg_values)
{
static const uint32_t DbRenderControlGfx103[] = {
0x0, // DB_RENDER_CONTROL
@ -2819,36 +2828,35 @@ static void gfx103_emulate_clear_state(struct radeon_cmdbuf *cs, unsigned num_re
0x0 // CB_COLOR7_ATTRIB3
};
set_context_reg_seq_array(cs, R_028000_DB_RENDER_CONTROL, SET(DbRenderControlGfx103));
set_context_reg_seq_array(cs, R_0281E8_COHER_DEST_BASE_HI_0, SET(CoherDestBaseHi0Gfx103));
set_context_reg_seq_array(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
set_context_reg_seq_array(pm4, R_028000_DB_RENDER_CONTROL, SET(DbRenderControlGfx103));
set_context_reg_seq_array(pm4, R_0281E8_COHER_DEST_BASE_HI_0, SET(CoherDestBaseHi0Gfx103));
set_context_reg_seq_array(pm4, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
SET(VgtMultiPrimIbResetIndxGfx103));
set_context_reg_seq_array(cs, R_028644_SPI_PS_INPUT_CNTL_0, SET(SpiPsInputCntl0Gfx103));
set_context_reg_seq_array(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL,
set_context_reg_seq_array(pm4, R_028644_SPI_PS_INPUT_CNTL_0, SET(SpiPsInputCntl0Gfx103));
set_context_reg_seq_array(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL,
SET(SxPsDownconvertControlGfx103));
set_context_reg_seq_array(cs, R_0287D4_PA_CL_POINT_X_RAD, SET(PaClPointXRadGfx103));
set_context_reg_seq_array(cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
set_context_reg_seq_array(pm4, R_0287D4_PA_CL_POINT_X_RAD, SET(PaClPointXRadGfx103));
set_context_reg_seq_array(pm4, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
SET(GeMaxOutputPerSubgroupGfx103));
set_context_reg_seq_array(cs, R_028A00_PA_SU_POINT_SIZE, SET(PaSuPointSizeGfx103));
set_context_reg_seq_array(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, SET(VgtHosMaxTessLevelGfx103));
set_context_reg_seq_array(cs, R_028A40_VGT_GS_MODE, SET(VgtGsModeGfx103));
set_context_reg_seq_array(cs, R_028A84_VGT_PRIMITIVEID_EN, SET(VgtPrimitiveidEnGfx103));
set_context_reg_seq_array(cs, R_028A8C_VGT_PRIMITIVEID_RESET, SET(VgtPrimitiveidResetGfx103));
set_context_reg_seq_array(cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, SET(VgtDrawPayloadCntlGfx103));
set_context_reg_seq_array(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0,
set_context_reg_seq_array(pm4, R_028A00_PA_SU_POINT_SIZE, SET(PaSuPointSizeGfx103));
set_context_reg_seq_array(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, SET(VgtHosMaxTessLevelGfx103));
set_context_reg_seq_array(pm4, R_028A40_VGT_GS_MODE, SET(VgtGsModeGfx103));
set_context_reg_seq_array(pm4, R_028A84_VGT_PRIMITIVEID_EN, SET(VgtPrimitiveidEnGfx103));
set_context_reg_seq_array(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, SET(VgtPrimitiveidResetGfx103));
set_context_reg_seq_array(pm4, R_028A98_VGT_DRAW_PAYLOAD_CNTL, SET(VgtDrawPayloadCntlGfx103));
set_context_reg_seq_array(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0,
SET(PaScCentroidPriority0Gfx103));
for (unsigned i = 0; i < num_reg_pairs; i++)
set_context_reg_seq_array(cs, reg_offsets[i], 1, &reg_values[i]);
set_context_reg_seq_array(pm4, reg_offsets[i], 1, &reg_values[i]);
}
/**
* Emulate CLEAR_STATE. Additionally, initialize num_reg_pairs registers specified
* via reg_offsets and reg_values.
*/
static void gfx11_emulate_clear_state(struct radeon_cmdbuf *cs, unsigned num_reg_pairs,
unsigned *reg_offsets, uint32_t *reg_values,
set_context_reg_seq_array_fn set_context_reg_seq_array)
static void gfx11_emulate_clear_state(struct ac_pm4_state *pm4,unsigned num_reg_pairs,
unsigned *reg_offsets, uint32_t *reg_values)
{
static const uint32_t DbRenderControlGfx11[] = {
0x0, // DB_RENDER_CONTROL
@ -3449,53 +3457,58 @@ static void gfx11_emulate_clear_state(struct radeon_cmdbuf *cs, unsigned num_reg
0x0, // CB_COLOR7_ATTRIB3
};
set_context_reg_seq_array(cs, R_028000_DB_RENDER_CONTROL, SET(DbRenderControlGfx11));
set_context_reg_seq_array(cs, R_0281E8_COHER_DEST_BASE_HI_0, SET(CoherDestBaseHi0Gfx11));
set_context_reg_seq_array(cs, R_0283D0_PA_SC_VRS_OVERRIDE_CNTL, SET(PaScVrsOverrideCntlGfx11));
set_context_reg_seq_array(cs, R_0283F0_PA_SC_VRS_RATE_BASE, SET(PaScVrsRateBaseGfx11));
set_context_reg_seq_array(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, SET(VgtMultiPrimIbResetIndxGfx11));
set_context_reg_seq_array(cs, R_028644_SPI_PS_INPUT_CNTL_0, SET(SpiPsInputCntl0Gfx11));
set_context_reg_seq_array(cs, R_028708_SPI_SHADER_IDX_FORMAT, SET(SpiShaderIdxFormatGfx11));
set_context_reg_seq_array(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL, SET(SxPsDownconvertControlGfx11));
set_context_reg_seq_array(cs, R_0287D4_PA_CL_POINT_X_RAD, SET(PaClPointXRadGfx11));
set_context_reg_seq_array(cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, SET(GeMaxOutputPerSubgroupGfx11));
set_context_reg_seq_array(cs, R_028A00_PA_SU_POINT_SIZE, SET(PaSuPointSizeGfx11));
set_context_reg_seq_array(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, SET(VgtHosMaxTessLevelGfx11));
set_context_reg_seq_array(cs, R_028A48_PA_SC_MODE_CNTL_0, SET(PaScModeCntl0Gfx11));
set_context_reg_seq_array(cs, R_028A84_VGT_PRIMITIVEID_EN, SET(VgtPrimitiveidEnGfx11));
set_context_reg_seq_array(cs, R_028A8C_VGT_PRIMITIVEID_RESET, SET(VgtPrimitiveidResetGfx11));
set_context_reg_seq_array(cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, SET(VgtDrawPayloadCntlGfx11));
set_context_reg_seq_array(cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, SET(VgtEsgsRingItemsizeGfx11));
set_context_reg_seq_array(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, SET(VgtStrmoutDrawOpaqueOffsetGfx11));
set_context_reg_seq_array(cs, R_028B4C_GE_NGG_SUBGRP_CNTL, SET(GeNggSubgrpCntlGfx11));
set_context_reg_seq_array(cs, R_028B6C_VGT_TF_PARAM, SET(VgtTfParamGfx11));
set_context_reg_seq_array(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, SET(PaScCentroidPriority0Gfx11));
set_context_reg_seq_array(cs, R_028C60_CB_COLOR0_BASE, SET(CbColor0BaseGfx11));
set_context_reg_seq_array(cs, R_028C6C_CB_COLOR0_VIEW, SET(CbColor0ViewGfx11));
set_context_reg_seq_array(cs, R_028C94_CB_COLOR0_DCC_BASE, SET(CbColor0DccBaseGfx11));
set_context_reg_seq_array(cs, R_028CA8_CB_COLOR1_VIEW, SET(CbColor1ViewGfx11));
set_context_reg_seq_array(cs, R_028CD0_CB_COLOR1_DCC_BASE, SET(CbColor1DccBaseGfx11));
set_context_reg_seq_array(cs, R_028CE4_CB_COLOR2_VIEW, SET(CbColor2ViewGfx11));
set_context_reg_seq_array(cs, R_028D0C_CB_COLOR2_DCC_BASE, SET(CbColor2DccBaseGfx11));
set_context_reg_seq_array(cs, R_028D20_CB_COLOR3_VIEW, SET(CbColor3ViewGfx11));
set_context_reg_seq_array(cs, R_028D48_CB_COLOR3_DCC_BASE, SET(CbColor3DccBaseGfx11));
set_context_reg_seq_array(cs, R_028D5C_CB_COLOR4_VIEW, SET(CbColor4ViewGfx11));
set_context_reg_seq_array(cs, R_028D84_CB_COLOR4_DCC_BASE, SET(CbColor4DccBaseGfx11));
set_context_reg_seq_array(cs, R_028D98_CB_COLOR5_VIEW, SET(CbColor5ViewGfx11));
set_context_reg_seq_array(cs, R_028DC0_CB_COLOR5_DCC_BASE, SET(CbColor5DccBaseGfx11));
set_context_reg_seq_array(cs, R_028DD4_CB_COLOR6_VIEW, SET(CbColor6ViewGfx11));
set_context_reg_seq_array(cs, R_028DFC_CB_COLOR6_DCC_BASE, SET(CbColor6DccBaseGfx11));
set_context_reg_seq_array(cs, R_028E10_CB_COLOR7_VIEW, SET(CbColor7ViewGfx11));
set_context_reg_seq_array(cs, R_028E38_CB_COLOR7_DCC_BASE, SET(CbColor7DccBaseGfx11));
set_context_reg_seq_array(cs, R_028C98_CB_COLOR0_DCC_BASE_EXT, SET(CbColor0DccBaseExtGfx11));
set_context_reg_seq_array(pm4, R_028000_DB_RENDER_CONTROL, SET(DbRenderControlGfx11));
set_context_reg_seq_array(pm4, R_0281E8_COHER_DEST_BASE_HI_0, SET(CoherDestBaseHi0Gfx11));
set_context_reg_seq_array(pm4, R_0283D0_PA_SC_VRS_OVERRIDE_CNTL, SET(PaScVrsOverrideCntlGfx11));
set_context_reg_seq_array(pm4, R_0283F0_PA_SC_VRS_RATE_BASE, SET(PaScVrsRateBaseGfx11));
set_context_reg_seq_array(pm4, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, SET(VgtMultiPrimIbResetIndxGfx11));
set_context_reg_seq_array(pm4, R_028644_SPI_PS_INPUT_CNTL_0, SET(SpiPsInputCntl0Gfx11));
set_context_reg_seq_array(pm4, R_028708_SPI_SHADER_IDX_FORMAT, SET(SpiShaderIdxFormatGfx11));
set_context_reg_seq_array(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, SET(SxPsDownconvertControlGfx11));
set_context_reg_seq_array(pm4, R_0287D4_PA_CL_POINT_X_RAD, SET(PaClPointXRadGfx11));
set_context_reg_seq_array(pm4, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, SET(GeMaxOutputPerSubgroupGfx11));
set_context_reg_seq_array(pm4, R_028A00_PA_SU_POINT_SIZE, SET(PaSuPointSizeGfx11));
set_context_reg_seq_array(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, SET(VgtHosMaxTessLevelGfx11));
set_context_reg_seq_array(pm4, R_028A48_PA_SC_MODE_CNTL_0, SET(PaScModeCntl0Gfx11));
set_context_reg_seq_array(pm4, R_028A84_VGT_PRIMITIVEID_EN, SET(VgtPrimitiveidEnGfx11));
set_context_reg_seq_array(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, SET(VgtPrimitiveidResetGfx11));
set_context_reg_seq_array(pm4, R_028A98_VGT_DRAW_PAYLOAD_CNTL, SET(VgtDrawPayloadCntlGfx11));
set_context_reg_seq_array(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, SET(VgtEsgsRingItemsizeGfx11));
set_context_reg_seq_array(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, SET(VgtStrmoutDrawOpaqueOffsetGfx11));
set_context_reg_seq_array(pm4, R_028B4C_GE_NGG_SUBGRP_CNTL, SET(GeNggSubgrpCntlGfx11));
set_context_reg_seq_array(pm4, R_028B6C_VGT_TF_PARAM, SET(VgtTfParamGfx11));
set_context_reg_seq_array(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, SET(PaScCentroidPriority0Gfx11));
set_context_reg_seq_array(pm4, R_028C60_CB_COLOR0_BASE, SET(CbColor0BaseGfx11));
set_context_reg_seq_array(pm4, R_028C6C_CB_COLOR0_VIEW, SET(CbColor0ViewGfx11));
set_context_reg_seq_array(pm4, R_028C94_CB_COLOR0_DCC_BASE, SET(CbColor0DccBaseGfx11));
set_context_reg_seq_array(pm4, R_028CA8_CB_COLOR1_VIEW, SET(CbColor1ViewGfx11));
set_context_reg_seq_array(pm4, R_028CD0_CB_COLOR1_DCC_BASE, SET(CbColor1DccBaseGfx11));
set_context_reg_seq_array(pm4, R_028CE4_CB_COLOR2_VIEW, SET(CbColor2ViewGfx11));
set_context_reg_seq_array(pm4, R_028D0C_CB_COLOR2_DCC_BASE, SET(CbColor2DccBaseGfx11));
set_context_reg_seq_array(pm4, R_028D20_CB_COLOR3_VIEW, SET(CbColor3ViewGfx11));
set_context_reg_seq_array(pm4, R_028D48_CB_COLOR3_DCC_BASE, SET(CbColor3DccBaseGfx11));
set_context_reg_seq_array(pm4, R_028D5C_CB_COLOR4_VIEW, SET(CbColor4ViewGfx11));
set_context_reg_seq_array(pm4, R_028D84_CB_COLOR4_DCC_BASE, SET(CbColor4DccBaseGfx11));
set_context_reg_seq_array(pm4, R_028D98_CB_COLOR5_VIEW, SET(CbColor5ViewGfx11));
set_context_reg_seq_array(pm4, R_028DC0_CB_COLOR5_DCC_BASE, SET(CbColor5DccBaseGfx11));
set_context_reg_seq_array(pm4, R_028DD4_CB_COLOR6_VIEW, SET(CbColor6ViewGfx11));
set_context_reg_seq_array(pm4, R_028DFC_CB_COLOR6_DCC_BASE, SET(CbColor6DccBaseGfx11));
set_context_reg_seq_array(pm4, R_028E10_CB_COLOR7_VIEW, SET(CbColor7ViewGfx11));
set_context_reg_seq_array(pm4, R_028E38_CB_COLOR7_DCC_BASE, SET(CbColor7DccBaseGfx11));
set_context_reg_seq_array(pm4, R_028C98_CB_COLOR0_DCC_BASE_EXT, SET(CbColor0DccBaseExtGfx11));
for (unsigned i = 0; i < num_reg_pairs; i++)
set_context_reg_seq_array(cs, reg_offsets[i], 1, &reg_values[i]);
set_context_reg_seq_array(pm4, reg_offsets[i], 1, &reg_values[i]);
}
void ac_emulate_clear_state(const struct radeon_info *info, struct radeon_cmdbuf *cs,
set_context_reg_seq_array_fn set_context_reg_seq_array)
struct ac_pm4_state *ac_emulate_clear_state(const struct radeon_info *info)
{
struct ac_pm4_state *pm4;
pm4 = ac_pm4_create_sized(info, false, 768, false);
if (!pm4)
return NULL;
assert(info->gfx_level < GFX12);
/* Set context registers same as CLEAR_STATE to initialize shadow memory. */
@ -3503,16 +3516,19 @@ void ac_emulate_clear_state(const struct radeon_info *info, struct radeon_cmdbuf
uint32_t reg_value = info->pa_sc_tile_steering_override;
if (info->gfx_level == GFX11 || info->gfx_level == GFX11_5) {
gfx11_emulate_clear_state(cs, 1, &reg_offset, &reg_value, set_context_reg_seq_array);
gfx11_emulate_clear_state(pm4, 1, &reg_offset, &reg_value);
} else if (info->gfx_level == GFX10_3) {
gfx103_emulate_clear_state(cs, 1, &reg_offset, &reg_value, set_context_reg_seq_array);
gfx103_emulate_clear_state(pm4, 1, &reg_offset, &reg_value);
} else if (info->gfx_level == GFX10) {
gfx10_emulate_clear_state(cs, 1, &reg_offset, &reg_value, set_context_reg_seq_array);
gfx10_emulate_clear_state(pm4, 1, &reg_offset, &reg_value);
} else if (info->gfx_level == GFX9) {
gfx9_emulate_clear_state(cs, set_context_reg_seq_array);
gfx9_emulate_clear_state(pm4);
} else {
unreachable("unimplemented");
}
ac_pm4_finalize(pm4);
return pm4;
}
static void ac_print_nonshadowed_reg(enum amd_gfx_level gfx_level, enum radeon_family family,
@ -3566,7 +3582,7 @@ void ac_print_nonshadowed_regs(enum amd_gfx_level gfx_level, enum radeon_family
}
static void ac_build_load_reg(const struct radeon_info *info,
pm4_cmd_add_fn pm4_cmd_add, void *pm4_cmdbuf,
struct ac_pm4_state *pm4,
enum ac_reg_range_type type,
uint64_t gpu_address)
{
@ -3594,56 +3610,61 @@ static void ac_build_load_reg(const struct radeon_info *info,
break;
}
pm4_cmd_add(pm4_cmdbuf, PKT3(packet, 1 + num_ranges * 2, 0));
pm4_cmd_add(pm4_cmdbuf, gpu_address);
pm4_cmd_add(pm4_cmdbuf, gpu_address >> 32);
ac_pm4_cmd_add(pm4, PKT3(packet, 1 + num_ranges * 2, 0));
ac_pm4_cmd_add(pm4, gpu_address);
ac_pm4_cmd_add(pm4, gpu_address >> 32);
for (unsigned i = 0; i < num_ranges; i++) {
pm4_cmd_add(pm4_cmdbuf, (ranges[i].offset - offset) / 4);
pm4_cmd_add(pm4_cmdbuf, ranges[i].size / 4);
ac_pm4_cmd_add(pm4, (ranges[i].offset - offset) / 4);
ac_pm4_cmd_add(pm4, ranges[i].size / 4);
}
}
void ac_create_shadowing_ib_preamble(const struct radeon_info *info,
pm4_cmd_add_fn pm4_cmd_add, void *pm4_cmdbuf,
struct ac_pm4_state *ac_create_shadowing_ib_preamble(const struct radeon_info *info,
uint64_t gpu_address,
bool dpbb_allowed)
{
struct ac_pm4_state *pm4;
pm4 = ac_pm4_create_sized(info, false, 256, false);
if (!pm4)
return NULL;
if (dpbb_allowed) {
pm4_cmd_add(pm4_cmdbuf, PKT3(PKT3_EVENT_WRITE, 0, 0));
pm4_cmd_add(pm4_cmdbuf, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
ac_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
ac_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
/* Wait for idle, because we'll update VGT ring pointers. */
pm4_cmd_add(pm4_cmdbuf, PKT3(PKT3_EVENT_WRITE, 0, 0));
pm4_cmd_add(pm4_cmdbuf, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
ac_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
ac_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
/* VGT_FLUSH is required even if VGT is idle. It resets VGT pointers. */
pm4_cmd_add(pm4_cmdbuf, PKT3(PKT3_EVENT_WRITE, 0, 0));
pm4_cmd_add(pm4_cmdbuf, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
ac_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
ac_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
if (info->gfx_level >= GFX11) {
uint64_t rb_mask = BITFIELD64_MASK(info->max_render_backends);
pm4_cmd_add(pm4_cmdbuf, PKT3(PKT3_EVENT_WRITE, 2, 0));
pm4_cmd_add(pm4_cmdbuf, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1));
pm4_cmd_add(pm4_cmdbuf, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) |
ac_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 2, 0));
ac_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1));
ac_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) |
PIXEL_PIPE_STATE_CNTL_STRIDE(2) |
PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask));
pm4_cmd_add(pm4_cmdbuf, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask));
ac_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask));
/* We must wait for idle using an EOP event before changing the attribute ring registers.
* Use the bottom-of-pipe EOP event, but increment the PWS counter instead of writing memory.
*/
pm4_cmd_add(pm4_cmdbuf, PKT3(PKT3_RELEASE_MEM, 6, 0));
pm4_cmd_add(pm4_cmdbuf, S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) |
ac_pm4_cmd_add(pm4, PKT3(PKT3_RELEASE_MEM, 6, 0));
ac_pm4_cmd_add(pm4, S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) |
S_490_EVENT_INDEX(5) |
S_490_PWS_ENABLE(1));
pm4_cmd_add(pm4_cmdbuf, 0); /* DST_SEL, INT_SEL, DATA_SEL */
pm4_cmd_add(pm4_cmdbuf, 0); /* ADDRESS_LO */
pm4_cmd_add(pm4_cmdbuf, 0); /* ADDRESS_HI */
pm4_cmd_add(pm4_cmdbuf, 0); /* DATA_LO */
pm4_cmd_add(pm4_cmdbuf, 0); /* DATA_HI */
pm4_cmd_add(pm4_cmdbuf, 0); /* INT_CTXID */
ac_pm4_cmd_add(pm4, 0); /* DST_SEL, INT_SEL, DATA_SEL */
ac_pm4_cmd_add(pm4, 0); /* ADDRESS_LO */
ac_pm4_cmd_add(pm4, 0); /* ADDRESS_HI */
ac_pm4_cmd_add(pm4, 0); /* DATA_LO */
ac_pm4_cmd_add(pm4, 0); /* DATA_HI */
ac_pm4_cmd_add(pm4, 0); /* INT_CTXID */
unsigned gcr_cntl = S_586_GL2_INV(1) | S_586_GL2_WB(1) |
S_586_GLM_INV(1) | S_586_GLM_WB(1) |
@ -3651,34 +3672,34 @@ void ac_create_shadowing_ib_preamble(const struct radeon_info *info,
S_586_GLK_INV(1) | S_586_GLI_INV(V_586_GLI_ALL);
/* Wait for the PWS counter. */
pm4_cmd_add(pm4_cmdbuf, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
pm4_cmd_add(pm4_cmdbuf, S_580_PWS_STAGE_SEL(V_580_CP_PFP) |
ac_pm4_cmd_add(pm4, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
ac_pm4_cmd_add(pm4, S_580_PWS_STAGE_SEL(V_580_CP_PFP) |
S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) |
S_580_PWS_ENA2(1) |
S_580_PWS_COUNT(0));
pm4_cmd_add(pm4_cmdbuf, 0xffffffff); /* GCR_SIZE */
pm4_cmd_add(pm4_cmdbuf, 0x01ffffff); /* GCR_SIZE_HI */
pm4_cmd_add(pm4_cmdbuf, 0); /* GCR_BASE_LO */
pm4_cmd_add(pm4_cmdbuf, 0); /* GCR_BASE_HI */
pm4_cmd_add(pm4_cmdbuf, S_585_PWS_ENA(1));
pm4_cmd_add(pm4_cmdbuf, gcr_cntl); /* GCR_CNTL */
ac_pm4_cmd_add(pm4, 0xffffffff); /* GCR_SIZE */
ac_pm4_cmd_add(pm4, 0x01ffffff); /* GCR_SIZE_HI */
ac_pm4_cmd_add(pm4, 0); /* GCR_BASE_LO */
ac_pm4_cmd_add(pm4, 0); /* GCR_BASE_HI */
ac_pm4_cmd_add(pm4, S_585_PWS_ENA(1));
ac_pm4_cmd_add(pm4, gcr_cntl); /* GCR_CNTL */
} else if (info->gfx_level >= GFX10) {
unsigned gcr_cntl = S_586_GL2_INV(1) | S_586_GL2_WB(1) |
S_586_GLM_INV(1) | S_586_GLM_WB(1) |
S_586_GL1_INV(1) | S_586_GLV_INV(1) |
S_586_GLK_INV(1) | S_586_GLI_INV(V_586_GLI_ALL);
pm4_cmd_add(pm4_cmdbuf, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
pm4_cmd_add(pm4_cmdbuf, 0); /* CP_COHER_CNTL */
pm4_cmd_add(pm4_cmdbuf, 0xffffffff); /* CP_COHER_SIZE */
pm4_cmd_add(pm4_cmdbuf, 0xffffff); /* CP_COHER_SIZE_HI */
pm4_cmd_add(pm4_cmdbuf, 0); /* CP_COHER_BASE */
pm4_cmd_add(pm4_cmdbuf, 0); /* CP_COHER_BASE_HI */
pm4_cmd_add(pm4_cmdbuf, 0x0000000A); /* POLL_INTERVAL */
pm4_cmd_add(pm4_cmdbuf, gcr_cntl); /* GCR_CNTL */
ac_pm4_cmd_add(pm4, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
ac_pm4_cmd_add(pm4, 0); /* CP_COHER_CNTL */
ac_pm4_cmd_add(pm4, 0xffffffff); /* CP_COHER_SIZE */
ac_pm4_cmd_add(pm4, 0xffffff); /* CP_COHER_SIZE_HI */
ac_pm4_cmd_add(pm4, 0); /* CP_COHER_BASE */
ac_pm4_cmd_add(pm4, 0); /* CP_COHER_BASE_HI */
ac_pm4_cmd_add(pm4, 0x0000000A); /* POLL_INTERVAL */
ac_pm4_cmd_add(pm4, gcr_cntl); /* GCR_CNTL */
pm4_cmd_add(pm4_cmdbuf, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
pm4_cmd_add(pm4_cmdbuf, 0);
ac_pm4_cmd_add(pm4, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
ac_pm4_cmd_add(pm4, 0);
} else if (info->gfx_level == GFX9) {
unsigned cp_coher_cntl = S_0301F0_SH_ICACHE_ACTION_ENA(1) |
S_0301F0_SH_KCACHE_ACTION_ENA(1) |
@ -3686,28 +3707,28 @@ void ac_create_shadowing_ib_preamble(const struct radeon_info *info,
S_0301F0_TCL1_ACTION_ENA(1) |
S_0301F0_TC_WB_ACTION_ENA(1);
pm4_cmd_add(pm4_cmdbuf, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
pm4_cmd_add(pm4_cmdbuf, cp_coher_cntl); /* CP_COHER_CNTL */
pm4_cmd_add(pm4_cmdbuf, 0xffffffff); /* CP_COHER_SIZE */
pm4_cmd_add(pm4_cmdbuf, 0xffffff); /* CP_COHER_SIZE_HI */
pm4_cmd_add(pm4_cmdbuf, 0); /* CP_COHER_BASE */
pm4_cmd_add(pm4_cmdbuf, 0); /* CP_COHER_BASE_HI */
pm4_cmd_add(pm4_cmdbuf, 0x0000000A); /* POLL_INTERVAL */
ac_pm4_cmd_add(pm4, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
ac_pm4_cmd_add(pm4, cp_coher_cntl); /* CP_COHER_CNTL */
ac_pm4_cmd_add(pm4, 0xffffffff); /* CP_COHER_SIZE */
ac_pm4_cmd_add(pm4, 0xffffff); /* CP_COHER_SIZE_HI */
ac_pm4_cmd_add(pm4, 0); /* CP_COHER_BASE */
ac_pm4_cmd_add(pm4, 0); /* CP_COHER_BASE_HI */
ac_pm4_cmd_add(pm4, 0x0000000A); /* POLL_INTERVAL */
pm4_cmd_add(pm4_cmdbuf, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
pm4_cmd_add(pm4_cmdbuf, 0);
ac_pm4_cmd_add(pm4, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
ac_pm4_cmd_add(pm4, 0);
} else {
unreachable("invalid chip");
}
pm4_cmd_add(pm4_cmdbuf, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
pm4_cmd_add(pm4_cmdbuf,
ac_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
ac_pm4_cmd_add(pm4,
CC0_UPDATE_LOAD_ENABLES(1) |
CC0_LOAD_PER_CONTEXT_STATE(1) |
CC0_LOAD_CS_SH_REGS(1) |
CC0_LOAD_GFX_SH_REGS(1) |
CC0_LOAD_GLOBAL_UCONFIG(1));
pm4_cmd_add(pm4_cmdbuf,
ac_pm4_cmd_add(pm4,
CC1_UPDATE_SHADOW_ENABLES(1) |
CC1_SHADOW_PER_CONTEXT_STATE(1) |
CC1_SHADOW_CS_SH_REGS(1) |
@ -3717,6 +3738,10 @@ void ac_create_shadowing_ib_preamble(const struct radeon_info *info,
if (!info->has_fw_based_shadowing) {
for (unsigned i = 0; i < SI_NUM_REG_RANGES; i++)
ac_build_load_reg(info, pm4_cmd_add, pm4_cmdbuf, i, gpu_address);
ac_build_load_reg(info, pm4, i, gpu_address);
}
ac_pm4_finalize(pm4);
return pm4;
}

View file

@ -29,20 +29,13 @@ enum ac_reg_range_type
extern "C" {
#endif
typedef void (*pm4_cmd_add_fn)(void *pm4_cmdbuf, uint32_t value);
typedef void (*set_context_reg_seq_array_fn)(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
const uint32_t *values);
void ac_get_reg_ranges(enum amd_gfx_level gfx_level, enum radeon_family family,
enum ac_reg_range_type type, unsigned *num_ranges,
const struct ac_reg_range **ranges);
void ac_emulate_clear_state(const struct radeon_info *info, struct radeon_cmdbuf *cs,
set_context_reg_seq_array_fn set_context_reg_seq_array);
struct ac_pm4_state *ac_emulate_clear_state(const struct radeon_info *info);
void ac_print_nonshadowed_regs(enum amd_gfx_level gfx_level, enum radeon_family family);
void ac_create_shadowing_ib_preamble(const struct radeon_info *info,
pm4_cmd_add_fn pm4_cmd_add, void *pm4_cmdbuf,
struct ac_pm4_state *ac_create_shadowing_ib_preamble(const struct radeon_info *info,
uint64_t gpu_address,
bool dpbb_allowed);
#ifdef __cplusplus

View file

@ -12,19 +12,13 @@
#include "radv_debug.h"
#include "sid.h"
/* Adapter matching the set_context_reg_seq_array_fn callback signature that
 * is handed to ac_emulate_clear_state(): forwards (reg, num) to
 * radeon_set_context_reg_seq() and then passes the num dwords in values to
 * radeon_emit_array(), both on the same command buffer.
 */
static void
radv_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num, const uint32_t *values)
{
radeon_set_context_reg_seq(cs, reg, num);
radeon_emit_array(cs, values, num);
}
VkResult
radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_state *queue_state)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_winsys *ws = device->ws;
const struct radeon_info *gpu_info = &pdev->info;
struct ac_pm4_state *pm4 = NULL;
VkResult result;
struct radeon_cmdbuf *cs = ws->cs_create(ws, AMD_IP_GFX, false);
@ -41,9 +35,11 @@ radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_s
goto fail;
/* fill the cs for shadow regs preamble ib that starts the register shadowing */
ac_create_shadowing_ib_preamble(gpu_info, (pm4_cmd_add_fn)&radeon_emit, cs, queue_state->shadowed_regs->va,
device->pbb_allowed);
pm4 = ac_create_shadowing_ib_preamble(gpu_info, queue_state->shadowed_regs->va, device->pbb_allowed);
if (!pm4)
goto fail_create;
radeon_emit_array(cs, pm4->pm4, pm4->ndw);
ws->cs_pad(cs, 0);
result = radv_bo_create(
@ -65,12 +61,16 @@ radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_s
queue_state->shadow_regs_ib_size_dw = cs->cdw;
ws->buffer_unmap(ws, queue_state->shadow_regs_ib, false);
ac_pm4_free_state(pm4);
ws->cs_destroy(cs);
return VK_SUCCESS;
fail_map:
radv_bo_destroy(device, NULL, queue_state->shadow_regs_ib);
queue_state->shadow_regs_ib = NULL;
fail_ib_buffer:
ac_pm4_free_state(pm4);
fail_create:
radv_bo_destroy(device, NULL, queue_state->shadowed_regs);
queue_state->shadowed_regs = NULL;
fail:
@ -119,8 +119,16 @@ radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct ra
radv_emit_shadow_regs_preamble(cs, device, &queue->state);
if (pdev->info.gfx_level < GFX12)
ac_emulate_clear_state(gpu_info, cs, radv_set_context_reg_array);
if (pdev->info.gfx_level < GFX12) {
struct ac_pm4_state *pm4 = ac_emulate_clear_state(gpu_info);
if (!pm4) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
radeon_emit_array(cs, pm4->pm4, pm4->ndw);
ac_pm4_free_state(pm4);
}
result = ws->cs_finalize(cs);
if (result == VK_SUCCESS) {
@ -128,6 +136,7 @@ radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct ra
result = VK_ERROR_UNKNOWN;
}
fail:
ws->cs_destroy(cs);
return result;
}

View file

@ -9,15 +9,6 @@
#include "ac_shadowed_regs.h"
#include "util/u_memory.h"
/* Adapter matching the set_context_reg_seq_array_fn callback signature that
 * is handed to ac_emulate_clear_state(): inside a radeon_begin()/radeon_end()
 * pair on cs, it invokes radeon_set_context_reg_seq(reg, num) followed by
 * radeon_emit_array(values, num).
 */
static void si_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
const uint32_t *values)
{
radeon_begin(cs);
radeon_set_context_reg_seq(reg, num);
radeon_emit_array(values, num);
radeon_end();
}
void si_init_cp_reg_shadowing(struct si_context *sctx)
{
if (sctx->has_graphics &&
@ -61,12 +52,11 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
0, sctx->shadowing.registers->bo_size, 0);
si_barrier_after_simple_buffer_op(sctx, 0, &sctx->shadowing.registers->b.b, NULL);
/* Create the shadowing preamble. (allocate enough dwords because the preamble is large) */
struct si_pm4_state *shadowing_preamble = si_pm4_create_sized(sctx->screen, 256, false);
/* Create the shadowing preamble. */
struct ac_pm4_state *shadowing_preamble =
ac_create_shadowing_ib_preamble(&sctx->screen->info,
(pm4_cmd_add_fn)ac_pm4_cmd_add, &shadowing_preamble->base,
sctx->shadowing.registers->gpu_address, sctx->screen->dpbb_allowed);
sctx->shadowing.registers->gpu_address,
sctx->screen->dpbb_allowed);
/* Initialize shadowed registers as follows. */
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowing.registers,
@ -76,13 +66,16 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS);
si_pm4_emit_commands(sctx, shadowing_preamble);
if (sctx->gfx_level < GFX12)
ac_emulate_clear_state(&sctx->screen->info, &sctx->gfx_cs, si_set_context_reg_array);
if (sctx->gfx_level < GFX12) {
struct ac_pm4_state *clear_state = ac_emulate_clear_state(&sctx->screen->info);
si_pm4_emit_commands(sctx, clear_state);
ac_pm4_free_state(clear_state);
}
/* TODO: Gfx11 fails GLCTS if we don't re-emit the preamble at the beginning of every IB. */
/* TODO: Skipping this may have made register shadowing slower on Gfx11. */
if (sctx->gfx_level < GFX11) {
si_pm4_emit_commands(sctx, sctx->cs_preamble_state);
si_pm4_emit_commands(sctx, &sctx->cs_preamble_state->base);
/* The register values are shadowed, so we won't need to set them again. */
si_pm4_free_state(sctx, sctx->cs_preamble_state, ~0);
@ -95,8 +88,8 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
/* Setup preemption. The shadowing preamble will be executed as a preamble IB,
* which will load register values from memory on a context switch.
*/
sctx->ws->cs_setup_preemption(&sctx->gfx_cs, shadowing_preamble->base.pm4,
shadowing_preamble->base.ndw);
si_pm4_free_state(sctx, shadowing_preamble, ~0);
sctx->ws->cs_setup_preemption(&sctx->gfx_cs, shadowing_preamble->pm4,
shadowing_preamble->ndw);
ac_pm4_free_state(shadowing_preamble);
}
}

View file

@ -37,12 +37,12 @@ void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsi
FREE(state);
}
void si_pm4_emit_commands(struct si_context *sctx, struct si_pm4_state *state)
void si_pm4_emit_commands(struct si_context *sctx, struct ac_pm4_state *state)
{
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
radeon_begin(cs);
radeon_emit_array(state->base.pm4, state->base.ndw);
radeon_emit_array(state->pm4, state->ndw);
radeon_end();
}

View file

@ -39,7 +39,7 @@ void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen,
bool is_compute_queue);
void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx);
void si_pm4_emit_commands(struct si_context *sctx, struct si_pm4_state *state);
void si_pm4_emit_commands(struct si_context *sctx, struct ac_pm4_state *state);
void si_pm4_emit_state(struct si_context *sctx, unsigned index);
void si_pm4_emit_shader(struct si_context *sctx, unsigned index);
void si_pm4_reset_emitted(struct si_context *sctx);