mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 13:00:09 +01:00
amd: massively simplify how info->spi_cu_en is applied
Instead of having ac_set_reg_cu_en that sets the register, replace it with ac_apply_cu_en that only returns the modified register value, which allows a large simplification in both drivers because a lot of code becomes duplicated after it's switched to ac_apply_cu_en. RADV also didn't apply it to a few registers. Fixed. This removes 82 lines of code in total. Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21641>
This commit is contained in:
parent
2b3f551ed8
commit
ccaaf8fe04
10 changed files with 145 additions and 227 deletions
|
|
@ -958,9 +958,8 @@ unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims
|
|||
return CLAMP(workgroup_size, 1, 256);
|
||||
}
|
||||
|
||||
void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t clear_mask,
|
||||
unsigned value_shift, const struct radeon_info *info,
|
||||
void set_sh_reg(void*, unsigned, uint32_t))
|
||||
uint32_t ac_apply_cu_en(uint32_t value, uint32_t clear_mask, unsigned value_shift,
|
||||
const struct radeon_info *info)
|
||||
{
|
||||
/* Register field position and mask. */
|
||||
uint32_t cu_en_mask = ~clear_mask;
|
||||
|
|
@ -970,10 +969,8 @@ void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t cl
|
|||
|
||||
/* AND the field by spi_cu_en. */
|
||||
uint32_t spi_cu_en = info->spi_cu_en >> value_shift;
|
||||
uint32_t new_value = (value & ~cu_en_mask) |
|
||||
(((cu_en & spi_cu_en) << cu_en_shift) & cu_en_mask);
|
||||
|
||||
set_sh_reg(cs, reg_offset, new_value);
|
||||
return (value & ~cu_en_mask) |
|
||||
(((cu_en & spi_cu_en) << cu_en_shift) & cu_en_mask);
|
||||
}
|
||||
|
||||
/* Return the register value and tune bytes_per_wave to increase scratch performance. */
|
||||
|
|
|
|||
|
|
@ -166,9 +166,8 @@ unsigned ac_compute_esgs_workgroup_size(enum amd_gfx_level gfx_level, unsigned w
|
|||
unsigned ac_compute_ngg_workgroup_size(unsigned es_verts, unsigned gs_inst_prims,
|
||||
unsigned max_vtx_out, unsigned prim_amp_factor);
|
||||
|
||||
void ac_set_reg_cu_en(void *cs, unsigned reg_offset, uint32_t value, uint32_t clear_mask,
|
||||
unsigned value_shift, const struct radeon_info *info,
|
||||
void set_sh_reg(void*, unsigned, uint32_t));
|
||||
uint32_t ac_apply_cu_en(uint32_t value, uint32_t clear_mask, unsigned value_shift,
|
||||
const struct radeon_info *info);
|
||||
|
||||
void ac_get_scratch_tmpring_size(const struct radeon_info *info,
|
||||
unsigned bytes_per_wave, unsigned *max_seen_bytes_per_wave,
|
||||
|
|
|
|||
|
|
@ -117,17 +117,6 @@ radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_
|
|||
radeon_emit(cs, value);
|
||||
}
|
||||
|
||||
static inline void
|
||||
gfx10_set_sh_reg_idx3(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
|
||||
{
|
||||
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
|
||||
assert(cs->cdw + 3 <= cs->max_dw);
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_SET_SH_REG_INDEX, 1, 0));
|
||||
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (3 << 28));
|
||||
radeon_emit(cs, value);
|
||||
}
|
||||
|
||||
static inline void
|
||||
radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -3853,15 +3853,10 @@ radv_pipeline_emit_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
|
|||
&late_alloc_wave64, &cu_mask);
|
||||
|
||||
if (pdevice->rad_info.gfx_level >= GFX7) {
|
||||
if (pdevice->rad_info.gfx_level >= GFX10) {
|
||||
ac_set_reg_cu_en(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
|
||||
S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F),
|
||||
C_00B118_CU_EN, 0, &pdevice->rad_info,
|
||||
(void*)gfx10_set_sh_reg_idx3);
|
||||
} else {
|
||||
radeon_set_sh_reg_idx(pdevice, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3,
|
||||
S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F));
|
||||
}
|
||||
radeon_set_sh_reg_idx(pdevice, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3,
|
||||
ac_apply_cu_en(S_00B118_CU_EN(cu_mask) |
|
||||
S_00B118_WAVE_LIMIT(0x3F),
|
||||
C_00B118_CU_EN, 0, &pdevice->rad_info));
|
||||
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
|
||||
}
|
||||
if (pdevice->rad_info.gfx_level >= GFX10) {
|
||||
|
|
@ -4032,28 +4027,21 @@ radv_pipeline_emit_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs
|
|||
ac_compute_late_alloc(&pdevice->rad_info, true, shader->info.has_ngg_culling,
|
||||
shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask);
|
||||
|
||||
radeon_set_sh_reg_idx(pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
|
||||
ac_apply_cu_en(S_00B21C_CU_EN(cu_mask) |
|
||||
S_00B21C_WAVE_LIMIT(0x3F),
|
||||
C_00B21C_CU_EN, 0, &pdevice->rad_info));
|
||||
|
||||
if (pdevice->rad_info.gfx_level >= GFX11) {
|
||||
/* TODO: figure out how S_00B204_CU_EN_GFX11 interacts with ac_set_reg_cu_en */
|
||||
gfx10_set_sh_reg_idx3(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F));
|
||||
gfx10_set_sh_reg_idx3(
|
||||
cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
S_00B204_CU_EN_GFX11(0x1) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64));
|
||||
} else if (pdevice->rad_info.gfx_level >= GFX10) {
|
||||
ac_set_reg_cu_en(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F),
|
||||
C_00B21C_CU_EN, 0, &pdevice->rad_info, (void*)gfx10_set_sh_reg_idx3);
|
||||
ac_set_reg_cu_en(cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
|
||||
C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info,
|
||||
(void*)gfx10_set_sh_reg_idx3);
|
||||
radeon_set_sh_reg_idx(pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
|
||||
ac_apply_cu_en(S_00B204_CU_EN_GFX11(0x1) |
|
||||
S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
|
||||
C_00B204_CU_EN_GFX11, 16, &pdevice->rad_info));
|
||||
} else {
|
||||
radeon_set_sh_reg_idx(
|
||||
pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
|
||||
S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F));
|
||||
radeon_set_sh_reg_idx(
|
||||
pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
|
||||
S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64));
|
||||
radeon_set_sh_reg_idx(pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
|
||||
ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) |
|
||||
S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
|
||||
C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info));
|
||||
}
|
||||
|
||||
uint32_t oversub_pc_lines = late_alloc_wave64 ? pdevice->rad_info.pc_lines / 4 : 0;
|
||||
|
|
@ -4213,25 +4201,16 @@ radv_pipeline_emit_hw_gs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
|
|||
radeon_emit(cs, gs->config.rsrc2);
|
||||
}
|
||||
|
||||
if (pdevice->rad_info.gfx_level >= GFX10) {
|
||||
ac_set_reg_cu_en(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F),
|
||||
C_00B21C_CU_EN, 0, &pdevice->rad_info,
|
||||
(void*)gfx10_set_sh_reg_idx3);
|
||||
ac_set_reg_cu_en(cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0),
|
||||
C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info,
|
||||
(void*)gfx10_set_sh_reg_idx3);
|
||||
} else if (pdevice->rad_info.gfx_level >= GFX7) {
|
||||
radeon_set_sh_reg_idx(
|
||||
pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
|
||||
S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
|
||||
radeon_set_sh_reg_idx(pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
|
||||
ac_apply_cu_en(S_00B21C_CU_EN(0xffff) |
|
||||
S_00B21C_WAVE_LIMIT(0x3F),
|
||||
C_00B21C_CU_EN, 0, &pdevice->rad_info));
|
||||
|
||||
if (pdevice->rad_info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg_idx(
|
||||
pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
|
||||
S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0));
|
||||
}
|
||||
if (pdevice->rad_info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg_idx(pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
|
||||
ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) |
|
||||
S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0),
|
||||
C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info));
|
||||
}
|
||||
|
||||
radv_pipeline_emit_hw_vs(ctx_cs, cs, pipeline, pipeline->base.gs_copy_shader);
|
||||
|
|
|
|||
|
|
@ -353,34 +353,35 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
if (physical_device->rad_info.gfx_level >= GFX10 &&
|
||||
physical_device->rad_info.gfx_level < GFX11) {
|
||||
/* Logical CUs 16 - 31 */
|
||||
ac_set_reg_cu_en(cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff),
|
||||
C_00B104_CU_EN, 16, &physical_device->rad_info,
|
||||
(void*)gfx10_set_sh_reg_idx3);
|
||||
radeon_set_sh_reg_idx(physical_device, cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS, 3,
|
||||
ac_apply_cu_en(S_00B104_CU_EN(0xffff),
|
||||
C_00B104_CU_EN, 16, &physical_device->rad_info));
|
||||
}
|
||||
|
||||
if (physical_device->rad_info.gfx_level >= GFX10) {
|
||||
ac_set_reg_cu_en(cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff),
|
||||
C_00B404_CU_EN, 16, &physical_device->rad_info,
|
||||
(void*)gfx10_set_sh_reg_idx3);
|
||||
ac_set_reg_cu_en(cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16),
|
||||
C_00B004_CU_EN, 16, &physical_device->rad_info,
|
||||
(void*)gfx10_set_sh_reg_idx3);
|
||||
radeon_set_sh_reg_idx(physical_device, cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS, 3,
|
||||
ac_apply_cu_en(S_00B404_CU_EN(0xffff),
|
||||
C_00B404_CU_EN, 16, &physical_device->rad_info));
|
||||
radeon_set_sh_reg_idx(physical_device, cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, 3,
|
||||
ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16),
|
||||
C_00B004_CU_EN, 16, &physical_device->rad_info));
|
||||
}
|
||||
|
||||
if (physical_device->rad_info.gfx_level >= GFX10) {
|
||||
ac_set_reg_cu_en(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
|
||||
S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F),
|
||||
C_00B41C_CU_EN, 0, &physical_device->rad_info,
|
||||
(void*)gfx10_set_sh_reg_idx3);
|
||||
} else if (physical_device->rad_info.gfx_level == GFX9) {
|
||||
if (physical_device->rad_info.gfx_level >= GFX9) {
|
||||
radeon_set_sh_reg_idx(physical_device, cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 3,
|
||||
S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
|
||||
ac_apply_cu_en(S_00B41C_CU_EN(0xffff) |
|
||||
S_00B41C_WAVE_LIMIT(0x3F),
|
||||
C_00B41C_CU_EN, 0, &physical_device->rad_info));
|
||||
} else {
|
||||
radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
|
||||
S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
|
||||
ac_apply_cu_en(S_00B51C_CU_EN(0xffff) |
|
||||
S_00B51C_WAVE_LIMIT(0x3F),
|
||||
C_00B51C_CU_EN, 0, &physical_device->rad_info));
|
||||
radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
|
||||
radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
|
||||
S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
|
||||
ac_apply_cu_en(S_00B31C_CU_EN(0xffff) |
|
||||
S_00B31C_WAVE_LIMIT(0x3F),
|
||||
C_00B31C_CU_EN, 0, &physical_device->rad_info));
|
||||
/* If this is 0, Bonaire can hang even if GS isn't being used.
|
||||
* Other chips are unaffected. These are suboptimal values,
|
||||
* but we don't use on-chip GS.
|
||||
|
|
@ -389,16 +390,11 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4));
|
||||
}
|
||||
|
||||
if (physical_device->rad_info.gfx_level >= GFX10) {
|
||||
ac_set_reg_cu_en(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
|
||||
S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F) |
|
||||
S_00B01C_LDS_GROUP_SIZE(physical_device->rad_info.gfx_level >= GFX11),
|
||||
C_00B01C_CU_EN, 0, &physical_device->rad_info,
|
||||
(void*)gfx10_set_sh_reg_idx3);
|
||||
} else {
|
||||
radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 3,
|
||||
S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F));
|
||||
}
|
||||
radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 3,
|
||||
ac_apply_cu_en(S_00B01C_CU_EN(cu_mask_ps) |
|
||||
S_00B01C_WAVE_LIMIT(0x3F) |
|
||||
S_00B01C_LDS_GROUP_SIZE(physical_device->rad_info.gfx_level >= GFX11),
|
||||
C_00B01C_CU_EN, 0, &physical_device->rad_info));
|
||||
}
|
||||
|
||||
if (physical_device->rad_info.gfx_level >= GFX10) {
|
||||
|
|
|
|||
|
|
@ -125,11 +125,11 @@
|
|||
radeon_emit(((reg) - SI_SH_REG_OFFSET) >> 2); \
|
||||
} while (0)
|
||||
|
||||
#define radeon_set_sh_reg_idx3_seq(reg, num) do { \
|
||||
#define radeon_set_sh_reg_idx3_seq(sctx, reg, num) do { \
|
||||
SI_CHECK_SHADOWED_REGS(reg, num); \
|
||||
assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \
|
||||
radeon_emit(PKT3(PKT3_SET_SH_REG_INDEX, num, 0)); \
|
||||
radeon_emit((((reg) - SI_SH_REG_OFFSET) >> 2) | (3 << 28)); \
|
||||
radeon_emit((((reg) - SI_SH_REG_OFFSET) >> 2) | ((sctx)->gfx_level >= GFX10 ? 3 << 28 : 0)); \
|
||||
} while (0)
|
||||
|
||||
#define radeon_set_sh_reg(reg, value) do { \
|
||||
|
|
@ -137,8 +137,8 @@
|
|||
radeon_emit(value); \
|
||||
} while (0)
|
||||
|
||||
#define radeon_set_sh_reg_idx3(reg, value) do { \
|
||||
radeon_set_sh_reg_idx3_seq(reg, 1); \
|
||||
#define radeon_set_sh_reg_idx3(sctx, reg, value) do { \
|
||||
radeon_set_sh_reg_idx3_seq(sctx, reg, 1); \
|
||||
radeon_emit(value); \
|
||||
} while (0)
|
||||
|
||||
|
|
@ -297,10 +297,7 @@
|
|||
unsigned __value = val; \
|
||||
if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
|
||||
sctx->tracked_regs.reg_value[reg] != __value) { \
|
||||
if (sctx->gfx_level >= GFX10) \
|
||||
radeon_set_sh_reg_idx3(offset, __value); \
|
||||
else \
|
||||
radeon_set_sh_reg(offset, __value); \
|
||||
radeon_set_sh_reg_idx3(sctx, offset, __value); \
|
||||
sctx->tracked_regs.reg_saved |= BITFIELD64_BIT(reg); \
|
||||
sctx->tracked_regs.reg_value[reg] = __value; \
|
||||
} \
|
||||
|
|
@ -338,23 +335,6 @@
|
|||
radeon_emit_32bit_pointer(sctx->screen, (desc)->gpu_address); \
|
||||
} while (0)
|
||||
|
||||
/* Wrappers that are only used when they are passed as function pointers. */
|
||||
static inline void radeon_set_sh_reg_func(struct radeon_cmdbuf *cs, unsigned reg_offset,
|
||||
uint32_t value)
|
||||
{
|
||||
radeon_begin(cs);
|
||||
radeon_set_sh_reg(reg_offset, value);
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
static inline void radeon_set_sh_reg_idx3_func(struct radeon_cmdbuf *cs, unsigned reg_offset,
|
||||
uint32_t value)
|
||||
{
|
||||
radeon_begin(cs);
|
||||
radeon_set_sh_reg_idx3(reg_offset, value);
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
/* This should be evaluated at compile time if all parameters are constants. */
|
||||
static ALWAYS_INLINE unsigned
|
||||
si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess,
|
||||
|
|
|
|||
|
|
@ -104,11 +104,13 @@ void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val)
|
|||
si_pm4_set_reg_custom(state, reg, val, opcode, 0);
|
||||
}
|
||||
|
||||
void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val)
|
||||
void si_pm4_set_reg_idx3(struct si_screen *sscreen, struct si_pm4_state *state,
|
||||
unsigned reg, uint32_t val)
|
||||
{
|
||||
SI_CHECK_SHADOWED_REGS(reg, 1);
|
||||
|
||||
si_pm4_set_reg_custom(state, reg - SI_SH_REG_OFFSET, val, PKT3_SET_SH_REG_INDEX, 3);
|
||||
si_pm4_set_reg_custom(state, reg - SI_SH_REG_OFFSET, val, PKT3_SET_SH_REG_INDEX,
|
||||
sscreen->info.gfx_level >= GFX10 ? 3 : 0);
|
||||
}
|
||||
|
||||
void si_pm4_clear_state(struct si_pm4_state *state)
|
||||
|
|
|
|||
|
|
@ -31,7 +31,8 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
// forward defines
|
||||
/* forward definitions */
|
||||
struct si_screen;
|
||||
struct si_context;
|
||||
|
||||
/* State atoms are callbacks which write a sequence of packets into a GPU
|
||||
|
|
@ -64,7 +65,8 @@ struct si_pm4_state {
|
|||
|
||||
void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw);
|
||||
void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val);
|
||||
void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val);
|
||||
void si_pm4_set_reg_idx3(struct si_screen *sscreen, struct si_pm4_state *state,
|
||||
unsigned reg, uint32_t val);
|
||||
|
||||
void si_pm4_clear_state(struct si_pm4_state *state);
|
||||
void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx);
|
||||
|
|
|
|||
|
|
@ -5671,12 +5671,11 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
|||
cu_mask_ps = gfx103_get_cu_mask_ps(sscreen);
|
||||
|
||||
if (sctx->gfx_level >= GFX7) {
|
||||
ac_set_reg_cu_en(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
|
||||
S_00B01C_CU_EN(cu_mask_ps) |
|
||||
S_00B01C_WAVE_LIMIT(0x3F) |
|
||||
S_00B01C_LDS_GROUP_SIZE(sctx->gfx_level >= GFX11),
|
||||
C_00B01C_CU_EN, 0, &sscreen->info,
|
||||
(void*)(sctx->gfx_level >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
|
||||
si_pm4_set_reg_idx3(sscreen, pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
|
||||
ac_apply_cu_en(S_00B01C_CU_EN(cu_mask_ps) |
|
||||
S_00B01C_WAVE_LIMIT(0x3F) |
|
||||
S_00B01C_LDS_GROUP_SIZE(sctx->gfx_level >= GFX11),
|
||||
C_00B01C_CU_EN, 0, &sscreen->info));
|
||||
}
|
||||
|
||||
if (sctx->gfx_level <= GFX8) {
|
||||
|
|
@ -5711,13 +5710,13 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
|||
}
|
||||
|
||||
if (sctx->gfx_level >= GFX7 && sctx->gfx_level <= GFX8) {
|
||||
ac_set_reg_cu_en(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
|
||||
S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F),
|
||||
C_00B51C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
|
||||
si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
|
||||
ac_apply_cu_en(S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F),
|
||||
C_00B51C_CU_EN, 0, &sscreen->info));
|
||||
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
|
||||
ac_set_reg_cu_en(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
|
||||
S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F),
|
||||
C_00B31C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg);
|
||||
si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
|
||||
ac_apply_cu_en(S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F),
|
||||
C_00B31C_CU_EN, 0, &sscreen->info));
|
||||
|
||||
/* If this is 0, Bonaire can hang even if GS isn't being used.
|
||||
* Other chips are unaffected. These are suboptimal values,
|
||||
|
|
@ -5774,10 +5773,9 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
|||
}
|
||||
|
||||
if (sctx->gfx_level >= GFX9) {
|
||||
ac_set_reg_cu_en(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
|
||||
S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN,
|
||||
0, &sscreen->info,
|
||||
(void*)(sctx->gfx_level >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
|
||||
si_pm4_set_reg_idx3(sscreen, pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
|
||||
ac_apply_cu_en(S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F),
|
||||
C_00B41C_CU_EN, 0, &sscreen->info));
|
||||
|
||||
si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
|
||||
S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) |
|
||||
|
|
@ -5885,12 +5883,15 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
|||
|
||||
if (sctx->gfx_level >= GFX10 && sctx->gfx_level <= GFX10_3) {
|
||||
/* Logical CUs 16 - 31 */
|
||||
ac_set_reg_cu_en(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16),
|
||||
C_00B004_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg_idx3);
|
||||
ac_set_reg_cu_en(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff),
|
||||
C_00B104_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg_idx3);
|
||||
ac_set_reg_cu_en(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff),
|
||||
C_00B404_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg_idx3);
|
||||
si_pm4_set_reg_idx3(sscreen, pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
|
||||
ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16),
|
||||
C_00B004_CU_EN, 16, &sscreen->info));
|
||||
si_pm4_set_reg_idx3(sscreen, pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS,
|
||||
ac_apply_cu_en(S_00B104_CU_EN(0xffff),
|
||||
C_00B104_CU_EN, 16, &sscreen->info));
|
||||
si_pm4_set_reg_idx3(sscreen, pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
|
||||
ac_apply_cu_en(S_00B404_CU_EN(0xffff),
|
||||
C_00B404_CU_EN, 16, &sscreen->info));
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
|
||||
si_pm4_set_reg(pm4, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);
|
||||
|
|
|
|||
|
|
@ -713,11 +713,10 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
|
|||
|
||||
if (sscreen->info.gfx_level >= GFX9) {
|
||||
if (sscreen->info.gfx_level >= GFX11) {
|
||||
ac_set_reg_cu_en(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
|
||||
S_00B404_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)) |
|
||||
S_00B404_CU_EN(0xffff),
|
||||
C_00B404_CU_EN, 16, &sscreen->info,
|
||||
(void (*)(void*, unsigned, uint32_t))si_pm4_set_reg_idx3);
|
||||
si_pm4_set_reg_idx3(sscreen, pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
|
||||
ac_apply_cu_en(S_00B404_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)) |
|
||||
S_00B404_CU_EN(0xffff),
|
||||
C_00B404_CU_EN, 16, &sscreen->info));
|
||||
}
|
||||
if (sscreen->info.gfx_level >= GFX10) {
|
||||
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
|
|
@ -983,37 +982,18 @@ static void si_emit_shader_gs(struct si_context *sctx)
|
|||
radeon_end_update_context_roll(sctx);
|
||||
|
||||
/* These don't cause any context rolls. */
|
||||
if (sctx->screen->info.spi_cu_en_has_effect) {
|
||||
if (sctx->gfx_level >= GFX7) {
|
||||
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->gs.spi_shader_pgm_rsrc3_gs,
|
||||
C_00B21C_CU_EN, 0, &sctx->screen->info,
|
||||
(void (*)(void*, unsigned, uint32_t))
|
||||
(sctx->gfx_level >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
|
||||
sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
|
||||
}
|
||||
if (sctx->gfx_level >= GFX10) {
|
||||
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->gs.spi_shader_pgm_rsrc4_gs,
|
||||
C_00B204_CU_EN_GFX10, 16, &sctx->screen->info,
|
||||
(void (*)(void*, unsigned, uint32_t))
|
||||
(sctx->gfx_level >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
|
||||
sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS);
|
||||
}
|
||||
} else {
|
||||
radeon_begin_again(&sctx->gfx_cs);
|
||||
if (sctx->gfx_level >= GFX7) {
|
||||
radeon_opt_set_sh_reg_idx3(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->gs.spi_shader_pgm_rsrc3_gs);
|
||||
}
|
||||
if (sctx->gfx_level >= GFX10) {
|
||||
radeon_opt_set_sh_reg_idx3(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->gs.spi_shader_pgm_rsrc4_gs);
|
||||
}
|
||||
radeon_end();
|
||||
radeon_begin_again(&sctx->gfx_cs);
|
||||
if (sctx->gfx_level >= GFX7) {
|
||||
radeon_opt_set_sh_reg_idx3(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->gs.spi_shader_pgm_rsrc3_gs);
|
||||
}
|
||||
if (sctx->gfx_level >= GFX10) {
|
||||
radeon_opt_set_sh_reg_idx3(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->gs.spi_shader_pgm_rsrc4_gs);
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
||||
|
|
@ -1125,10 +1105,14 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
|||
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, rsrc1);
|
||||
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, rsrc2);
|
||||
|
||||
shader->gs.spi_shader_pgm_rsrc3_gs = S_00B21C_CU_EN(0xffff) |
|
||||
S_00B21C_WAVE_LIMIT(0x3F);
|
||||
shader->gs.spi_shader_pgm_rsrc4_gs = S_00B204_CU_EN_GFX10(0xffff) |
|
||||
S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0);
|
||||
shader->gs.spi_shader_pgm_rsrc3_gs =
|
||||
ac_apply_cu_en(S_00B21C_CU_EN(0xffff) |
|
||||
S_00B21C_WAVE_LIMIT(0x3F),
|
||||
C_00B21C_CU_EN, 0, &sscreen->info);
|
||||
shader->gs.spi_shader_pgm_rsrc4_gs =
|
||||
ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) |
|
||||
S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0),
|
||||
C_00B204_CU_EN_GFX10, 16, &sscreen->info);
|
||||
|
||||
shader->gs.vgt_gs_onchip_cntl =
|
||||
S_028A44_ES_VERTS_PER_SUBGRP(shader->gs_info.es_verts_per_subgroup) |
|
||||
|
|
@ -1143,8 +1127,10 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
|||
|
||||
polaris_set_vgt_vertex_reuse(sscreen, shader->key.ge.part.gs.es, shader);
|
||||
} else {
|
||||
shader->gs.spi_shader_pgm_rsrc3_gs = S_00B21C_CU_EN(0xffff) |
|
||||
S_00B21C_WAVE_LIMIT(0x3F);
|
||||
shader->gs.spi_shader_pgm_rsrc3_gs =
|
||||
ac_apply_cu_en(S_00B21C_CU_EN(0xffff) |
|
||||
S_00B21C_WAVE_LIMIT(0x3F),
|
||||
C_00B21C_CU_EN, 0, &sscreen->info);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
|
||||
pm4->reg_va_low_idx = pm4->ndw - 1;
|
||||
|
|
@ -1218,30 +1204,13 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader
|
|||
radeon_begin_again(&sctx->gfx_cs);
|
||||
radeon_opt_set_uconfig_reg(sctx, R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC,
|
||||
shader->ngg.ge_pc_alloc);
|
||||
if (sctx->screen->info.spi_cu_en_has_effect) {
|
||||
radeon_end();
|
||||
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->ngg.spi_shader_pgm_rsrc3_gs,
|
||||
C_00B21C_CU_EN, 0, &sctx->screen->info,
|
||||
(void (*)(void*, unsigned, uint32_t))
|
||||
(sctx->gfx_level >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
|
||||
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->ngg.spi_shader_pgm_rsrc4_gs,
|
||||
sctx->gfx_level >= GFX11 ? C_00B204_CU_EN_GFX11 : C_00B204_CU_EN_GFX10, 16,
|
||||
&sctx->screen->info,
|
||||
(void (*)(void*, unsigned, uint32_t))
|
||||
(sctx->gfx_level >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
|
||||
sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS) &
|
||||
~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS);
|
||||
} else {
|
||||
radeon_opt_set_sh_reg_idx3(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->ngg.spi_shader_pgm_rsrc3_gs);
|
||||
radeon_opt_set_sh_reg_idx3(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->ngg.spi_shader_pgm_rsrc4_gs);
|
||||
radeon_end();
|
||||
}
|
||||
radeon_opt_set_sh_reg_idx3(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->ngg.spi_shader_pgm_rsrc3_gs);
|
||||
radeon_opt_set_sh_reg_idx3(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->ngg.spi_shader_pgm_rsrc4_gs);
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx)
|
||||
|
|
@ -1439,15 +1408,21 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL) |
|
||||
S_00B22C_LDS_SIZE(shader->config.lds_size));
|
||||
|
||||
shader->ngg.spi_shader_pgm_rsrc3_gs = S_00B21C_CU_EN(cu_mask) |
|
||||
S_00B21C_WAVE_LIMIT(0x3F);
|
||||
shader->ngg.spi_shader_pgm_rsrc3_gs =
|
||||
ac_apply_cu_en(S_00B21C_CU_EN(cu_mask) |
|
||||
S_00B21C_WAVE_LIMIT(0x3F),
|
||||
C_00B21C_CU_EN, 0, &sscreen->info);
|
||||
if (sscreen->info.gfx_level >= GFX11) {
|
||||
shader->ngg.spi_shader_pgm_rsrc4_gs =
|
||||
S_00B204_CU_EN_GFX11(0x1) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64) |
|
||||
S_00B204_INST_PREF_SIZE(si_get_shader_prefetch_size(shader));
|
||||
ac_apply_cu_en(S_00B204_CU_EN_GFX11(0x1) |
|
||||
S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64) |
|
||||
S_00B204_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)),
|
||||
C_00B204_CU_EN_GFX11, 16, &sscreen->info);
|
||||
} else {
|
||||
shader->ngg.spi_shader_pgm_rsrc4_gs =
|
||||
S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64);
|
||||
ac_apply_cu_en(S_00B204_CU_EN_GFX10(0xffff) |
|
||||
S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
|
||||
C_00B204_CU_EN_GFX10, 16, &sscreen->info);
|
||||
}
|
||||
|
||||
nparams = MAX2(shader->info.nr_param_exports, 1);
|
||||
|
|
@ -1727,11 +1702,10 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
|
|||
oc_lds_en = shader->selector->stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
|
||||
|
||||
if (sscreen->info.gfx_level >= GFX7) {
|
||||
ac_set_reg_cu_en(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
|
||||
S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F),
|
||||
C_00B118_CU_EN, 0, &sscreen->info,
|
||||
(void (*)(void*, unsigned, uint32_t))
|
||||
(sscreen->info.gfx_level >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg));
|
||||
si_pm4_set_reg_idx3(sscreen, pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
|
||||
ac_apply_cu_en(S_00B118_CU_EN(cu_mask) |
|
||||
S_00B118_WAVE_LIMIT(0x3F),
|
||||
C_00B118_CU_EN, 0, &sscreen->info));
|
||||
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
|
||||
}
|
||||
|
||||
|
|
@ -2049,11 +2023,10 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
|
|||
if (sscreen->info.gfx_level >= GFX11) {
|
||||
unsigned cu_mask_ps = gfx103_get_cu_mask_ps(sscreen);
|
||||
|
||||
ac_set_reg_cu_en(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
|
||||
S_00B004_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)) |
|
||||
S_00B004_CU_EN(cu_mask_ps >> 16),
|
||||
C_00B004_CU_EN, 16, &sscreen->info,
|
||||
(void (*)(void*, unsigned, uint32_t))si_pm4_set_reg_idx3);
|
||||
si_pm4_set_reg_idx3(sscreen, pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
|
||||
ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16) |
|
||||
S_00B004_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)),
|
||||
C_00B004_CU_EN, 16, &sscreen->info));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue