radeonsi: add new possibly faster command submission helpers
This decreases the release libgallium_dri.so size without debug symbols
by 16384 bytes. The CPU time spent in si_emit_draw_packets decreased
from 4.5% to 4.1% in viewperf13/catia/plane01.
The previous code did:
cs->current.buf[cs->current.cdw++] = ...;
cs->current.buf[cs->current.cdw++] = ...;
cs->current.buf[cs->current.cdw++] = ...;
cs->current.buf[cs->current.cdw++] = ...;
The new code does:
unsigned num = cs->current.cdw;
uint32_t *buf = cs->current.buf;
buf[num++] = ...;
buf[num++] = ...;
buf[num++] = ...;
buf[num++] = ...;
cs->current.cdw = num;
The code is the same (radeon_emit is redefined as a macro) except that
all set and emit functions must be surrounded by radeon_begin(cs) and
radeon_end().
radeon_packets_added() returns whether any new packets have been added
since radeon_begin.
radeon_end_update_context_roll(sctx) sets sctx->context_roll = true
if any new packets have been added since radeon_begin.
For now, the "cs" parameter is intentionally unused in radeon_emit and
radeon_emit_array.
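A typical call site after this change looks like the following (taken from
the si_switch_compute_shader hunk in the diff below; where a context roll
must be recorded, radeon_end_update_context_roll(sctx) is used in place of
radeon_end()):
    radeon_begin(cs);
    radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
    radeon_emit(cs, shader_va >> 8);
    radeon_emit(cs, S_00B834_DATA(shader_va >> 40));
    radeon_end();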
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8653>
parent 3ef89b245e
commit a0978fffb8
19 changed files with 579 additions and 381 deletions
@@ -39,249 +39,251 @@
 #define SI_CHECK_SHADOWED_REGS(reg_offset, count)
 #endif
 
-static inline void radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
-{
-   SI_CHECK_SHADOWED_REGS(reg, num);
-   assert(reg < SI_CONTEXT_REG_OFFSET);
-   assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
-   radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
-   radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
-}
-
-static inline void radeon_set_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
-{
-   radeon_set_config_reg_seq(cs, reg, 1);
-   radeon_emit(cs, value);
-}
-
-static inline void radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
-{
-   SI_CHECK_SHADOWED_REGS(reg, num);
-   assert(reg >= SI_CONTEXT_REG_OFFSET);
-   assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
-   radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
-   radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
-}
-
-static inline void radeon_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
-{
-   radeon_set_context_reg_seq(cs, reg, 1);
-   radeon_emit(cs, value);
-}
-
-static inline void radeon_set_context_reg_seq_array(struct radeon_cmdbuf *cs, unsigned reg,
-                                                    unsigned num, const uint32_t *values)
-{
-   radeon_set_context_reg_seq(cs, reg, num);
-   radeon_emit_array(cs, values, num);
-}
-
-static inline void radeon_set_context_reg_idx(struct radeon_cmdbuf *cs, unsigned reg, unsigned idx,
-                                              unsigned value)
-{
-   SI_CHECK_SHADOWED_REGS(reg, 1);
-   assert(reg >= SI_CONTEXT_REG_OFFSET);
-   assert(cs->current.cdw + 3 <= cs->current.max_dw);
-   radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
-   radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
-   radeon_emit(cs, value);
-}
-
-static inline void radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
-{
-   SI_CHECK_SHADOWED_REGS(reg, num);
-   assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
-   assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
-   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
-   radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
-}
-
-static inline void radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
-{
-   radeon_set_sh_reg_seq(cs, reg, 1);
-   radeon_emit(cs, value);
-}
-
-static inline void radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num, bool perfctr)
-{
-   SI_CHECK_SHADOWED_REGS(reg, num);
-   assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
-   assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
-   radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, perfctr));
-   radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
-}
-
-static inline void radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
-{
-   radeon_set_uconfig_reg_seq(cs, reg, 1, false);
-   radeon_emit(cs, value);
-}
-
-static inline void radeon_set_uconfig_reg_perfctr(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
-{
-   radeon_set_uconfig_reg_seq(cs, reg, 1, true);
-   radeon_emit(cs, value);
-}
-
-static inline void radeon_set_uconfig_reg_idx(struct radeon_cmdbuf *cs, struct si_screen *screen,
-                                              unsigned reg, unsigned idx, unsigned value)
-{
-   SI_CHECK_SHADOWED_REGS(reg, 1);
-   assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
-   assert(cs->current.cdw + 3 <= cs->current.max_dw);
-   assert(idx != 0);
-
-   unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
-   if (screen->info.chip_class < GFX9 ||
-       (screen->info.chip_class == GFX9 && screen->info.me_fw_version < 26))
-      opcode = PKT3_SET_UCONFIG_REG;
-
-   radeon_emit(cs, PKT3(opcode, 1, 0));
-   radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
-   radeon_emit(cs, value);
-}
-
-static inline void radeon_set_context_reg_rmw(struct radeon_cmdbuf *cs, unsigned reg,
-                                              unsigned value, unsigned mask)
-{
-   SI_CHECK_SHADOWED_REGS(reg, 1);
-   assert(reg >= SI_CONTEXT_REG_OFFSET);
-   assert(cs->current.cdw + 4 <= cs->current.max_dw);
-   radeon_emit(cs, PKT3(PKT3_CONTEXT_REG_RMW, 2, 0));
-   radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
-   radeon_emit(cs, mask);
-   radeon_emit(cs, value);
-}
+#define radeon_begin(cs) struct radeon_cmdbuf *__cs = (cs); \
+                         unsigned __cs_num = __cs->current.cdw; \
+                         UNUSED unsigned __cs_num_initial = __cs_num; \
+                         uint32_t *__cs_buf = __cs->current.buf
+
+#define radeon_begin_again(cs) do { \
+   assert(__cs == NULL); \
+   __cs = (cs); \
+   __cs_num = __cs->current.cdw; \
+   __cs_num_initial = __cs_num; \
+   __cs_buf = __cs->current.buf; \
+} while (0)
+
+#define radeon_end() do { \
+   __cs->current.cdw = __cs_num; \
+   assert(__cs->current.cdw <= __cs->current.max_dw); \
+   __cs = NULL; \
+} while (0)
+
+#define radeon_emit(cs, value) __cs_buf[__cs_num++] = (value)
+#define radeon_packets_added() (__cs_num != __cs_num_initial)
+
+#define radeon_end_update_context_roll(sctx) do { \
+   radeon_end(); \
+   if (radeon_packets_added()) \
+      (sctx)->context_roll = true; \
+} while (0)
+
+#define radeon_emit_array(cs, values, num) do { \
+   unsigned __n = (num); \
+   memcpy(__cs_buf + __cs_num, (values), __n * 4); \
+   __cs_num += __n; \
+} while (0)
+
+#define radeon_set_config_reg_seq(cs, reg, num) do { \
+   SI_CHECK_SHADOWED_REGS(reg, num); \
+   assert((reg) < SI_CONTEXT_REG_OFFSET); \
+   radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0)); \
+   radeon_emit(cs, ((reg) - SI_CONFIG_REG_OFFSET) >> 2); \
+} while (0)
+
+#define radeon_set_config_reg(cs, reg, value) do { \
+   radeon_set_config_reg_seq(cs, reg, 1); \
+   radeon_emit(cs, value); \
+} while (0)
+
+#define radeon_set_context_reg_seq(cs, reg, num) do { \
+   SI_CHECK_SHADOWED_REGS(reg, num); \
+   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
+   radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0)); \
+   radeon_emit(cs, ((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \
+} while (0)
+
+#define radeon_set_context_reg(cs, reg, value) do { \
+   radeon_set_context_reg_seq(cs, reg, 1); \
+   radeon_emit(cs, value); \
+} while (0)
+
+#define radeon_set_context_reg_seq_array(cs, reg, num, values) do { \
+   radeon_set_context_reg_seq(cs, reg, num); \
+   radeon_emit_array(cs, values, num); \
+} while (0)
+
+#define radeon_set_context_reg_idx(cs, reg, idx, value) do { \
+   SI_CHECK_SHADOWED_REGS(reg, 1); \
+   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
+   radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \
+   radeon_emit(cs, ((reg) - SI_CONTEXT_REG_OFFSET) >> 2 | ((idx) << 28)); \
+   radeon_emit(cs, value); \
+} while (0)
+
+#define radeon_set_sh_reg_seq(cs, reg, num) do { \
+   SI_CHECK_SHADOWED_REGS(reg, num); \
+   assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \
+   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0)); \
+   radeon_emit(cs, ((reg) - SI_SH_REG_OFFSET) >> 2); \
+} while (0)
+
+#define radeon_set_sh_reg(cs, reg, value) do { \
+   radeon_set_sh_reg_seq(cs, reg, 1); \
+   radeon_emit(cs, value); \
+} while (0)
+
+#define radeon_set_uconfig_reg_seq(cs, reg, num, perfctr) do { \
+   SI_CHECK_SHADOWED_REGS(reg, num); \
+   assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
+   radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, perfctr)); \
+   radeon_emit(cs, ((reg) - CIK_UCONFIG_REG_OFFSET) >> 2); \
+} while (0)
+
+#define radeon_set_uconfig_reg(cs, reg, value) do { \
+   radeon_set_uconfig_reg_seq(cs, reg, 1, false); \
+   radeon_emit(cs, value); \
+} while (0)
+
+#define radeon_set_uconfig_reg_perfctr(cs, reg, value) do { \
+   radeon_set_uconfig_reg_seq(cs, reg, 1, true); \
+   radeon_emit(cs, value); \
+} while (0)
+
+#define radeon_set_uconfig_reg_idx(cs, screen, chip_class, reg, idx, value) do { \
+   SI_CHECK_SHADOWED_REGS(reg, 1); \
+   assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
+   assert((idx) != 0); \
+   unsigned __opcode = PKT3_SET_UCONFIG_REG_INDEX; \
+   if ((chip_class) < GFX9 || \
+       ((chip_class) == GFX9 && (screen)->info.me_fw_version < 26)) \
+      __opcode = PKT3_SET_UCONFIG_REG; \
+   radeon_emit(cs, PKT3(__opcode, 1, 0)); \
+   radeon_emit(cs, ((reg) - CIK_UCONFIG_REG_OFFSET) >> 2 | ((idx) << 28)); \
+   radeon_emit(cs, value); \
+} while (0)
+
+#define radeon_set_context_reg_rmw(cs, reg, value, mask) do { \
+   SI_CHECK_SHADOWED_REGS(reg, 1); \
+   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
+   radeon_emit(cs, PKT3(PKT3_CONTEXT_REG_RMW, 2, 0)); \
+   radeon_emit(cs, ((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \
+   radeon_emit(cs, mask); \
+   radeon_emit(cs, value); \
+} while (0)
 
 /* Emit PKT3_CONTEXT_REG_RMW if the register value is different. */
-static inline void radeon_opt_set_context_reg_rmw(struct si_context *sctx, unsigned offset,
-                                                  enum si_tracked_reg reg, unsigned value,
-                                                  unsigned mask)
-{
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
-   assert((value & ~mask) == 0);
-   value &= mask;
-
-   if (((sctx->tracked_regs.reg_saved >> reg) & 0x1) != 0x1 ||
-       sctx->tracked_regs.reg_value[reg] != value) {
-      radeon_set_context_reg_rmw(cs, offset, value, mask);
-
-      sctx->tracked_regs.reg_saved |= 0x1ull << reg;
-      sctx->tracked_regs.reg_value[reg] = value;
-   }
-}
+#define radeon_opt_set_context_reg_rmw(sctx, offset, reg, val, mask) do { \
+   unsigned __value = (val); \
+   assert((__value & ~mask) == 0); \
+   __value &= mask; \
+   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
+       sctx->tracked_regs.reg_value[reg] != __value) { \
+      radeon_set_context_reg_rmw(&sctx->gfx_cs, offset, __value, mask); \
+      sctx->tracked_regs.reg_saved |= 0x1ull << (reg); \
+      sctx->tracked_regs.reg_value[reg] = __value; \
+   } \
+} while (0)
 
 /* Emit PKT3_SET_CONTEXT_REG if the register value is different. */
-static inline void radeon_opt_set_context_reg(struct si_context *sctx, unsigned offset,
-                                              enum si_tracked_reg reg, unsigned value)
-{
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
-   if (((sctx->tracked_regs.reg_saved >> reg) & 0x1) != 0x1 ||
-       sctx->tracked_regs.reg_value[reg] != value) {
-      radeon_set_context_reg(cs, offset, value);
-
-      sctx->tracked_regs.reg_saved |= 0x1ull << reg;
-      sctx->tracked_regs.reg_value[reg] = value;
-   }
-}
+#define radeon_opt_set_context_reg(sctx, offset, reg, val) do { \
+   unsigned __value = val; \
+   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
+       sctx->tracked_regs.reg_value[reg] != __value) { \
+      radeon_set_context_reg(&sctx->gfx_cs, offset, __value); \
+      sctx->tracked_regs.reg_saved |= 0x1ull << (reg); \
+      sctx->tracked_regs.reg_value[reg] = __value; \
+   } \
+} while (0)
 
 /**
  * Set 2 consecutive registers if any registers value is different.
  * @param offset starting register offset
- * @param value1 is written to first register
- * @param value2 is written to second register
+ * @param val1 is written to first register
+ * @param val2 is written to second register
  */
-static inline void radeon_opt_set_context_reg2(struct si_context *sctx, unsigned offset,
-                                               enum si_tracked_reg reg, unsigned value1,
-                                               unsigned value2)
-{
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
-   if (((sctx->tracked_regs.reg_saved >> reg) & 0x3) != 0x3 ||
-       sctx->tracked_regs.reg_value[reg] != value1 ||
-       sctx->tracked_regs.reg_value[reg + 1] != value2) {
-      radeon_set_context_reg_seq(cs, offset, 2);
-      radeon_emit(cs, value1);
-      radeon_emit(cs, value2);
-
-      sctx->tracked_regs.reg_value[reg] = value1;
-      sctx->tracked_regs.reg_value[reg + 1] = value2;
-      sctx->tracked_regs.reg_saved |= 0x3ull << reg;
-   }
-}
+#define radeon_opt_set_context_reg2(sctx, offset, reg, val1, val2) do { \
+   unsigned __value1 = (val1), __value2 = (val2); \
+   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x3) != 0x3 || \
+       sctx->tracked_regs.reg_value[reg] != __value1 || \
+       sctx->tracked_regs.reg_value[(reg) + 1] != __value2) { \
+      radeon_set_context_reg_seq(&sctx->gfx_cs, offset, 2); \
+      radeon_emit(cs, __value1); \
+      radeon_emit(cs, __value2); \
+      sctx->tracked_regs.reg_value[reg] = __value1; \
+      sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \
+      sctx->tracked_regs.reg_saved |= 0x3ull << (reg); \
+   } \
+} while (0)
 
 /**
  * Set 3 consecutive registers if any registers value is different.
  */
-static inline void radeon_opt_set_context_reg3(struct si_context *sctx, unsigned offset,
-                                               enum si_tracked_reg reg, unsigned value1,
-                                               unsigned value2, unsigned value3)
-{
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
-   if (((sctx->tracked_regs.reg_saved >> reg) & 0x7) != 0x7 ||
-       sctx->tracked_regs.reg_value[reg] != value1 ||
-       sctx->tracked_regs.reg_value[reg + 1] != value2 ||
-       sctx->tracked_regs.reg_value[reg + 2] != value3) {
-      radeon_set_context_reg_seq(cs, offset, 3);
-      radeon_emit(cs, value1);
-      radeon_emit(cs, value2);
-      radeon_emit(cs, value3);
-
-      sctx->tracked_regs.reg_value[reg] = value1;
-      sctx->tracked_regs.reg_value[reg + 1] = value2;
-      sctx->tracked_regs.reg_value[reg + 2] = value3;
-      sctx->tracked_regs.reg_saved |= 0x7ull << reg;
-   }
-}
+#define radeon_opt_set_context_reg3(sctx, offset, reg, val1, val2, val3) do { \
+   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3); \
+   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x7) != 0x7 || \
+       sctx->tracked_regs.reg_value[reg] != __value1 || \
+       sctx->tracked_regs.reg_value[(reg) + 1] != __value2 || \
+       sctx->tracked_regs.reg_value[(reg) + 2] != __value3) { \
+      radeon_set_context_reg_seq(&sctx->gfx_cs, offset, 3); \
+      radeon_emit(cs, __value1); \
+      radeon_emit(cs, __value2); \
+      radeon_emit(cs, __value3); \
+      sctx->tracked_regs.reg_value[reg] = __value1; \
+      sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \
+      sctx->tracked_regs.reg_value[(reg) + 2] = __value3; \
+      sctx->tracked_regs.reg_saved |= 0x7ull << (reg); \
+   } \
+} while (0)
 
 /**
  * Set 4 consecutive registers if any registers value is different.
  */
-static inline void radeon_opt_set_context_reg4(struct si_context *sctx, unsigned offset,
-                                               enum si_tracked_reg reg, unsigned value1,
-                                               unsigned value2, unsigned value3, unsigned value4)
-{
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
-   if (((sctx->tracked_regs.reg_saved >> reg) & 0xf) != 0xf ||
-       sctx->tracked_regs.reg_value[reg] != value1 ||
-       sctx->tracked_regs.reg_value[reg + 1] != value2 ||
-       sctx->tracked_regs.reg_value[reg + 2] != value3 ||
-       sctx->tracked_regs.reg_value[reg + 3] != value4) {
-      radeon_set_context_reg_seq(cs, offset, 4);
-      radeon_emit(cs, value1);
-      radeon_emit(cs, value2);
-      radeon_emit(cs, value3);
-      radeon_emit(cs, value4);
-
-      sctx->tracked_regs.reg_value[reg] = value1;
-      sctx->tracked_regs.reg_value[reg + 1] = value2;
-      sctx->tracked_regs.reg_value[reg + 2] = value3;
-      sctx->tracked_regs.reg_value[reg + 3] = value4;
-      sctx->tracked_regs.reg_saved |= 0xfull << reg;
-   }
-}
+#define radeon_opt_set_context_reg4(sctx, offset, reg, val1, val2, val3, val4) do { \
+   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3), __value4 = (val4); \
+   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0xf) != 0xf || \
+       sctx->tracked_regs.reg_value[reg] != __value1 || \
+       sctx->tracked_regs.reg_value[(reg) + 1] != __value2 || \
+       sctx->tracked_regs.reg_value[(reg) + 2] != __value3 || \
+       sctx->tracked_regs.reg_value[(reg) + 3] != __value4) { \
+      radeon_set_context_reg_seq(&sctx->gfx_cs, offset, 4); \
+      radeon_emit(cs, __value1); \
+      radeon_emit(cs, __value2); \
+      radeon_emit(cs, __value3); \
+      radeon_emit(cs, __value4); \
+      sctx->tracked_regs.reg_value[reg] = __value1; \
+      sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \
+      sctx->tracked_regs.reg_value[(reg) + 2] = __value3; \
+      sctx->tracked_regs.reg_value[(reg) + 3] = __value4; \
+      sctx->tracked_regs.reg_saved |= 0xfull << (reg); \
+   } \
+} while (0)
 
 /**
  * Set consecutive registers if any registers value is different.
  */
-static inline void radeon_opt_set_context_regn(struct si_context *sctx, unsigned offset,
-                                               unsigned *value, unsigned *saved_val, unsigned num)
-{
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
-   for (unsigned i = 0; i < num; i++) {
-      if (saved_val[i] != value[i]) {
-         radeon_set_context_reg_seq(cs, offset, num);
-         for (unsigned j = 0; j < num; j++)
-            radeon_emit(cs, value[j]);
-
-         memcpy(saved_val, value, sizeof(uint32_t) * num);
-         break;
-      }
-   }
-}
+#define radeon_opt_set_context_regn(sctx, offset, value, saved_val, num) do { \
+   for (unsigned i = 0; i < (num); i++) { \
+      if ((saved_val)[i] != (value)[i]) { \
+         radeon_set_context_reg_seq(&(sctx)->gfx_cs, offset, num); \
+         for (unsigned j = 0; j < (num); j++) \
+            radeon_emit(cs, value[j]); \
+         memcpy(saved_val, value, sizeof(uint32_t) * (num)); \
+         break; \
+      } \
+   } \
+} while (0)
+
+#define radeon_set_privileged_config_reg(cs, reg, value) do { \
+   assert((reg) < CIK_UCONFIG_REG_OFFSET); \
+   radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); \
+   radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | \
+                   COPY_DATA_DST_SEL(COPY_DATA_PERF)); \
+   radeon_emit(cs, value); \
+   radeon_emit(cs, 0); /* unused */ \
+   radeon_emit(cs, (reg) >> 2); \
+   radeon_emit(cs, 0); /* unused */ \
+} while (0)
+
+#define radeon_emit_32bit_pointer(sscreen, cs, va) do { \
+   radeon_emit(cs, va); \
+   assert((va) == 0 || ((va) >> 32) == sscreen->info.address32_hi); \
+} while (0)
+
+#define radeon_emit_one_32bit_pointer(sctx, desc, sh_base) do { \
+   unsigned sh_offset = (sh_base) + (desc)->shader_userdata_offset; \
+   radeon_set_sh_reg_seq(&sctx->gfx_cs, sh_offset, 1); \
+   radeon_emit_32bit_pointer(sctx->screen, cs, (desc)->gpu_address); \
+} while (0)
 
 /* This should be evaluated at compile time if all parameters are constants. */
 static ALWAYS_INLINE unsigned
@@ -349,6 +349,7 @@ void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf
 {
    uint64_t bc_va = sctx->border_color_buffer->gpu_address;
 
+   radeon_begin(cs);
    radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
    /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
     * renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */
@@ -404,6 +405,7 @@ void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf
       radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, 0);
       radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
    }
+   radeon_end();
 }
 
 static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_shader *shader,
@@ -505,6 +507,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
    radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, shader->bo, RADEON_USAGE_READ,
                              RADEON_PRIO_SHADER_BINARY);
 
+   radeon_begin(cs);
    radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
    radeon_emit(cs, shader_va >> 8);
    radeon_emit(cs, S_00B834_DATA(shader_va >> 40));
@@ -524,6 +527,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
    radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
                      S_00B860_WAVES(sctx->scratch_waves) |
                      S_00B860_WAVESIZE(sctx->max_seen_compute_scratch_bytes_per_wave >> 10));
+   radeon_end();
 
    sctx->cs_shader_state.emitted_program = program;
    sctx->cs_shader_state.offset = offset;
@@ -562,11 +566,13 @@ static void setup_scratch_rsrc_user_sgprs(struct si_context *sctx,
       }
    }
 
+   radeon_begin(cs);
    radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + (user_sgpr * 4), 4);
    radeon_emit(cs, scratch_dword0);
    radeon_emit(cs, scratch_dword1);
    radeon_emit(cs, scratch_dword2);
    radeon_emit(cs, scratch_dword3);
+   radeon_end();
 }
 
 static void si_setup_user_sgprs_co_v2(struct si_context *sctx, const amd_kernel_code_t *code_object,
@@ -589,6 +595,8 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx, const amd_kernel_
       user_sgpr += 4;
    }
 
+   radeon_begin(cs);
+
    if (AMD_HSA_BITS_GET(code_object->code_properties, AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR)) {
       struct dispatch_packet dispatch;
       unsigned dispatch_offset;
@@ -646,6 +654,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx, const amd_kernel_
          user_sgpr += 1;
       }
    }
+   radeon_end();
 }
 
 static bool si_upload_compute_input(struct si_context *sctx, const amd_kernel_code_t *code_object,
@@ -693,13 +702,18 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
                               12 * sel->info.uses_grid_size;
    unsigned cs_user_data_reg = block_size_reg + 12 * program->sel.info.uses_variable_block_size;
 
+   radeon_begin(cs);
+
    if (sel->info.uses_grid_size) {
      if (info->indirect) {
+         radeon_end();
+
        for (unsigned i = 0; i < 3; ++i) {
          si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_REG, NULL, (grid_size_reg >> 2) + i,
                          COPY_DATA_SRC_MEM, si_resource(info->indirect),
                          info->indirect_offset + 4 * i);
        }
+         radeon_begin_again(cs);
      } else {
        radeon_set_sh_reg_seq(cs, grid_size_reg, 3);
        radeon_emit(cs, info->grid[0]);
@@ -719,6 +733,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
       radeon_set_sh_reg_seq(cs, cs_user_data_reg, sel->info.base.cs.user_data_components_amd);
       radeon_emit_array(cs, sctx->cs_user_data, sel->info.base.cs.user_data_components_amd);
    }
+   radeon_end();
 }
 
 static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_grid_info *info)
@@ -734,6 +749,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
    if (sctx->chip_class >= GFX10 && waves_per_threadgroup == 1)
       threadgroups_per_cu = 2;
 
+   radeon_begin(cs);
    radeon_set_sh_reg(
       cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
       ac_get_compute_resource_limits(&sscreen->info, waves_per_threadgroup,
@@ -795,9 +811,10 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
    }
 
    if (unlikely(sctx->thread_trace_enabled && sctx->chip_class >= GFX9)) {
-      radeon_emit(&sctx->gfx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-      radeon_emit(&sctx->gfx_cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
+      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+      radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
    }
+   radeon_end();
 }
 
 static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
@@ -1084,8 +1084,10 @@ si_prepare_prim_discard_or_split_draw(struct si_context *sctx, const struct pipe
     */
    if (!radeon_emitted(gfx_cs, sctx->initial_gfx_cs_size) &&
        gfx_cs->current.cdw + need_gfx_dw > gfx_cs->current.max_dw) {
+      radeon_begin(gfx_cs);
       radeon_emit(gfx_cs, PKT3(PKT3_NOP, 0, 0));
       radeon_emit(gfx_cs, 0);
+      radeon_end();
    }
 
    si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
@@ -1184,6 +1186,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
     * TTM buffer moves in the kernel.
     */
    if (sctx->chip_class >= GFX10) {
+      radeon_begin(cs);
       radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
       radeon_emit(cs, 0); /* CP_COHER_CNTL */
       radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
@@ -1195,6 +1198,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
                      S_586_GLI_INV(V_586_GLI_ALL) | S_586_GLK_INV(1) | S_586_GLV_INV(1) |
                      S_586_GL1_INV(1) | S_586_GL2_INV(1) | S_586_GL2_WB(1) | S_586_GLM_INV(1) |
                      S_586_GLM_WB(1) | S_586_SEQ(V_586_SEQ_FORWARD));
+      radeon_end();
    } else {
       si_emit_surface_sync(sctx, cs,
                            S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) |
@@ -1211,6 +1215,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
 
    si_emit_initial_compute_regs(sctx, cs);
 
+   radeon_begin(cs);
    radeon_set_sh_reg(
       cs, R_00B860_COMPUTE_TMPRING_SIZE,
       S_00B860_WAVES(sctx->scratch_waves) | S_00B860_WAVESIZE(0)); /* no scratch */
@@ -1231,6 +1236,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
       radeon_emit(cs, 0);
       radeon_emit(cs, S_03107C_ENABLE(0));
    }
+   radeon_end();
 
    if (sctx->last_ib_barrier_buf) {
       assert(!sctx->last_ib_barrier_fence);
@@ -1349,6 +1355,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
        * in parallel with compute shaders.
        */
       if (first_dispatch) {
+         radeon_begin(cs);
         radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + gds_size / 4, 0));
         radeon_emit(cs, S_370_DST_SEL(V_370_GDS) | S_370_WR_CONFIRM(1));
         radeon_emit(cs, gds_offset);
@@ -1356,6 +1363,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
         radeon_emit(cs, 0); /* value to write */
         if (gds_size == 8)
            radeon_emit(cs, 0);
+         radeon_end();
      }
   }
 
@@ -1370,6 +1378,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
       assert(shader->config.scratch_bytes_per_wave == 0);
       assert(shader->config.num_vgprs * WAVES_PER_TG <= 256 * 4);
 
+      radeon_begin(cs);
       radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
       radeon_emit(cs, shader_va >> 8);
       radeon_emit(cs, S_00B834_DATA(shader_va >> 40));
@@ -1390,6 +1399,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
       radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
                         ac_get_compute_resource_limits(&sctx->screen->info, WAVES_PER_TG,
                                                        MAX_WAVES_PER_SH, THREADGROUPS_PER_CU));
+      radeon_end();
       sctx->compute_ib_last_shader = shader;
    }
 
@@ -1417,8 +1427,10 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
       sctx->compute_rewind_va = gfx_cs->gpu_address + (gfx_cs->current.cdw + 1) * 4;
 
       if (sctx->chip_class <= GFX7 || FORCE_REWIND_EMULATION) {
+         radeon_begin(gfx_cs);
         radeon_emit(gfx_cs, PKT3(PKT3_NOP, 0, 0));
         radeon_emit(gfx_cs, 0);
+         radeon_end();
 
         si_cp_wait_mem(
            sctx, gfx_cs,
@@ -1430,8 +1442,10 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
          */
         sctx->ws->cs_check_space(gfx_cs, 0, true);
      } else {
+         radeon_begin(gfx_cs);
        radeon_emit(gfx_cs, PKT3(PKT3_REWIND, 0, 0));
        radeon_emit(gfx_cs, 0);
+         radeon_end();
      }
   }
 
@@ -1441,12 +1455,16 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
      uint64_t index_va = out_indexbuf_va + start_prim * 12;
 
      /* Emit the draw packet into the gfx IB. */
+      radeon_begin(gfx_cs);
      radeon_emit(gfx_cs, PKT3(PKT3_DRAW_INDEX_2, 4, 0));
      radeon_emit(gfx_cs, num_prims * vertices_per_prim);
      radeon_emit(gfx_cs, index_va);
      radeon_emit(gfx_cs, index_va >> 32);
      radeon_emit(gfx_cs, 0);
      radeon_emit(gfx_cs, V_0287F0_DI_SRC_SEL_DMA);
+      radeon_end();
+
+      radeon_begin_again(cs);
 
      /* Continue with the compute IB. */
      if (start_prim == 0) {
@@ -1503,6 +1521,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
      radeon_emit(cs, S_00B800_COMPUTE_SHADER_EN(1) | S_00B800_PARTIAL_TG_EN(!!partial_block_size) |
                  S_00B800_ORDERED_APPEND_ENBL(VERTEX_COUNTER_GDS_MODE == 2) |
                  S_00B800_ORDER_MODE(0 /* launch in order */));
+      radeon_end();
 
      /* This is only for unordered append. Ordered append writes this from
       * the shader.
@@ -24,6 +24,7 @@
 
 #include "si_pipe.h"
 #include "sid.h"
+#include "si_build_pm4.h"
 
 /* Set this if you want the ME to wait until CP DMA is done.
  * It should be set on the last CP DMA packet. */
@@ -102,6 +103,8 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
                S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) | S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM);
    }
 
+   radeon_begin(cs);
+
    if (sctx->chip_class >= GFX7) {
       radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
       radeon_emit(cs, header);
@@ -130,6 +133,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
       radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
       radeon_emit(cs, 0);
    }
+   radeon_end();
 }
 
 void si_cp_dma_wait_for_idle(struct si_context *sctx, struct radeon_cmdbuf *cs)
@@ -428,6 +432,7 @@ void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf,
    }
 
    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
+   radeon_begin(cs);
    radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
    radeon_emit(cs, header);
    radeon_emit(cs, address); /* SRC_ADDR_LO [31:0] */
@@ -435,6 +440,7 @@ void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf,
    radeon_emit(cs, address); /* DST_ADDR_LO [31:0] */
    radeon_emit(cs, address >> 32); /* DST_ADDR_HI [31:0] */
    radeon_emit(cs, command);
+   radeon_end();
 }
 
 void si_test_gds(struct si_context *sctx)
@@ -495,11 +501,13 @@ void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, unsigned
    radeon_add_to_buffer_list(sctx, cs, buf, RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
    uint64_t va = buf->gpu_address + offset;
 
+   radeon_begin(cs);
    radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + size / 4, 0));
    radeon_emit(cs, S_370_DST_SEL(dst_sel) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine));
    radeon_emit(cs, va);
    radeon_emit(cs, va >> 32);
    radeon_emit_array(cs, (const uint32_t *)data, size / 4);
+   radeon_end();
 }
 
 void si_cp_copy_data(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned dst_sel,
@@ -517,10 +525,12 @@ void si_cp_copy_data(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned
    uint64_t dst_va = (dst ? dst->gpu_address : 0ull) + dst_offset;
    uint64_t src_va = (src ? src->gpu_address : 0ull) + src_offset;
 
+   radeon_begin(cs);
    radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
    radeon_emit(cs, COPY_DATA_SRC_SEL(src_sel) | COPY_DATA_DST_SEL(dst_sel) | COPY_DATA_WR_CONFIRM);
    radeon_emit(cs, src_va);
    radeon_emit(cs, src_va >> 32);
    radeon_emit(cs, dst_va);
    radeon_emit(cs, dst_va >> 32);
+   radeon_end();
 }
@@ -144,6 +144,15 @@ si_create_shadowing_ib_preamble(struct si_context *sctx)
    return pm4;
 }
 
+static void si_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
+                                     const uint32_t *values)
+{
+   radeon_begin(cs);
+   radeon_set_context_reg_seq(cs, reg, num);
+   radeon_emit_array(cs, values, num);
+   radeon_end();
+}
+
 void si_init_cp_reg_shadowing(struct si_context *sctx)
 {
    if (sctx->screen->info.mid_command_buffer_preemption_enabled ||
@@ -174,8 +183,7 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
       radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowed_regs,
                                 RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
       si_pm4_emit(sctx, shadowing_preamble);
-      ac_emulate_clear_state(&sctx->screen->info, &sctx->gfx_cs,
-                             radeon_set_context_reg_seq_array);
+      ac_emulate_clear_state(&sctx->screen->info, &sctx->gfx_cs, si_set_context_reg_array);
       si_pm4_emit(sctx, sctx->cs_preamble_state);
 
       /* The register values are shadowed, so we won't need to set them again. */
@ -1930,82 +1930,59 @@ void si_shader_change_notify(struct si_context *sctx)
|
||||||
PIPE_SHADER_TESS_EVAL));
|
PIPE_SHADER_TESS_EVAL));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset,
|
#define si_emit_consecutive_shader_pointers(sctx, pointer_mask, sh_base) do { \
|
||||||
unsigned pointer_count)
|
unsigned sh_reg_base = (sh_base); \
|
||||||
{
|
if (sh_reg_base) { \
|
||||||
SI_CHECK_SHADOWED_REGS(sh_offset, pointer_count);
|
unsigned mask = sctx->shader_pointers_dirty & (pointer_mask); \
|
||||||
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count, 0));
|
\
|
||||||
radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
|
while (mask) { \
|
||||||
}
|
int start, count; \
|
||||||
|
u_bit_scan_consecutive_range(&mask, &start, &count); \
|
||||||
static void si_emit_shader_pointer_body(struct si_screen *sscreen, struct radeon_cmdbuf *cs,
|
\
|
||||||
uint64_t va)
|
struct si_descriptors *descs = &sctx->descriptors[start]; \
|
||||||
{
|
unsigned sh_offset = sh_reg_base + descs->shader_userdata_offset; \
|
||||||
radeon_emit(cs, va);
|
\
|
||||||
|
radeon_set_sh_reg_seq(&sctx->gfx_cs, sh_offset, count); \
|
||||||
assert(va == 0 || (va >> 32) == sscreen->info.address32_hi);
|
for (int i = 0; i < count; i++) \
|
||||||
}
|
radeon_emit_32bit_pointer(sctx->screen, cs, descs[i].gpu_address); \
|
||||||
|
} \
|
||||||
static void si_emit_shader_pointer(struct si_context *sctx, struct si_descriptors *desc,
|
} \
|
||||||
unsigned sh_base)
|
} while (0)
|
||||||
{
|
|
||||||
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
|
||||||
unsigned sh_offset = sh_base + desc->shader_userdata_offset;
|
|
||||||
|
|
||||||
si_emit_shader_pointer_head(cs, sh_offset, 1);
|
|
||||||
si_emit_shader_pointer_body(sctx->screen, cs, desc->gpu_address);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void si_emit_consecutive_shader_pointers(struct si_context *sctx, unsigned pointer_mask,
|
|
||||||
unsigned sh_base)
|
|
||||||
{
|
|
||||||
if (!sh_base)
|
|
||||||
return;
|
|
||||||
|
|
||||||
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
|
||||||
unsigned mask = sctx->shader_pointers_dirty & pointer_mask;
|
|
||||||
|
|
||||||
while (mask) {
|
|
||||||
int start, count;
|
|
||||||
u_bit_scan_consecutive_range(&mask, &start, &count);
|
|
||||||
|
|
||||||
struct si_descriptors *descs = &sctx->descriptors[start];
|
|
||||||
unsigned sh_offset = sh_base + descs->shader_userdata_offset;
|
|
||||||
|
|
||||||
si_emit_shader_pointer_head(cs, sh_offset, count);
|
|
||||||
for (int i = 0; i < count; i++)
|
|
||||||
si_emit_shader_pointer_body(sctx->screen, cs, descs[i].gpu_address);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs)
|
static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs)
|
||||||
{
|
{
|
||||||
|
radeon_begin(&sctx->gfx_cs);
|
||||||
|
|
||||||
if (sctx->chip_class >= GFX10) {
|
if (sctx->chip_class >= GFX10) {
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
|
||||||
/* HW VS stage only used in non-NGG mode. */
|
/* HW VS stage only used in non-NGG mode. */
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
|
||||||
|
radeon_end();
|
||||||
return;
|
return;
|
||||||
} else if (sctx->chip_class == GFX9 && sctx->shadowed_regs) {
|
} else if (sctx->chip_class == GFX9 && sctx->shadowed_regs) {
|
||||||
/* We can't use the COMMON registers with register shadowing. */
|
/* We can't use the COMMON registers with register shadowing. */
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_LS_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_LS_0);
|
||||||
|
radeon_end();
|
||||||
return;
|
return;
|
||||||
} else if (sctx->chip_class == GFX9) {
|
} else if (sctx->chip_class == GFX9) {
|
||||||
/* Broadcast it to all shader stages. */
|
/* Broadcast it to all shader stages. */
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
|
||||||
|
radeon_end();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
|
||||||
si_emit_shader_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_LS_0);
|
radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_LS_0);
|
||||||
|
radeon_end();
|
||||||
}
|
}
|
||||||
|
|
||||||
void si_emit_graphics_shader_pointers(struct si_context *sctx)
|
void si_emit_graphics_shader_pointers(struct si_context *sctx)
|
||||||
|
|
@ -2016,6 +1993,7 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx)
|
||||||
si_emit_global_shader_pointers(sctx, &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
|
si_emit_global_shader_pointers(sctx, &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
radeon_begin(&sctx->gfx_cs);
|
||||||
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
|
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
|
||||||
sh_base[PIPE_SHADER_VERTEX]);
|
sh_base[PIPE_SHADER_VERTEX]);
|
||||||
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),
|
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),
|
||||||
|
|
@ -2030,8 +2008,6 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx)
|
||||||
sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
|
sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
|
||||||
|
|
||||||
if (sctx->vertex_buffer_pointer_dirty && sctx->num_vertex_elements) {
|
if (sctx->vertex_buffer_pointer_dirty && sctx->num_vertex_elements) {
|
||||||
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
|
||||||
|
|
||||||
/* Find the location of the VB descriptor pointer. */
|
/* Find the location of the VB descriptor pointer. */
|
||||||
unsigned sh_dw_offset = SI_VS_NUM_USER_SGPR;
|
unsigned sh_dw_offset = SI_VS_NUM_USER_SGPR;
|
||||||
if (sctx->chip_class >= GFX9) {
|
if (sctx->chip_class >= GFX9) {
|
||||||
|
|
@ -2042,22 +2018,22 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
      unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + sh_dw_offset * 4;

-     si_emit_shader_pointer_head(cs, sh_offset, 1);
-     si_emit_shader_pointer_body(
+     radeon_set_sh_reg_seq(cs, sh_offset, 1);
+     radeon_emit_32bit_pointer(
         sctx->screen, cs, sctx->vb_descriptors_buffer->gpu_address + sctx->vb_descriptors_offset);
      sctx->vertex_buffer_pointer_dirty = false;
   }

   if (sctx->vertex_buffer_user_sgprs_dirty && sctx->num_vertex_elements &&
       sctx->screen->num_vbos_in_user_sgprs) {
-     struct radeon_cmdbuf *cs = &sctx->gfx_cs;
      unsigned num_desc = MIN2(sctx->num_vertex_elements, sctx->screen->num_vbos_in_user_sgprs);
      unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + SI_SGPR_VS_VB_DESCRIPTOR_FIRST * 4;

-     si_emit_shader_pointer_head(cs, sh_offset, num_desc * 4);
+     radeon_set_sh_reg_seq(cs, sh_offset, num_desc * 4);
      radeon_emit_array(cs, sctx->vb_descriptor_user_sgprs, num_desc * 4);
      sctx->vertex_buffer_user_sgprs_dirty = false;
   }
+  radeon_end();

   if (sctx->graphics_bindless_pointer_dirty) {
      si_emit_global_shader_pointers(sctx, &sctx->bindless_descriptors);
@@ -2071,12 +2047,13 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
   struct si_shader_selector *shader = &sctx->cs_shader_state.program->sel;
   unsigned base = R_00B900_COMPUTE_USER_DATA_0;

+  radeon_begin(cs);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
                                       R_00B900_COMPUTE_USER_DATA_0);
   sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);

   if (sctx->compute_bindless_pointer_dirty) {
-     si_emit_shader_pointer(sctx, &sctx->bindless_descriptors, base);
+     radeon_emit_one_32bit_pointer(sctx, &sctx->bindless_descriptors, base);
      sctx->compute_bindless_pointer_dirty = false;
   }

@@ -2085,7 +2062,7 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
   if (num_shaderbufs && sctx->compute_shaderbuf_sgprs_dirty) {
      struct si_descriptors *desc = si_const_and_shader_buffer_descriptors(sctx, PIPE_SHADER_COMPUTE);

-     si_emit_shader_pointer_head(cs, R_00B900_COMPUTE_USER_DATA_0 +
+     radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
                                 shader->cs_shaderbufs_sgpr_index * 4,
                                 num_shaderbufs * 4);

@@ -2100,7 +2077,7 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
   if (num_images && sctx->compute_image_sgprs_dirty) {
      struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, PIPE_SHADER_COMPUTE);

-     si_emit_shader_pointer_head(cs, R_00B900_COMPUTE_USER_DATA_0 +
+     radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
                                 shader->cs_images_sgpr_index * 4,
                                 shader->cs_images_num_sgprs);

@@ -2119,6 +2096,7 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)

      sctx->compute_image_sgprs_dirty = false;
   }
+  radeon_end();
}

/* BINDLESS */
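Every converted call site in the hunks above follows the same bracketing contract: one radeon_begin(cs) before the first emit, one radeon_end() after the last. A minimal sketch of the pattern, assuming a hypothetical wrapper function (the packet values are copied from hunks in this commit; the function name is illustrative):

static void emit_vgt_flush_sketch(struct radeon_cmdbuf *cs)
{
   radeon_begin(cs);  /* caches cs->current.buf and cs->current.cdw in locals */
   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
   radeon_end();      /* stores the updated dword count back into the cs */
}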
@@ -75,6 +75,8 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne
   unsigned sel = EOP_DST_SEL(dst_sel) | EOP_INT_SEL(int_sel) | EOP_DATA_SEL(data_sel);
   bool compute_ib = !ctx->has_graphics || cs == &ctx->prim_discard_compute_cs;

+  radeon_begin(cs);
+
   if (ctx->chip_class >= GFX9 || (compute_ib && ctx->chip_class >= GFX7)) {
      /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
       * counters) must immediately precede every timestamp event to
@@ -136,6 +138,8 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne
      radeon_emit(cs, 0); /* unused */
   }

+  radeon_end();
+
   if (buf) {
      radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
   }
@@ -154,6 +158,7 @@ unsigned si_cp_write_fence_dwords(struct si_screen *screen)
void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, uint64_t va, uint32_t ref,
                    uint32_t mask, unsigned flags)
{
+  radeon_begin(cs);
   radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
   radeon_emit(cs, WAIT_REG_MEM_MEM_SPACE(1) | flags);
   radeon_emit(cs, va);
@@ -161,6 +166,7 @@ void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, uint64_t v
   radeon_emit(cs, ref); /* reference value */
   radeon_emit(cs, mask); /* mask */
   radeon_emit(cs, 4); /* poll interval */
+  radeon_end();
}

static void si_add_fence_dependency(struct si_context *sctx, struct pipe_fence_handle *fence)
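si_cp_release_mem and si_cp_wait_mem now bracket their own packets, so a caller that is itself inside an open region has to close it around the call. Because radeon_begin() introduces block-scope locals, reopening in the same scope uses radeon_begin_again(cs), which reassigns the locals declared by the earlier radeon_begin(); the gfx10_emit_cache_flush hunks below show this. A sketch of the calling pattern, with an illustrative caller name and callees taken from this commit:

static void emit_sync_sketch(struct si_context *ctx, struct radeon_cmdbuf *cs, uint64_t va)
{
   radeon_begin(cs);
   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
   radeon_end();                /* close before the helper opens its own region */

   si_cp_wait_mem(ctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL);

   radeon_begin_again(cs);      /* reuse the locals declared by radeon_begin() above */
   radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
   radeon_emit(cs, 0);
   radeon_end();
}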
@@ -110,8 +110,10 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h

   /* Make sure compute shaders are idle before leaving the IB, so that
    * the next IB doesn't overwrite GDS that might be in use. */
+  radeon_begin(compute_cs);
   radeon_emit(compute_cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(compute_cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+  radeon_end();

   /* Save the GDS prim restart counter if needed. */
   if (ctx->preserve_prim_restart_gds_at_flush) {
@@ -559,6 +561,8 @@ void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, uns

   assert(sctx->chip_class <= GFX9);

+  radeon_begin(cs);
+
   if (sctx->chip_class == GFX9 || compute_ib) {
      /* Flush caches and wait for the caches to assert idle. */
      radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
@@ -576,6 +580,7 @@ void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, uns
      radeon_emit(cs, 0); /* CP_COHER_BASE */
      radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
   }
+  radeon_end();

   /* ACQUIRE_MEM has an implicit context roll if the current context
    * is busy. */
@@ -599,6 +604,8 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
   /* We don't need these. */
   assert(!(flags & (SI_CONTEXT_VGT_STREAMOUT_SYNC | SI_CONTEXT_FLUSH_AND_INV_DB_META)));

+  radeon_begin(cs);
+
   if (flags & SI_CONTEXT_VGT_FLUSH) {
      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
@@ -686,6 +693,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
      ctx->num_cs_flushes++;
      ctx->compute_is_busy = false;
   }
+  radeon_end();

   if (cb_db_event) {
      struct si_resource* wait_mem_scratch = unlikely(ctx->ws->cs_is_secure(cs)) ?
@@ -729,6 +737,8 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
      si_cp_wait_mem(ctx, cs, va, ctx->wait_mem_number, 0xffffffff, WAIT_REG_MEM_EQUAL);
   }

+  radeon_begin_again(cs);
+
   /* Ignore fields that only modify the behavior of other fields. */
   if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
      /* Flush caches and wait for the caches to assert idle.
@@ -757,6 +767,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0));
   }
+  radeon_end();

   ctx->flags = 0;
}
@@ -820,6 +831,8 @@ void si_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
      cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1);
   }

+  radeon_begin(cs);
+
   if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
      /* Flush CMASK/FMASK/DCC. SURFACE_SYNC will wait for idle. */
      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
@@ -868,6 +881,8 @@ void si_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
      radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
   }

+  radeon_end();
+
   /* GFX9: Wait for idle if we're flushing CB or DB. ACQUIRE_MEM doesn't
    * wait for idle on GFX9. We have to use a TS event.
    */
@@ -934,8 +949,10 @@ void si_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
   if (sctx->has_graphics &&
       (cp_coher_cntl || (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_INV_VCACHE |
                                   SI_CONTEXT_INV_L2 | SI_CONTEXT_WB_L2)))) {
+     radeon_begin(cs);
      radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
      radeon_emit(cs, 0);
+     radeon_end();
   }

   /* GFX6-GFX8 only:
@@ -988,11 +1005,15 @@ void si_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
   si_prim_discard_signal_next_compute_ib_start(sctx);

   if (flags & SI_CONTEXT_START_PIPELINE_STATS) {
+     radeon_begin(cs);
      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0));
+     radeon_end();
   } else if (flags & SI_CONTEXT_STOP_PIPELINE_STATS) {
+     radeon_begin(cs);
      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0));
+     radeon_end();
   }

   sctx->flags = 0;
@@ -723,16 +723,20 @@ static void si_pc_emit_instance(struct si_context *sctx, int se, int instance)
      value |= S_030800_INSTANCE_BROADCAST_WRITES(1);
   }

+  radeon_begin(cs);
   radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, value);
+  radeon_end();
}

static void si_pc_emit_shaders(struct si_context *sctx, unsigned shaders)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;

+  radeon_begin(cs);
   radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2, false);
   radeon_emit(cs, shaders & 0x7f);
   radeon_emit(cs, 0xffffffff);
+  radeon_end();
}

static void si_pc_emit_select(struct si_context *sctx, struct si_pc_block *block, unsigned count,
@@ -749,6 +753,8 @@ static void si_pc_emit_select(struct si_context *sctx, struct si_pc_block *block
   if (regs->layout & SI_PC_FAKE)
      return;

+  radeon_begin(cs);
+
   if (layout_multi == SI_PC_MULTI_BLOCK) {
      assert(!(regs->layout & SI_PC_REG_REVERSE));

@@ -826,6 +832,7 @@ static void si_pc_emit_select(struct si_context *sctx, struct si_pc_block *block
         radeon_emit(cs, 0);
      }
   }
+  radeon_end();
}

static void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer, uint64_t va)
@@ -835,12 +842,14 @@ static void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer
   si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address,
                   COPY_DATA_IMM, NULL, 1);

+  radeon_begin(cs);
   radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
   radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
                          S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
+  radeon_end();
}

/* Note: The buffer was already added in si_pc_emit_start, so we don't have to
@@ -853,6 +862,7 @@ static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer,
                     EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY);
   si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL);

+  radeon_begin(cs);
   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
@@ -860,6 +870,7 @@ static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer,
   radeon_set_uconfig_reg(
      cs, R_036020_CP_PERFMON_CNTL,
      S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1));
+  radeon_end();
}

static void si_pc_emit_read(struct si_context *sctx, struct si_pc_block *block, unsigned count,
@@ -871,6 +882,8 @@ static void si_pc_emit_read(struct si_context *sctx, struct si_pc_block *block,
   unsigned reg = regs->counter0_lo;
   unsigned reg_delta = 8;

+  radeon_begin(cs);
+
   if (!(regs->layout & SI_PC_FAKE)) {
      if (regs->layout & SI_PC_REG_REVERSE)
         reg_delta = -reg_delta;
@@ -901,6 +914,7 @@ static void si_pc_emit_read(struct si_context *sctx, struct si_pc_block *block,
         va += sizeof(uint64_t);
      }
   }
+  radeon_end();
}

static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery)
@@ -921,6 +935,8 @@ static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery

void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit)
{
+  radeon_begin(&sctx->gfx_cs);
+
   if (sctx->chip_class >= GFX10) {
      radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL,
                             S_037390_PERFMON_CLOCK_STATE(inhibit));
@@ -928,6 +944,7 @@ void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, b
      radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL,
                             S_0372FC_PERFMON_CLOCK_STATE(inhibit));
   }
+  radeon_end();
}

static void si_pc_query_resume(struct si_context *sctx, struct si_query *squery)
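The perf-counter hunks above wrap even single-register writes such as radeon_set_uconfig_reg() in a begin/end pair, which implies the set-register helpers now expand to radeon_emit() calls against the caller's open region. A sketch of the assumed shape of such a helper — this reconstruction is a guess from the call sites, not the patch's actual definition, and it omits the extra bool parameter visible at the radeon_set_uconfig_reg_seq call sites:

/* Assumption: emits only the PKT3 header and register offset; the caller's
 * following radeon_emit() calls supply the register values. */
#define radeon_set_uconfig_reg_seq_sketch(cs, reg, num) do { \
   radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0)); \
   radeon_emit(cs, ((reg) - CIK_UCONFIG_REG_OFFSET) >> 2); \
} while (0)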
@@ -116,7 +116,9 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
                               RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
   }

+  radeon_begin(cs);
   radeon_emit_array(cs, state->pm4, state->ndw);
+  radeon_end();

   if (state->atom.emit)
      state->atom.emit(sctx);
@@ -25,6 +25,7 @@
 */

#include "si_query.h"
+#include "si_build_pm4.h"

#include "amd/common/sid.h"
#include "si_pipe.h"
@@ -771,10 +772,12 @@ static unsigned event_type_for_stream(unsigned stream)

static void emit_sample_streamout(struct radeon_cmdbuf *cs, uint64_t va, unsigned stream)
{
+  radeon_begin(cs);
   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
   radeon_emit(cs, EVENT_TYPE(event_type_for_stream(stream)) | EVENT_INDEX(3));
   radeon_emit(cs, va);
   radeon_emit(cs, va >> 32);
+  radeon_end();
}

static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_hw *query,
@@ -785,12 +788,15 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h
   switch (query->b.type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
-  case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+  case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
+     radeon_begin(cs);
      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
      radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
      radeon_emit(cs, va);
      radeon_emit(cs, va >> 32);
+     radeon_end();
      break;
+  }
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_SO_STATISTICS:
@@ -805,12 +811,15 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h
      si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
                        EOP_DATA_SEL_TIMESTAMP, NULL, va, 0, query->b.type);
      break;
-  case PIPE_QUERY_PIPELINE_STATISTICS:
+  case PIPE_QUERY_PIPELINE_STATISTICS: {
+     radeon_begin(cs);
      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
      radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
      radeon_emit(cs, va);
      radeon_emit(cs, va >> 32);
+     radeon_end();
      break;
+  }
   default:
      assert(0);
   }
@@ -846,15 +855,18 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw
   switch (query->b.type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
-  case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+  case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
      va += 8;
+     radeon_begin(cs);
      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
      radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
      radeon_emit(cs, va);
      radeon_emit(cs, va >> 32);
+     radeon_end();

      fence_va = va + sctx->screen->info.max_render_backends * 16 - 8;
      break;
+  }
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_SO_STATISTICS:
@@ -879,10 +891,12 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw
      unsigned sample_size = (query->result_size - 8) / 2;

      va += sample_size;
+     radeon_begin(cs);
      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
      radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
      radeon_emit(cs, va);
      radeon_emit(cs, va >> 32);
+     radeon_end();

      fence_va = va + sample_size;
      break;
@@ -934,6 +948,8 @@ static void emit_set_predicate(struct si_context *ctx, struct si_resource *buf,
{
   struct radeon_cmdbuf *cs = &ctx->gfx_cs;

+  radeon_begin(cs);
+
   if (ctx->chip_class >= GFX9) {
      radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
      radeon_emit(cs, op);
@@ -944,6 +960,8 @@ static void emit_set_predicate(struct si_context *ctx, struct si_resource *buf,
      radeon_emit(cs, va);
      radeon_emit(cs, op | ((va >> 32) & 0xFF));
   }
+  radeon_end();

   radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, buf, RADEON_USAGE_READ, RADEON_PRIO_QUERY);
}
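A detail worth noting in the query hunks above: the converted case labels gained braces. radeon_begin() starts with declarations, and in C (before C23) a label must be followed by a statement, not a declaration, so each case body needs its own compound statement to host the region's locals. The resulting shape, condensed from the occlusion-query hunk:

switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER: { /* braces give the begin/end locals a scope */
   radeon_begin(cs);
   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
   radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
   radeon_emit(cs, va);
   radeon_emit(cs, va >> 32);
   radeon_end();
   break;
}
default:
   assert(0);
}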
@@ -35,22 +35,6 @@ static void
si_emit_spi_config_cntl(struct si_context* sctx,
                        struct radeon_cmdbuf *cs, bool enable);

-static inline void
-radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs,
-                                 unsigned reg,
-                                 unsigned value)
-{
-   assert(reg < CIK_UCONFIG_REG_OFFSET);
-
-   radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-   radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
-               COPY_DATA_DST_SEL(COPY_DATA_PERF));
-   radeon_emit(cs, value);
-   radeon_emit(cs, 0); /* unused */
-   radeon_emit(cs, reg >> 2);
-   radeon_emit(cs, 0); /* unused */
-}
-
static bool
si_thread_trace_init_bo(struct si_context *sctx)
{
@@ -89,6 +73,8 @@ si_emit_thread_trace_start(struct si_context* sctx,
   uint32_t shifted_size = sctx->thread_trace->buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
   unsigned max_se = sscreen->info.max_se;

+  radeon_begin(cs);
+
   for (unsigned se = 0; se < max_se; se++) {
      uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
      uint64_t data_va = ac_thread_trace_get_data_va(sctx->thread_trace, va, se);
@@ -220,6 +206,7 @@ si_emit_thread_trace_start(struct si_context* sctx,
      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0));
   }
+  radeon_end();
}

static const uint32_t gfx9_thread_trace_info_regs[] =
@@ -258,6 +245,8 @@ si_copy_thread_trace_info_regs(struct si_context* sctx,
   uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
   uint64_t info_va = ac_thread_trace_get_info_va(va, se_index);

+  radeon_begin(cs);
+
   /* Copy back the info struct one DWORD at a time. */
   for (unsigned i = 0; i < 3; i++) {
      radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
@@ -269,6 +258,7 @@ si_copy_thread_trace_info_regs(struct si_context* sctx,
      radeon_emit(cs, (info_va + i * 4));
      radeon_emit(cs, (info_va + i * 4) >> 32);
   }
+  radeon_end();
}

@@ -280,6 +270,8 @@ si_emit_thread_trace_stop(struct si_context *sctx,
{
   unsigned max_se = sctx->screen->info.max_se;

+  radeon_begin(cs);
+
   /* Stop the thread trace with a different event based on the queue. */
   if (queue_family_index == RING_COMPUTE) {
      radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
@@ -291,8 +283,11 @@ si_emit_thread_trace_stop(struct si_context *sctx,

   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
+  radeon_end();

   for (unsigned se = 0; se < max_se; se++) {
+     radeon_begin(cs);
+
      /* Target SEi and SH0. */
      radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
                             S_030800_SE_INDEX(se) |
@@ -335,15 +330,18 @@ si_emit_thread_trace_stop(struct si_context *sctx,
         radeon_emit(cs, S_030CE8_BUSY(1)); /* mask */
         radeon_emit(cs, 4); /* poll interval */
      }
+     radeon_end();

      si_copy_thread_trace_info_regs(sctx, cs, se);
   }

   /* Restore global broadcasting. */
+  radeon_begin_again(cs);
   radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
                          S_030800_SE_BROADCAST_WRITES(1) |
                          S_030800_SH_BROADCAST_WRITES(1) |
                          S_030800_INSTANCE_BROADCAST_WRITES(1));
+  radeon_end();
}

static void
@@ -351,6 +349,8 @@ si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf
{
   struct radeon_winsys *ws = sctx->ws;

+  radeon_begin(cs);
+
   switch (family) {
   case RING_GFX:
      radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
@@ -362,6 +362,7 @@ si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf
      radeon_emit(cs, 0);
      break;
   }
+  radeon_end();

   ws->cs_add_buffer(cs,
                     sctx->thread_trace->bo,
@@ -390,6 +391,9 @@ static void
si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *cs)
{
   struct radeon_winsys *ws = sctx->ws;

+  radeon_begin(cs);
+
   switch (family) {
   case RING_GFX:
      radeon_emit(sctx->thread_trace->stop_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
@@ -401,6 +405,8 @@ si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *
      radeon_emit(sctx->thread_trace->stop_cs[family], 0);
      break;
   }
+  radeon_end();

   ws->cs_add_buffer(cs,
                     sctx->thread_trace->bo,
                     RADEON_USAGE_READWRITE,
@@ -643,6 +649,8 @@ si_emit_thread_trace_userdata(struct si_context* sctx,
{
   const uint32_t *dwords = (uint32_t *)data;

+  radeon_begin(cs);
+
   while (num_dwords > 0) {
      uint32_t count = MIN2(num_dwords, 2);

@@ -655,12 +663,15 @@ si_emit_thread_trace_userdata(struct si_context* sctx,
      dwords += count;
      num_dwords -= count;
   }
+  radeon_end();
}

static void
si_emit_spi_config_cntl(struct si_context* sctx,
                        struct radeon_cmdbuf *cs, bool enable)
{
+  radeon_begin(cs);
+
   if (sctx->chip_class >= GFX9) {
      uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) |
                                 S_031100_EXP_PRIORITY_ORDER(3) |
@@ -677,6 +688,7 @@ si_emit_spi_config_cntl(
                             S_009100_ENABLE_SQG_TOP_EVENTS(enable) |
                             S_009100_ENABLE_SQG_BOP_EVENTS(enable));
   }
+  radeon_end();
}

void
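The thread-trace hunks above show the loop variant of the pattern: each iteration opens and closes its own region because si_copy_thread_trace_info_regs(), called at the end of the iteration, brackets its own packets, and the broadcast restore after the loop reopens with radeon_begin_again() since the first radeon_begin() already declared the locals in function scope. A condensed sketch of the loop body, with register fields abbreviated from the hunks:

for (unsigned se = 0; se < max_se; se++) {
   radeon_begin(cs);
   /* Target SEi and SH0. */
   radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, S_030800_SE_INDEX(se));
   radeon_end();

   si_copy_thread_trace_info_regs(sctx, cs, se); /* has its own begin/end */
}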
@@ -91,11 +91,13 @@ static void si_emit_cb_render_state(struct si_context *sctx)
   if (sctx->screen->dpbb_allowed && sctx->last_cb_target_mask != cb_target_mask) {
      sctx->last_cb_target_mask = cb_target_mask;

+     radeon_begin(cs);
      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
+     radeon_end();
   }

-  unsigned initial_cdw = cs->current.cdw;
+  radeon_begin(cs);
   radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK,
                              cb_target_mask);

@@ -256,8 +258,7 @@ static void si_emit_cb_render_state(struct si_context *sctx)
      radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT,
                                  sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control);
   }
-  if (initial_cdw != cs->current.cdw)
-     sctx->context_roll = true;
+  radeon_end_update_context_roll(sctx);
}

/*
@@ -689,8 +690,10 @@ static void si_emit_blend_color(struct si_context *sctx)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;

+  radeon_begin(cs);
   radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
   radeon_emit_array(cs, (uint32_t *)sctx->blend_color.state.color, 4);
+  radeon_end();
}

/*
@@ -721,8 +724,10 @@ static void si_emit_clip_state(struct si_context *sctx)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;

+  radeon_begin(cs);
   radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6 * 4);
   radeon_emit_array(cs, (uint32_t *)sctx->clip_state.state.ucp, 6 * 4);
+  radeon_end();
}

static void si_emit_clip_regs(struct si_context *sctx)
@@ -747,7 +752,6 @@ static void si_emit_clip_regs(struct si_context *sctx)
   clipdist_mask &= rs->clip_plane_enable;
   culldist_mask |= clipdist_mask;

-  unsigned initial_cdw = sctx->gfx_cs.current.cdw;
   unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((vs_out_mask & 0x0F) != 0) |
                         S_02881C_VS_OUT_CCDIST1_VEC_ENA((vs_out_mask & 0xF0) != 0) |
                         S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3 &&
@@ -755,6 +759,8 @@ static void si_emit_clip_regs(struct si_context *sctx)
                         S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
                         clipdist_mask | (culldist_mask << 8);

+  radeon_begin(&sctx->gfx_cs);
+
   if (sctx->chip_class >= GFX10) {
      radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
                                     SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, pa_cl_cntl,
@@ -765,9 +771,7 @@ static void si_emit_clip_regs(struct si_context *sctx)
   }
   radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL,
                              rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space));
-  if (initial_cdw != sctx->gfx_cs.current.cdw)
-     sctx->context_roll = true;
+  radeon_end_update_context_roll(sctx);
}

/*
@@ -1048,6 +1052,7 @@ static void si_emit_stencil_ref(struct si_context *sctx)
   struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
   struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;

+  radeon_begin(cs);
   radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
   radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
               S_028430_STENCILMASK(dsa->valuemask[0]) |
@@ -1056,6 +1061,7 @@ static void si_emit_stencil_ref(struct si_context *sctx)
               S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
               S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
               S_028434_STENCILOPVAL_BF(1));
+  radeon_end();
}

static void si_set_stencil_ref(struct pipe_context *ctx, const struct pipe_stencil_ref state)
@@ -1334,7 +1340,6 @@ static void si_emit_db_render_state(struct si_context *sctx)
{
   struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
   unsigned db_shader_control, db_render_control, db_count_control;
-  unsigned initial_cdw = sctx->gfx_cs.current.cdw;

   /* DB_RENDER_CONTROL */
   if (sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled) {
@@ -1374,6 +1379,7 @@ static void si_emit_db_render_state(struct si_context *sctx)
      }
   }

+  radeon_begin(&sctx->gfx_cs);
   radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL,
                               db_render_control, db_count_control);

@@ -1427,9 +1433,7 @@ static void si_emit_db_render_state(struct si_context *sctx)
                                 S_028064_VRS_OVERRIDE_RATE_Y(0));
      }
   }
-  if (initial_cdw != sctx->gfx_cs.current.cdw)
-     sctx->context_roll = true;
+  radeon_end_update_context_roll(sctx);
}

/*
@@ -2909,6 +2913,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
   struct si_surface *cb = NULL;
   unsigned cb_color_info = 0;

+  radeon_begin(cs);
+
   /* Colorbuffers. */
   for (i = 0; i < nr_cbufs; i++) {
      uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, cb_dcc_base;
@@ -3260,6 +3266,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
   }
+  radeon_end();

   si_update_display_dcc_dirty(sctx);

@@ -3292,6 +3299,8 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx)
      si_emit_sample_locations(cs, nr_samples);
   }

+  radeon_begin(cs);
+
   if (sctx->family >= CHIP_POLARIS10) {
      unsigned small_prim_filter_cntl =
         S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
@@ -3323,6 +3332,7 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx)
   radeon_opt_set_context_reg(
      sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
      S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
+  radeon_end();
}

static bool si_out_of_order_rasterization(struct si_context *sctx)
@@ -3501,7 +3511,7 @@ static void si_emit_msaa_config(struct si_context *sctx)
      }
   }

-  unsigned initial_cdw = cs->current.cdw;
+  radeon_begin(cs);

   /* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */
   radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL,
@@ -3512,7 +3522,7 @@ static void si_emit_msaa_config(struct si_context *sctx)
   radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1,
                              sc_mode_cntl_1);

-  if (initial_cdw != cs->current.cdw) {
+  if (radeon_packets_added()) {
      sctx->context_roll = true;

      /* GFX9: Flush DFSM when the AA mode changes. */
@@ -3521,6 +3531,7 @@ static void si_emit_msaa_config(struct si_context *sctx)
         radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
      }
   }
+  radeon_end();
}

void si_update_ps_iter_samples(struct si_context *sctx)
@@ -4509,9 +4520,11 @@ static void si_emit_sample_mask(struct si_context *sctx)
   assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
          (mask & 1 && sctx->blitter->running));

+  radeon_begin(cs);
   radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
   radeon_emit(cs, mask | (mask << 16));
   radeon_emit(cs, mask | (mask << 16));
+  radeon_end();
}

static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
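The si_state hunks above also retire the manual dword-count bookkeeping: where a function previously saved cs->current.cdw and compared it after emitting to detect a context roll, radeon_end_update_context_roll(sctx) now closes the region and sets sctx->context_roll if anything was emitted, while radeon_packets_added() answers the same question mid-region (si_emit_msaa_config uses it above to decide whether to flush DFSM on GFX9). The two styles side by side, taken from the CB_TARGET_MASK hunk:

/* Before: manual comparison around the optimized register writes. */
unsigned initial_cdw = cs->current.cdw;
radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK,
                           cb_target_mask);
if (initial_cdw != cs->current.cdw)
   sctx->context_roll = true;

/* After: the region bookkeeping already knows whether packets were added. */
radeon_begin(cs);
radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK,
                           cb_target_mask);
radeon_end_update_context_roll(sctx);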
@@ -404,7 +404,7 @@ static void gfx10_get_bin_sizes(struct si_context *sctx, unsigned cb_target_enab

static void si_emit_dpbb_disable(struct si_context *sctx)
{
-  unsigned initial_cdw = sctx->gfx_cs.current.cdw;
+  radeon_begin(&sctx->gfx_cs);

   if (sctx->chip_class >= GFX10) {
      struct uvec2 bin_size = {};
@@ -441,8 +441,7 @@ static void si_emit_dpbb_disable(struct si_context *sctx)
   radeon_opt_set_context_reg(
      sctx, db_dfsm_control, SI_TRACKED_DB_DFSM_CONTROL,
      S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
-  if (initial_cdw != sctx->gfx_cs.current.cdw)
-     sctx->context_roll = true;
+  radeon_end_update_context_roll(sctx);

   sctx->last_binning_enabled = false;
}
@@ -526,7 +525,7 @@ void si_emit_dpbb_state(struct si_context *sctx)
   if (bin_size.y >= 32)
      bin_size_extend.y = util_logbase2(bin_size.y) - 5;

-  unsigned initial_cdw = sctx->gfx_cs.current.cdw;
+  radeon_begin(&sctx->gfx_cs);
   radeon_opt_set_context_reg(
      sctx, R_028C44_PA_SC_BINNER_CNTL_0, SI_TRACKED_PA_SC_BINNER_CNTL_0,
      S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | S_028C44_BIN_SIZE_X(bin_size.x == 16) |
@@ -546,8 +545,7 @@ void si_emit_dpbb_state(struct si_context *sctx)
   radeon_opt_set_context_reg(
      sctx, db_dfsm_control, SI_TRACKED_DB_DFSM_CONTROL,
      S_028060_PUNCHOUT_MODE(punchout_mode) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
-  if (initial_cdw != sctx->gfx_cs.current.cdw)
-     sctx->context_roll = true;
+  radeon_end_update_context_roll(sctx);

   sctx->last_binning_enabled = true;
}
@ -399,6 +399,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
||||||
assert(ls_current->config.lds_size == 0);
|
assert(ls_current->config.lds_size == 0);
|
||||||
|
|
||||||
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
||||||
|
radeon_begin(cs);
|
||||||
|
|
||||||
if (sctx->chip_class >= GFX9) {
|
if (sctx->chip_class >= GFX9) {
|
||||||
unsigned hs_rsrc2 = ls_current->config.rsrc2;
|
unsigned hs_rsrc2 = ls_current->config.rsrc2;
|
||||||
|
|
@ -443,6 +444,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
||||||
radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 2);
|
radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 2);
|
||||||
radeon_emit(cs, offchip_layout);
|
radeon_emit(cs, offchip_layout);
|
||||||
radeon_emit(cs, ring_va);
|
radeon_emit(cs, ring_va);
|
||||||
|
radeon_end();
|
||||||
|
|
||||||
unsigned ls_hs_config =
|
unsigned ls_hs_config =
|
||||||
S_028B58_NUM_PATCHES(*num_patches) |
|
S_028B58_NUM_PATCHES(*num_patches) |
|
||||||
|
|
@ -450,13 +452,14 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
||||||
S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
|
S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
|
||||||
|
|
||||||
if (sctx->last_ls_hs_config != ls_hs_config) {
|
if (sctx->last_ls_hs_config != ls_hs_config) {
|
||||||
|
radeon_begin(cs);
|
||||||
if (sctx->chip_class >= GFX7) {
|
if (sctx->chip_class >= GFX7) {
|
||||||
radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
|
radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
|
||||||
} else {
|
} else {
|
||||||
radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
|
radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
|
||||||
}
|
}
|
||||||
|
radeon_end_update_context_roll(sctx);
|
||||||
sctx->last_ls_hs_config = ls_hs_config;
|
sctx->last_ls_hs_config = ls_hs_config;
|
||||||
sctx->context_roll = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -734,7 +737,8 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
|
||||||
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
||||||
enum pipe_prim_type rast_prim = sctx->current_rast_prim;
|
enum pipe_prim_type rast_prim = sctx->current_rast_prim;
|
||||||
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
||||||
unsigned initial_cdw = cs->current.cdw;
|
|
||||||
|
radeon_begin(cs);
|
||||||
|
|
||||||
if (unlikely(si_is_line_stipple_enabled(sctx))) {
|
if (unlikely(si_is_line_stipple_enabled(sctx))) {
|
||||||
/* For lines, reset the stipple pattern at each primitive. Otherwise,
|
/* For lines, reset the stipple pattern at each primitive. Otherwise,
|
||||||
|
|
@ -756,8 +760,10 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
|
||||||
sctx->last_gs_out_prim = gs_out_prim;
|
sctx->last_gs_out_prim = gs_out_prim;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (GFX_VERSION == GFX9 && initial_cdw != cs->current.cdw)
|
if (GFX_VERSION == GFX9)
|
||||||
sctx->context_roll = true;
|
radeon_end_update_context_roll(sctx);
|
||||||
|
else
|
||||||
|
radeon_end();
|
||||||
|
|
||||||
if (NGG) {
|
if (NGG) {
|
||||||
struct si_shader *hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current;
|
struct si_shader *hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current;
|
||||||
|
|
@@ -797,6 +803,7 @@ static void si_emit_vs_state(struct si_context *sctx, unsigned index_size)
       /* For the API vertex shader (VS_STATE_INDEXED, LS_OUT_*). */
       unsigned vs_base = si_get_user_data_base(GFX_VERSION, HAS_TESS, HAS_GS, NGG,
                                                PIPE_SHADER_VERTEX);
+      radeon_begin(cs);
       radeon_set_sh_reg(cs, vs_base + SI_SGPR_VS_STATE_BITS * 4,
                         sctx->current_vs_state);
 
@@ -815,6 +822,7 @@ static void si_emit_vs_state(struct si_context *sctx, unsigned index_size)
          radeon_set_sh_reg(cs, R_00B230_SPI_SHADER_USER_DATA_GS_0 + SI_SGPR_VS_STATE_BITS * 4,
                            sctx->current_vs_state);
       }
+      radeon_end();
 
       sctx->last_vs_state = sctx->current_vs_state;
    }
@@ -845,14 +853,18 @@ static void si_emit_ia_multi_vgt_param(struct si_context *sctx,
 
    /* Draw state. */
    if (ia_multi_vgt_param != sctx->last_multi_vgt_param) {
+      radeon_begin(cs);
+
       if (GFX_VERSION == GFX9)
-         radeon_set_uconfig_reg_idx(cs, sctx->screen, R_030960_IA_MULTI_VGT_PARAM, 4,
-                                    ia_multi_vgt_param);
+         radeon_set_uconfig_reg_idx(cs, sctx->screen, GFX_VERSION,
+                                    R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param);
       else if (GFX_VERSION >= GFX7)
          radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
       else
          radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
 
+      radeon_end();
+
       sctx->last_multi_vgt_param = ia_multi_vgt_param;
    }
 }
@@ -897,7 +909,11 @@ static void gfx10_emit_ge_cntl(struct si_context *sctx, unsigned num_patches)
    ge_cntl |= S_03096C_PACKET_TO_ONE_PA(si_is_line_stipple_enabled(sctx));
 
    if (ge_cntl != sctx->last_multi_vgt_param) {
-      radeon_set_uconfig_reg(&sctx->gfx_cs, R_03096C_GE_CNTL, ge_cntl);
+      struct radeon_cmdbuf *cs = &sctx->gfx_cs;
+
+      radeon_begin(cs);
+      radeon_set_uconfig_reg(cs, R_03096C_GE_CNTL, ge_cntl);
+      radeon_end();
       sctx->last_multi_vgt_param = ge_cntl;
    }
 }
@@ -919,13 +935,15 @@ static void si_emit_draw_registers(struct si_context *sctx,
          (sctx, indirect, prim, num_patches, instance_count, primitive_restart,
           min_vertex_count, vertices_per_patch);
 
+   radeon_begin(cs);
+
    if (prim != sctx->last_prim) {
       unsigned vgt_prim = si_conv_pipe_prim(prim);
 
       if (GFX_VERSION >= GFX10)
         radeon_set_uconfig_reg(cs, R_030908_VGT_PRIMITIVE_TYPE, vgt_prim);
       else if (GFX_VERSION >= GFX7)
-         radeon_set_uconfig_reg_idx(cs, sctx->screen, R_030908_VGT_PRIMITIVE_TYPE, 1, vgt_prim);
+         radeon_set_uconfig_reg_idx(cs, sctx->screen, GFX_VERSION, R_030908_VGT_PRIMITIVE_TYPE, 1, vgt_prim);
       else
          radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, vgt_prim);
 
@@ -947,14 +965,17 @@ static void si_emit_draw_registers(struct si_context *sctx,
       if (GFX_VERSION == GFX9)
          sctx->context_roll = true;
    }
+   radeon_end();
 }
 
 #define EMIT_SQTT_END_DRAW do { \
    if (GFX_VERSION >= GFX9 && unlikely(sctx->thread_trace_enabled)) { \
+      radeon_begin(&sctx->gfx_cs); \
       radeon_emit(&sctx->gfx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); \
       radeon_emit(&sctx->gfx_cs, \
                   EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | \
                   EVENT_INDEX(0)); \
+      radeon_end(); \
    } \
 } while (0)
@@ -979,7 +1000,10 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
    if (indirect && indirect->count_from_stream_output) {
       struct si_streamout_target *t = (struct si_streamout_target *)indirect->count_from_stream_output;
 
+      radeon_begin(cs);
       radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
+      radeon_end();
+
       si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_REG, NULL,
                       R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2, COPY_DATA_SRC_MEM,
                       t->buf_filled_size, t->buf_filled_size_offset);
@@ -990,6 +1014,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
    uint32_t index_max_size = 0;
    uint64_t index_va = 0;
 
+   radeon_begin(cs);
+
    /* draw packet */
    if (index_size) {
       /* Register shadowing doesn't shadow INDEX_TYPE. */
@@ -1017,7 +1043,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
       }
 
       if (GFX_VERSION >= GFX9) {
-         radeon_set_uconfig_reg_idx(cs, sctx->screen, R_03090C_VGT_INDEX_TYPE, 2, index_type);
+         radeon_set_uconfig_reg_idx(cs, sctx->screen, GFX_VERSION,
+                                    R_03090C_VGT_INDEX_TYPE, 2, index_type);
       } else {
          radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
         radeon_emit(cs, index_type);
@@ -1032,8 +1059,10 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
       /* Skip draw calls with 0-sized index buffers.
        * They cause a hang on some chips, like Navi10-14.
        */
-      if (!index_max_size)
+      if (!index_max_size) {
+         radeon_end();
          return;
+      }
 
       index_va = si_resource(indexbuf)->gpu_address + index_offset;
 
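Note the shape of the early-out above: every exit path inside a bracket must go through radeon_end(), because that is the only place the cached cursor is stored back to cs->current.cdw; returning early would silently drop the dwords emitted since radeon_begin(). The rule, sketched with names from the hunk above:

   radeon_begin(cs);
   radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
   radeon_emit(cs, index_type);
   if (!index_max_size) {
      radeon_end();   /* store the cursor back before bailing out */
      return;
   }
   /* ... more packets ... */
   radeon_end();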
@@ -1173,6 +1202,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
 
    if (index_size) {
       if (ALLOW_PRIM_DISCARD_CS && dispatch_prim_discard_cs) {
+         radeon_end();
+
          for (unsigned i = 0; i < num_draws; i++) {
             uint64_t va = index_va + draws[0].start * original_index_size;
 
@@ -1238,6 +1269,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
          radeon_emit(cs, draws[i].count);
          radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX);
       }
+      radeon_end();
+
       EMIT_SQTT_END_DRAW;
       return;
    }
@@ -1265,6 +1298,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
          sctx->last_base_vertex = draws[num_draws - 1].start;
       }
    }
+   radeon_end();
 
    EMIT_SQTT_END_DRAW;
 }
@@ -2181,8 +2215,10 @@ void si_trace_emit(struct si_context *sctx)
 
    si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf, 0, 4, V_370_MEM, V_370_ME, &trace_id);
 
+   radeon_begin(cs);
    radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
    radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id));
+   radeon_end();
 
    if (sctx->log)
       u_log_flush(sctx->log);
@@ -150,6 +150,7 @@ static void si_get_sample_position(struct pipe_context *ctx, unsigned sample_cou
 static void si_emit_max_4_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority,
                                       uint32_t sample_locs)
 {
+   radeon_begin(cs);
    radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
    radeon_emit(cs, centroid_priority);
    radeon_emit(cs, centroid_priority >> 32);
@@ -157,11 +158,13 @@ static void si_emit_max_4_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroi
    radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs);
    radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs);
    radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs);
+   radeon_end();
 }
 
 static void si_emit_max_16_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority,
                                        const uint32_t *sample_locs, unsigned num_samples)
 {
+   radeon_begin(cs);
    radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
    radeon_emit(cs, centroid_priority);
    radeon_emit(cs, centroid_priority >> 32);
@@ -171,6 +174,7 @@ static void si_emit_max_16_sample_locs(struct radeon_cmdbuf *cs, uint64_t centro
    radeon_emit_array(cs, sample_locs, 4);
    radeon_emit_array(cs, sample_locs, 4);
    radeon_emit_array(cs, sample_locs, num_samples == 8 ? 2 : 4);
+   radeon_end();
 }
 
 void si_emit_sample_locations(struct radeon_cmdbuf *cs, int nr_samples)
@@ -566,11 +566,10 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
 static void si_emit_shader_es(struct si_context *sctx)
 {
    struct si_shader *shader = sctx->queued.named.es->shader;
-   unsigned initial_cdw = sctx->gfx_cs.current.cdw;
 
    if (!shader)
       return;
 
+   radeon_begin(&sctx->gfx_cs);
    radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
                               SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
                               shader->selector->esgs_itemsize / 4);
@@ -583,9 +582,7 @@ static void si_emit_shader_es(struct si_context *sctx)
    radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
                               SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
                               shader->vgt_vertex_reuse_block_cntl);
-
-   if (initial_cdw != sctx->gfx_cs.current.cdw)
-      sctx->context_roll = true;
+   radeon_end_update_context_roll(sctx);
 }
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
@@ -729,11 +726,11 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *
 static void si_emit_shader_gs(struct si_context *sctx)
 {
    struct si_shader *shader = sctx->queued.named.gs->shader;
-   unsigned initial_cdw = sctx->gfx_cs.current.cdw;
 
    if (!shader)
       return;
 
+   radeon_begin(&sctx->gfx_cs);
+
    /* R_028A60_VGT_GSVS_RING_OFFSET_1, R_028A64_VGT_GSVS_RING_OFFSET_2
     * R_028A68_VGT_GSVS_RING_OFFSET_3 */
    radeon_opt_set_context_reg3(
@@ -782,9 +779,7 @@ static void si_emit_shader_gs(struct si_context *sctx)
                                  SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
                                  shader->vgt_vertex_reuse_block_cntl);
    }
-
-   if (initial_cdw != sctx->gfx_cs.current.cdw)
-      sctx->context_roll = true;
+   radeon_end_update_context_roll(sctx);
 }
 
 static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
@@ -931,6 +926,8 @@ static void gfx10_emit_ge_pc_alloc(struct si_context *sctx, unsigned value)
        sctx->tracked_regs.reg_value[reg] != value) {
       struct radeon_cmdbuf *cs = &sctx->gfx_cs;
 
+      radeon_begin(cs);
+
       if (sctx->chip_class == GFX10) {
          /* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */
         radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
@@ -938,6 +935,7 @@ static void gfx10_emit_ge_pc_alloc(struct si_context *sctx, unsigned value)
       }
 
       radeon_set_uconfig_reg(cs, R_030980_GE_PC_ALLOC, value);
+      radeon_end();
 
       sctx->tracked_regs.reg_saved |= 0x1ull << reg;
       sctx->tracked_regs.reg_value[reg] = value;
@@ -945,9 +943,9 @@ static void gfx10_emit_ge_pc_alloc(struct si_context *sctx, unsigned value)
 }
 
 /* Common tail code for NGG primitive shaders. */
-static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader *shader,
-                                       unsigned initial_cdw)
+static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader *shader)
 {
+   radeon_begin(&sctx->gfx_cs);
    radeon_opt_set_context_reg(sctx, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
                               SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP,
                               shader->ctx_reg.ngg.ge_max_output_per_subgroup);
@@ -975,9 +973,7 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader
    radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
                                   SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, shader->pa_cl_vs_out_cntl,
                                   SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
-
-   if (initial_cdw != sctx->gfx_cs.current.cdw)
-      sctx->context_roll = true;
+   radeon_end_update_context_roll(sctx);
 
    /* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */
    gfx10_emit_ge_pc_alloc(sctx, shader->ctx_reg.ngg.ge_pc_alloc);
@@ -986,56 +982,55 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader
 static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx)
 {
    struct si_shader *shader = sctx->queued.named.gs->shader;
-   unsigned initial_cdw = sctx->gfx_cs.current.cdw;
 
    if (!shader)
       return;
 
-   gfx10_emit_shader_ngg_tail(sctx, shader, initial_cdw);
+   gfx10_emit_shader_ngg_tail(sctx, shader);
 }
 
 static void gfx10_emit_shader_ngg_tess_nogs(struct si_context *sctx)
 {
    struct si_shader *shader = sctx->queued.named.gs->shader;
-   unsigned initial_cdw = sctx->gfx_cs.current.cdw;
 
    if (!shader)
       return;
 
+   radeon_begin(&sctx->gfx_cs);
    radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM,
                               shader->vgt_tf_param);
+   radeon_end_update_context_roll(sctx);
 
-   gfx10_emit_shader_ngg_tail(sctx, shader, initial_cdw);
+   gfx10_emit_shader_ngg_tail(sctx, shader);
 }
 
 static void gfx10_emit_shader_ngg_notess_gs(struct si_context *sctx)
 {
    struct si_shader *shader = sctx->queued.named.gs->shader;
-   unsigned initial_cdw = sctx->gfx_cs.current.cdw;
 
    if (!shader)
       return;
 
+   radeon_begin(&sctx->gfx_cs);
    radeon_opt_set_context_reg(sctx, R_028B38_VGT_GS_MAX_VERT_OUT, SI_TRACKED_VGT_GS_MAX_VERT_OUT,
                               shader->ctx_reg.ngg.vgt_gs_max_vert_out);
+   radeon_end_update_context_roll(sctx);
 
-   gfx10_emit_shader_ngg_tail(sctx, shader, initial_cdw);
+   gfx10_emit_shader_ngg_tail(sctx, shader);
 }
 
 static void gfx10_emit_shader_ngg_tess_gs(struct si_context *sctx)
 {
    struct si_shader *shader = sctx->queued.named.gs->shader;
-   unsigned initial_cdw = sctx->gfx_cs.current.cdw;
 
    if (!shader)
      return;
 
+   radeon_begin(&sctx->gfx_cs);
    radeon_opt_set_context_reg(sctx, R_028B38_VGT_GS_MAX_VERT_OUT, SI_TRACKED_VGT_GS_MAX_VERT_OUT,
                               shader->ctx_reg.ngg.vgt_gs_max_vert_out);
    radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM,
                               shader->vgt_tf_param);
+   radeon_end_update_context_roll(sctx);
 
-   gfx10_emit_shader_ngg_tail(sctx, shader, initial_cdw);
+   gfx10_emit_shader_ngg_tail(sctx, shader);
 }
 
 unsigned si_get_input_prim(const struct si_shader_selector *gs)
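With the brackets function-local, the initial_cdw parameter threading through gfx10_emit_shader_ngg_tail() and its four callers disappears. Brackets do not nest -- radeon_begin() declares block-scoped locals -- so a caller closes its own bracket before calling into another emitter, which then opens a fresh one. The general shape, as a sketch (emit_caller/emit_callee are illustrative names, not functions from this patch):

   static void emit_callee(struct si_context *sctx)
   {
      radeon_begin(&sctx->gfx_cs);
      /* ... callee's packets ... */
      radeon_end_update_context_roll(sctx);
   }

   static void emit_caller(struct si_context *sctx)
   {
      radeon_begin(&sctx->gfx_cs);
      /* ... caller's packets ... */
      radeon_end_update_context_roll(sctx);   /* close before the call */

      emit_callee(sctx);                      /* opens its own bracket */
   }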
@@ -1308,11 +1303,10 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
 static void si_emit_shader_vs(struct si_context *sctx)
 {
    struct si_shader *shader = sctx->queued.named.vs->shader;
-   unsigned initial_cdw = sctx->gfx_cs.current.cdw;
 
    if (!shader)
       return;
 
+   radeon_begin(&sctx->gfx_cs);
    radeon_opt_set_context_reg(sctx, R_028A40_VGT_GS_MODE, SI_TRACKED_VGT_GS_MODE,
                               shader->ctx_reg.vs.vgt_gs_mode);
    radeon_opt_set_context_reg(sctx, R_028A84_VGT_PRIMITIVEID_EN, SI_TRACKED_VGT_PRIMITIVEID_EN,
@@ -1356,9 +1350,7 @@ static void si_emit_shader_vs(struct si_context *sctx)
                                      SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, shader->pa_cl_vs_out_cntl,
                                      SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
    }
-
-   if (initial_cdw != sctx->gfx_cs.current.cdw)
-      sctx->context_roll = true;
+   radeon_end_update_context_roll(sctx);
 
    /* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */
    if (sctx->chip_class >= GFX10)
@@ -1536,11 +1528,10 @@ static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
 static void si_emit_shader_ps(struct si_context *sctx)
 {
    struct si_shader *shader = sctx->queued.named.ps->shader;
-   unsigned initial_cdw = sctx->gfx_cs.current.cdw;
 
    if (!shader)
       return;
 
+   radeon_begin(&sctx->gfx_cs);
    /* R_0286CC_SPI_PS_INPUT_ENA, R_0286D0_SPI_PS_INPUT_ADDR*/
    radeon_opt_set_context_reg2(sctx, R_0286CC_SPI_PS_INPUT_ENA, SI_TRACKED_SPI_PS_INPUT_ENA,
                                shader->ctx_reg.ps.spi_ps_input_ena,
@@ -1558,9 +1549,7 @@ static void si_emit_shader_ps(struct si_context *sctx)
 
    radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK, SI_TRACKED_CB_SHADER_MASK,
                               shader->ctx_reg.ps.cb_shader_mask);
-
-   if (initial_cdw != sctx->gfx_cs.current.cdw)
-      sctx->context_roll = true;
+   radeon_end_update_context_roll(sctx);
 }
 
 static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
@@ -3371,12 +3360,10 @@ static void si_emit_spi_map(struct si_context *sctx)
    /* R_028644_SPI_PS_INPUT_CNTL_0 */
    /* Dota 2: Only ~16% of SPI map updates set different values. */
    /* Talos: Only ~9% of SPI map updates set different values. */
-   unsigned initial_cdw = sctx->gfx_cs.current.cdw;
+   radeon_begin(&sctx->gfx_cs);
    radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0, spi_ps_input_cntl,
                                sctx->tracked_regs.spi_ps_input_cntl, num_interp);
-
-   if (initial_cdw != sctx->gfx_cs.current.cdw)
-      sctx->context_roll = true;
+   radeon_end_update_context_roll(sctx);
 }
 
 /**
@@ -3405,6 +3392,8 @@ static void si_cs_preamble_add_vgt_flush(struct si_context *sctx)
  */
 static void si_emit_vgt_flush(struct radeon_cmdbuf *cs)
 {
+   radeon_begin(cs);
+
    /* This is required before VGT_FLUSH. */
    radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
    radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
@@ -3412,6 +3401,7 @@ static void si_emit_vgt_flush(struct radeon_cmdbuf *cs)
    /* VGT_FLUSH is required even if VGT is idle. It resets VGT pointers. */
    radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
    radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+   radeon_end();
 }
 
 /* Initialize state related to ESGS / GSVS ring buffers */
@@ -3505,6 +3495,8 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 
    si_emit_vgt_flush(cs);
 
+   radeon_begin(cs);
+
    /* Set the GS registers. */
    if (sctx->esgs_ring) {
       assert(sctx->chip_class <= GFX8);
@@ -3515,6 +3507,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
       radeon_set_uconfig_reg(cs, R_030904_VGT_GSVS_RING_SIZE,
                              sctx->gsvs_ring->width0 / 256);
    }
+   radeon_end();
    return true;
 }
 
@@ -3789,6 +3782,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
       si_emit_vgt_flush(cs);
 
       /* Set tessellation registers. */
+      radeon_begin(cs);
       radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
                              S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4));
       radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, factor_va >> 8);
@@ -3801,6 +3795,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
       }
       radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
                              sctx->screen->vgt_hs_offchip_param);
+      radeon_end();
       return;
    }
 
@@ -4153,7 +4148,9 @@ static void si_emit_scratch_state(struct si_context *sctx)
 {
    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
 
+   radeon_begin(cs);
    radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE, sctx->spi_tmpring_size);
+   radeon_end();
 
    if (sctx->scratch_buffer) {
       radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->scratch_buffer, RADEON_USAGE_READWRITE,
@@ -221,6 +221,8 @@ static void gfx10_emit_streamout_begin(struct si_context *sctx)
         last_target = i;
    }
 
+   radeon_begin(cs);
+
    for (unsigned i = 0; i < sctx->streamout.num_targets; i++) {
       if (!t[i])
         continue;
@@ -246,6 +248,7 @@ static void gfx10_emit_streamout_begin(struct si_context *sctx)
       radeon_emit(cs, 0);
       radeon_emit(cs, S_414_BYTE_COUNT_GFX9(4) | S_414_DISABLE_WR_CONFIRM_GFX9(i != last_target));
    }
+   radeon_end();
 
    sctx->streamout.begin_emitted = true;
 }
@@ -275,6 +278,8 @@ static void si_flush_vgt_streamout(struct si_context *sctx)
    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
    unsigned reg_strmout_cntl;
 
+   radeon_begin(cs);
+
    /* The register is at different places on different ASICs. */
    if (sctx->chip_class >= GFX7) {
       reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
@@ -295,6 +300,7 @@ static void si_flush_vgt_streamout(struct si_context *sctx)
    radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */
    radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */
    radeon_emit(cs, 4); /* poll interval */
+   radeon_end();
 }
 
 static void si_emit_streamout_begin(struct si_context *sctx)
@@ -306,6 +312,8 @@ static void si_emit_streamout_begin(struct si_context *sctx)
 
    si_flush_vgt_streamout(sctx);
 
+   radeon_begin(cs);
+
    for (i = 0; i < sctx->streamout.num_targets; i++) {
       if (!t[i])
         continue;
@@ -344,6 +352,7 @@ static void si_emit_streamout_begin(struct si_context *sctx)
          radeon_emit(cs, 0); /* unused */
       }
    }
+   radeon_end();
 
    sctx->streamout.begin_emitted = true;
 }
@@ -362,6 +371,8 @@ void si_emit_streamout_end(struct si_context *sctx)
 
    si_flush_vgt_streamout(sctx);
 
+   radeon_begin(cs);
+
    for (i = 0; i < sctx->streamout.num_targets; i++) {
       if (!t[i])
         continue;
@@ -383,10 +394,10 @@ void si_emit_streamout_end(struct si_context *sctx)
        * buffer bound. This ensures that the primitives-emitted query
        * won't increment. */
       radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 0);
-      sctx->context_roll = true;
 
       t[i]->buf_filled_size_valid = true;
    }
+   radeon_end_update_context_roll(sctx);
 
    sctx->streamout.begin_emitted = false;
 }
@@ -402,6 +413,7 @@ static void si_emit_streamout_enable(struct si_context *sctx)
 {
    assert(!sctx->screen->use_ngg_streamout);
 
+   radeon_begin(&sctx->gfx_cs);
    radeon_set_context_reg_seq(&sctx->gfx_cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
    radeon_emit(&sctx->gfx_cs, S_028B94_STREAMOUT_0_EN(si_get_strmout_en(sctx)) |
                S_028B94_RAST_STREAM(0) |
@@ -410,6 +422,7 @@ static void si_emit_streamout_enable(struct si_context *sctx)
                S_028B94_STREAMOUT_3_EN(si_get_strmout_en(sctx)));
    radeon_emit(&sctx->gfx_cs,
                sctx->streamout.hw_enabled_mask & sctx->streamout.enabled_stream_buffers_mask);
+   radeon_end();
 }
 
 static void si_set_streamout_enable(struct si_context *sctx, bool enable)
@@ -103,8 +103,10 @@ static void si_emit_cull_state(struct si_context *sctx)
    /* This will end up in SGPR6 as (value << 8), shifted by the hw. */
    radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->small_prim_cull_info_buf,
                              RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
+   radeon_begin(&sctx->gfx_cs);
    radeon_set_sh_reg(&sctx->gfx_cs, R_00B220_SPI_SHADER_PGM_LO_GS,
                      sctx->small_prim_cull_info_address >> 8);
+   radeon_end();
 
    /* Set VS_STATE.SMALL_PRIM_PRECISION for NGG culling.
    *
@@ -213,18 +215,22 @@ static void si_emit_one_scissor(struct si_context *ctx, struct radeon_cmdbuf *cs
    if (scissor)
       si_clip_scissor(&final, scissor);
 
+   radeon_begin(cs);
+
    /* Workaround for a hw bug on GFX6 that occurs when PA_SU_HARDWARE_-
    * SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0.
    */
    if (ctx->chip_class == GFX6 && (final.maxx == 0 || final.maxy == 0)) {
      radeon_emit(cs, S_028250_TL_X(1) | S_028250_TL_Y(1) | S_028250_WINDOW_OFFSET_DISABLE(1));
      radeon_emit(cs, S_028254_BR_X(1) | S_028254_BR_Y(1));
+      radeon_end();
      return;
    }
 
    radeon_emit(cs, S_028250_TL_X(final.minx) | S_028250_TL_Y(final.miny) |
                S_028250_WINDOW_OFFSET_DISABLE(1));
    radeon_emit(cs, S_028254_BR_X(final.maxx) | S_028254_BR_Y(final.maxy));
+   radeon_end();
 }
 
 #define MAX_PA_SU_HARDWARE_SCREEN_OFFSET 8176
@@ -350,7 +356,7 @@ static void si_emit_guardband(struct si_context *ctx)
    * R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ
    * R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ
    */
-   unsigned initial_cdw = ctx->gfx_cs.current.cdw;
+   radeon_begin(&ctx->gfx_cs);
    radeon_opt_set_context_reg4(ctx, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ,
                                SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, fui(guardband_y), fui(discard_y),
                                fui(guardband_x), fui(discard_x));
@@ -362,8 +368,7 @@ static void si_emit_guardband(struct si_context *ctx)
       ctx, R_028BE4_PA_SU_VTX_CNTL, SI_TRACKED_PA_SU_VTX_CNTL,
       S_028BE4_PIX_CENTER(rs->half_pixel_center) |
       S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH + vp_as_scissor.quant_mode));
-   if (initial_cdw != ctx->gfx_cs.current.cdw)
-      ctx->context_roll = true;
+   radeon_end_update_context_roll(ctx);
 }
 
 static void si_emit_scissors(struct si_context *ctx)
@@ -376,7 +381,10 @@ static void si_emit_scissors(struct si_context *ctx)
    if (!ctx->vs_writes_viewport_index) {
       struct si_signed_scissor *vp = &ctx->viewports.as_scissor[0];
 
+      radeon_begin(cs);
       radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
+      radeon_end();
 
       si_emit_one_scissor(ctx, cs, vp, scissor_enabled ? &states[0] : NULL);
       return;
    }
@@ -384,7 +392,10 @@ static void si_emit_scissors(struct si_context *ctx)
    /* All registers in the array need to be updated if any of them is changed.
    * This is a hardware requirement.
    */
+   radeon_begin(cs);
    radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, SI_MAX_VIEWPORTS * 2);
+   radeon_end();
+
    for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++) {
      si_emit_one_scissor(ctx, cs, &ctx->viewports.as_scissor[i],
                          scissor_enabled ? &states[i] : NULL);
@@ -477,12 +488,14 @@ static void si_emit_one_viewport(struct si_context *ctx, struct pipe_viewport_st
 {
    struct radeon_cmdbuf *cs = &ctx->gfx_cs;
 
+   radeon_begin(cs);
    radeon_emit(cs, fui(state->scale[0]));
    radeon_emit(cs, fui(state->translate[0]));
    radeon_emit(cs, fui(state->scale[1]));
    radeon_emit(cs, fui(state->translate[1]));
    radeon_emit(cs, fui(state->scale[2]));
    radeon_emit(cs, fui(state->translate[2]));
+   radeon_end();
 }
 
 static void si_emit_viewports(struct si_context *ctx)
@@ -492,7 +505,10 @@ static void si_emit_viewports(struct si_context *ctx)
 
    /* The simple case: Only 1 viewport is active. */
    if (!ctx->vs_writes_viewport_index) {
+      radeon_begin(cs);
      radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
+      radeon_end();
 
      si_emit_one_viewport(ctx, &states[0]);
      return;
    }
@@ -500,7 +516,10 @@ static void si_emit_viewports(struct si_context *ctx)
    /* All registers in the array need to be updated if any of them is changed.
    * This is a hardware requirement.
    */
+   radeon_begin(cs);
    radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE + 0, SI_MAX_VIEWPORTS * 6);
+   radeon_end();
 
    for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++)
      si_emit_one_viewport(ctx, &states[i]);
 }
@@ -528,21 +547,25 @@ static void si_emit_depth_ranges(struct si_context *ctx)
    if (!ctx->vs_writes_viewport_index) {
      si_viewport_zmin_zmax(&states[0], clip_halfz, window_space, &zmin, &zmax);
 
+      radeon_begin(cs);
      radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
      radeon_emit(cs, fui(zmin));
      radeon_emit(cs, fui(zmax));
+      radeon_end();
      return;
    }
 
   /* All registers in the array need to be updated if any of them is changed.
    * This is a hardware requirement.
    */
+   radeon_begin(cs);
   radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, SI_MAX_VIEWPORTS * 2);
   for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++) {
      si_viewport_zmin_zmax(&states[i], clip_halfz, window_space, &zmin, &zmax);
      radeon_emit(cs, fui(zmin));
      radeon_emit(cs, fui(zmax));
   }
+   radeon_end();
 }
 
 static void si_emit_viewport_states(struct si_context *ctx)
@@ -631,16 +654,20 @@ static void si_emit_window_rectangles(struct si_context *sctx)
    else
      rule = outside[num_rectangles - 1];
 
+   radeon_begin(cs);
    radeon_opt_set_context_reg(sctx, R_02820C_PA_SC_CLIPRECT_RULE, SI_TRACKED_PA_SC_CLIPRECT_RULE,
                               rule);
-   if (num_rectangles == 0)
+   if (num_rectangles == 0) {
+      radeon_end();
      return;
+   }
 
    radeon_set_context_reg_seq(cs, R_028210_PA_SC_CLIPRECT_0_TL, num_rectangles * 2);
    for (unsigned i = 0; i < num_rectangles; i++) {
      radeon_emit(cs, S_028210_TL_X(rects[i].minx) | S_028210_TL_Y(rects[i].miny));
      radeon_emit(cs, S_028214_BR_X(rects[i].maxx) | S_028214_BR_Y(rects[i].maxy));
    }
+   radeon_end();
 }
 
 static void si_set_window_rectangles(struct pipe_context *ctx, bool include,
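For reference, the cursor-caching pattern the whole series converts to can be modeled outside the driver. A self-contained toy (standalone C; the cb_* names are invented for illustration, not radeonsi API):

   #include <assert.h>
   #include <stdint.h>
   #include <stdio.h>

   struct cmdbuf { unsigned cdw, max_dw; uint32_t buf[64]; };

   /* begin caches the write cursor in locals, emit touches only the
    * locals, end stores the cursor back exactly once. */
   #define cb_begin(cs) struct cmdbuf *_cs = (cs); \
                        unsigned _num = _cs->cdw; \
                        uint32_t *_buf = _cs->buf
   #define cb_emit(v)   _buf[_num++] = (v)
   #define cb_end()     do { _cs->cdw = _num; assert(_cs->cdw <= _cs->max_dw); } while (0)

   int main(void)
   {
      struct cmdbuf cs = { .cdw = 0, .max_dw = 64 };

      cb_begin(&cs);
      cb_emit(0xc0001000);   /* fake packet header */
      cb_emit(0xdeadbeef);   /* fake payload dword */
      cb_end();              /* the only store to cs.cdw */

      printf("emitted %u dwords\n", cs.cdw);   /* prints: emitted 2 dwords */
      return 0;
   }

Keeping the cursor and buffer pointer in locals lets the compiler hold them in registers across a run of emits, which is where the smaller generated code and the lower CPU time in the draw path come from.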