radeonsi: add new possibly faster command submission helpers

This decreases the release libgallium_dri.so size without debug symbols
by 16384 bytes. The CPU time spent in si_emit_draw_packets decreased
from 4.5% to 4.1% in viewperf13/catia/plane01.

The previous code did:
    cs->current.buf[cs->current.cdw++] = ...;
    cs->current.buf[cs->current.cdw++] = ...;
    cs->current.buf[cs->current.cdw++] = ...;
    cs->current.buf[cs->current.cdw++] = ...;

The new code does:
    unsigned num = cs->current.cdw;
    uint32_t *buf = cs->current.buf;
    buf[num++] = ...;
    buf[num++] = ...;
    buf[num++] = ...;
    buf[num++] = ...;
    cs->current.cdw = num;

The calling code stays the same (radeon_emit is redefined as a macro), except
that every group of set and emit calls must now be wrapped in radeon_begin(cs)
and radeon_end().

radeon_packets_added() returns whether any new packets have been added
since radeon_begin().

radeon_end_update_context_roll(sctx) sets sctx->context_roll = true
if any new packets have been added since radeon_begin().

For now, the "cs" parameter is intentionally unused in radeon_emit and
radeon_emit_array.
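
For illustration, a minimal sketch of what an emit function looks like with the
new helpers; the function name and register macro below are hypothetical
placeholders, not part of this change:

    /* Hypothetical example: si_emit_example_state and R_028XXX_EXAMPLE are
     * placeholders and do not exist in the tree. */
    static void si_emit_example_state(struct si_context *sctx, uint32_t v0, uint32_t v1)
    {
       struct radeon_cmdbuf *cs = &sctx->gfx_cs;

       radeon_begin(cs);                     /* caches cs->current.cdw and .buf in locals */
       radeon_set_context_reg_seq(cs, R_028XXX_EXAMPLE, 2); /* placeholder reg offset */
       radeon_emit(cs, v0);                  /* stores go through the cached pointer */
       radeon_emit(cs, v1);
       radeon_end_update_context_roll(sctx); /* writes cdw back; sets sctx->context_roll
                                              * if packets were added since radeon_begin() */
    }

The diff also adds radeon_begin_again(cs), which re-opens the emit scope after a
radeon_end(), e.g. around a helper call that does its own begin/end in the
middle of a function.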

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8653>
Author: Marek Olšák, 2021-01-09 15:14:22 -05:00 (committed by Marge Bot)
parent 3ef89b245e
commit a0978fffb8
19 changed files with 579 additions and 381 deletions


@@ -39,249 +39,251 @@
 #define SI_CHECK_SHADOWED_REGS(reg_offset, count)
 #endif
 
+#define radeon_begin(cs) struct radeon_cmdbuf *__cs = (cs); \
+                         unsigned __cs_num = __cs->current.cdw; \
+                         UNUSED unsigned __cs_num_initial = __cs_num; \
+                         uint32_t *__cs_buf = __cs->current.buf
+
+#define radeon_begin_again(cs) do { \
+   assert(__cs == NULL); \
+   __cs = (cs); \
+   __cs_num = __cs->current.cdw; \
+   __cs_num_initial = __cs_num; \
+   __cs_buf = __cs->current.buf; \
+} while (0)
+
+#define radeon_end() do { \
+   __cs->current.cdw = __cs_num; \
+   assert(__cs->current.cdw <= __cs->current.max_dw); \
+   __cs = NULL; \
+} while (0)
+
+#define radeon_emit(cs, value)  __cs_buf[__cs_num++] = (value)
+#define radeon_packets_added()  (__cs_num != __cs_num_initial)
+
+#define radeon_end_update_context_roll(sctx) do { \
+   radeon_end(); \
+   if (radeon_packets_added()) \
+      (sctx)->context_roll = true; \
+} while (0)
+
+#define radeon_emit_array(cs, values, num) do { \
+   unsigned __n = (num); \
+   memcpy(__cs_buf + __cs_num, (values), __n * 4); \
+   __cs_num += __n; \
+} while (0)
+
-static inline void radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
-{
-   SI_CHECK_SHADOWED_REGS(reg, num);
-   assert(reg < SI_CONTEXT_REG_OFFSET);
-   assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
-   radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
-   radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
-}
+#define radeon_set_config_reg_seq(cs, reg, num) do { \
+   SI_CHECK_SHADOWED_REGS(reg, num); \
+   assert((reg) < SI_CONTEXT_REG_OFFSET); \
+   radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0)); \
+   radeon_emit(cs, ((reg) - SI_CONFIG_REG_OFFSET) >> 2); \
+} while (0)
 
-static inline void radeon_set_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
-{
-   radeon_set_config_reg_seq(cs, reg, 1);
-   radeon_emit(cs, value);
-}
+#define radeon_set_config_reg(cs, reg, value) do { \
+   radeon_set_config_reg_seq(cs, reg, 1); \
+   radeon_emit(cs, value); \
+} while (0)
 
-static inline void radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
-{
-   SI_CHECK_SHADOWED_REGS(reg, num);
-   assert(reg >= SI_CONTEXT_REG_OFFSET);
-   assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
-   radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
-   radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
-}
+#define radeon_set_context_reg_seq(cs, reg, num) do { \
+   SI_CHECK_SHADOWED_REGS(reg, num); \
+   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
+   radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0)); \
+   radeon_emit(cs, ((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \
+} while (0)
 
-static inline void radeon_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
-{
-   radeon_set_context_reg_seq(cs, reg, 1);
-   radeon_emit(cs, value);
-}
+#define radeon_set_context_reg(cs, reg, value) do { \
+   radeon_set_context_reg_seq(cs, reg, 1); \
+   radeon_emit(cs, value); \
+} while (0)
 
-static inline void radeon_set_context_reg_seq_array(struct radeon_cmdbuf *cs, unsigned reg,
-                                                    unsigned num, const uint32_t *values)
-{
-   radeon_set_context_reg_seq(cs, reg, num);
-   radeon_emit_array(cs, values, num);
-}
+#define radeon_set_context_reg_seq_array(cs, reg, num, values) do { \
+   radeon_set_context_reg_seq(cs, reg, num); \
+   radeon_emit_array(cs, values, num); \
+} while (0)
 
-static inline void radeon_set_context_reg_idx(struct radeon_cmdbuf *cs, unsigned reg, unsigned idx,
-                                              unsigned value)
-{
-   SI_CHECK_SHADOWED_REGS(reg, 1);
-   assert(reg >= SI_CONTEXT_REG_OFFSET);
-   assert(cs->current.cdw + 3 <= cs->current.max_dw);
-   radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
-   radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
-   radeon_emit(cs, value);
-}
+#define radeon_set_context_reg_idx(cs, reg, idx, value) do { \
+   SI_CHECK_SHADOWED_REGS(reg, 1); \
+   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
+   radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \
+   radeon_emit(cs, ((reg) - SI_CONTEXT_REG_OFFSET) >> 2 | ((idx) << 28)); \
+   radeon_emit(cs, value); \
+} while (0)
 
-static inline void radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
-{
-   SI_CHECK_SHADOWED_REGS(reg, num);
-   assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
-   assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
-   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
-   radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
-}
+#define radeon_set_sh_reg_seq(cs, reg, num) do { \
+   SI_CHECK_SHADOWED_REGS(reg, num); \
+   assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \
+   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0)); \
+   radeon_emit(cs, ((reg) - SI_SH_REG_OFFSET) >> 2); \
+} while (0)
 
-static inline void radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
-{
-   radeon_set_sh_reg_seq(cs, reg, 1);
-   radeon_emit(cs, value);
-}
+#define radeon_set_sh_reg(cs, reg, value) do { \
+   radeon_set_sh_reg_seq(cs, reg, 1); \
+   radeon_emit(cs, value); \
+} while (0)
 
-static inline void radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num, bool perfctr)
-{
-   SI_CHECK_SHADOWED_REGS(reg, num);
-   assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
-   assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
-   radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, perfctr));
-   radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
-}
+#define radeon_set_uconfig_reg_seq(cs, reg, num, perfctr) do { \
+   SI_CHECK_SHADOWED_REGS(reg, num); \
+   assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
+   radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, perfctr)); \
+   radeon_emit(cs, ((reg) - CIK_UCONFIG_REG_OFFSET) >> 2); \
+} while (0)
 
-static inline void radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
-{
-   radeon_set_uconfig_reg_seq(cs, reg, 1, false);
-   radeon_emit(cs, value);
-}
+#define radeon_set_uconfig_reg(cs, reg, value) do { \
+   radeon_set_uconfig_reg_seq(cs, reg, 1, false); \
+   radeon_emit(cs, value); \
+} while (0)
 
-static inline void radeon_set_uconfig_reg_perfctr(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
-{
-   radeon_set_uconfig_reg_seq(cs, reg, 1, true);
-   radeon_emit(cs, value);
-}
+#define radeon_set_uconfig_reg_perfctr(cs, reg, value) do { \
+   radeon_set_uconfig_reg_seq(cs, reg, 1, true); \
+   radeon_emit(cs, value); \
+} while (0)
 
-static inline void radeon_set_uconfig_reg_idx(struct radeon_cmdbuf *cs, struct si_screen *screen,
-                                              unsigned reg, unsigned idx, unsigned value)
-{
-   SI_CHECK_SHADOWED_REGS(reg, 1);
-   assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
-   assert(cs->current.cdw + 3 <= cs->current.max_dw);
-   assert(idx != 0);
-   unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
-   if (screen->info.chip_class < GFX9 ||
-       (screen->info.chip_class == GFX9 && screen->info.me_fw_version < 26))
-      opcode = PKT3_SET_UCONFIG_REG;
-   radeon_emit(cs, PKT3(opcode, 1, 0));
-   radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
-   radeon_emit(cs, value);
-}
+#define radeon_set_uconfig_reg_idx(cs, screen, chip_class, reg, idx, value) do { \
+   SI_CHECK_SHADOWED_REGS(reg, 1); \
+   assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
+   assert((idx) != 0); \
+   unsigned __opcode = PKT3_SET_UCONFIG_REG_INDEX; \
+   if ((chip_class) < GFX9 || \
+       ((chip_class) == GFX9 && (screen)->info.me_fw_version < 26)) \
+      __opcode = PKT3_SET_UCONFIG_REG; \
+   radeon_emit(cs, PKT3(__opcode, 1, 0)); \
+   radeon_emit(cs, ((reg) - CIK_UCONFIG_REG_OFFSET) >> 2 | ((idx) << 28)); \
+   radeon_emit(cs, value); \
+} while (0)
 
-static inline void radeon_set_context_reg_rmw(struct radeon_cmdbuf *cs, unsigned reg,
-                                              unsigned value, unsigned mask)
-{
-   SI_CHECK_SHADOWED_REGS(reg, 1);
-   assert(reg >= SI_CONTEXT_REG_OFFSET);
-   assert(cs->current.cdw + 4 <= cs->current.max_dw);
-   radeon_emit(cs, PKT3(PKT3_CONTEXT_REG_RMW, 2, 0));
-   radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
-   radeon_emit(cs, mask);
-   radeon_emit(cs, value);
-}
+#define radeon_set_context_reg_rmw(cs, reg, value, mask) do { \
+   SI_CHECK_SHADOWED_REGS(reg, 1); \
+   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
+   radeon_emit(cs, PKT3(PKT3_CONTEXT_REG_RMW, 2, 0)); \
+   radeon_emit(cs, ((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \
+   radeon_emit(cs, mask); \
+   radeon_emit(cs, value); \
+} while (0)
 
 /* Emit PKT3_CONTEXT_REG_RMW if the register value is different. */
-static inline void radeon_opt_set_context_reg_rmw(struct si_context *sctx, unsigned offset,
-                                                  enum si_tracked_reg reg, unsigned value,
-                                                  unsigned mask)
-{
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
-   assert((value & ~mask) == 0);
-   value &= mask;
-
-   if (((sctx->tracked_regs.reg_saved >> reg) & 0x1) != 0x1 ||
-       sctx->tracked_regs.reg_value[reg] != value) {
-      radeon_set_context_reg_rmw(cs, offset, value, mask);
-
-      sctx->tracked_regs.reg_saved |= 0x1ull << reg;
-      sctx->tracked_regs.reg_value[reg] = value;
-   }
-}
+#define radeon_opt_set_context_reg_rmw(sctx, offset, reg, val, mask) do { \
+   unsigned __value = (val); \
+   assert((__value & ~mask) == 0); \
+   __value &= mask; \
+   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
+       sctx->tracked_regs.reg_value[reg] != __value) { \
+      radeon_set_context_reg_rmw(&sctx->gfx_cs, offset, __value, mask); \
+      sctx->tracked_regs.reg_saved |= 0x1ull << (reg); \
+      sctx->tracked_regs.reg_value[reg] = __value; \
+   } \
+} while (0)
 
 /* Emit PKT3_SET_CONTEXT_REG if the register value is different. */
-static inline void radeon_opt_set_context_reg(struct si_context *sctx, unsigned offset,
-                                              enum si_tracked_reg reg, unsigned value)
-{
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
-   if (((sctx->tracked_regs.reg_saved >> reg) & 0x1) != 0x1 ||
-       sctx->tracked_regs.reg_value[reg] != value) {
-      radeon_set_context_reg(cs, offset, value);
-
-      sctx->tracked_regs.reg_saved |= 0x1ull << reg;
-      sctx->tracked_regs.reg_value[reg] = value;
-   }
-}
+#define radeon_opt_set_context_reg(sctx, offset, reg, val) do { \
+   unsigned __value = val; \
+   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
+       sctx->tracked_regs.reg_value[reg] != __value) { \
+      radeon_set_context_reg(&sctx->gfx_cs, offset, __value); \
+      sctx->tracked_regs.reg_saved |= 0x1ull << (reg); \
+      sctx->tracked_regs.reg_value[reg] = __value; \
+   } \
+} while (0)
 
 /**
  * Set 2 consecutive registers if any registers value is different.
  * @param offset starting register offset
- * @param value1 is written to first register
- * @param value2 is written to second register
+ * @param val1 is written to first register
+ * @param val2 is written to second register
  */
-static inline void radeon_opt_set_context_reg2(struct si_context *sctx, unsigned offset,
-                                               enum si_tracked_reg reg, unsigned value1,
-                                               unsigned value2)
-{
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
-   if (((sctx->tracked_regs.reg_saved >> reg) & 0x3) != 0x3 ||
-       sctx->tracked_regs.reg_value[reg] != value1 ||
-       sctx->tracked_regs.reg_value[reg + 1] != value2) {
-      radeon_set_context_reg_seq(cs, offset, 2);
-      radeon_emit(cs, value1);
-      radeon_emit(cs, value2);
-
-      sctx->tracked_regs.reg_value[reg] = value1;
-      sctx->tracked_regs.reg_value[reg + 1] = value2;
-      sctx->tracked_regs.reg_saved |= 0x3ull << reg;
-   }
-}
+#define radeon_opt_set_context_reg2(sctx, offset, reg, val1, val2) do { \
+   unsigned __value1 = (val1), __value2 = (val2); \
+   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x3) != 0x3 || \
+       sctx->tracked_regs.reg_value[reg] != __value1 || \
+       sctx->tracked_regs.reg_value[(reg) + 1] != __value2) { \
+      radeon_set_context_reg_seq(&sctx->gfx_cs, offset, 2); \
+      radeon_emit(cs, __value1); \
+      radeon_emit(cs, __value2); \
+      sctx->tracked_regs.reg_value[reg] = __value1; \
+      sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \
+      sctx->tracked_regs.reg_saved |= 0x3ull << (reg); \
+   } \
+} while (0)
 
 /**
  * Set 3 consecutive registers if any registers value is different.
  */
-static inline void radeon_opt_set_context_reg3(struct si_context *sctx, unsigned offset,
-                                               enum si_tracked_reg reg, unsigned value1,
-                                               unsigned value2, unsigned value3)
-{
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
-   if (((sctx->tracked_regs.reg_saved >> reg) & 0x7) != 0x7 ||
-       sctx->tracked_regs.reg_value[reg] != value1 ||
-       sctx->tracked_regs.reg_value[reg + 1] != value2 ||
-       sctx->tracked_regs.reg_value[reg + 2] != value3) {
-      radeon_set_context_reg_seq(cs, offset, 3);
-      radeon_emit(cs, value1);
-      radeon_emit(cs, value2);
-      radeon_emit(cs, value3);
-
-      sctx->tracked_regs.reg_value[reg] = value1;
-      sctx->tracked_regs.reg_value[reg + 1] = value2;
-      sctx->tracked_regs.reg_value[reg + 2] = value3;
-      sctx->tracked_regs.reg_saved |= 0x7ull << reg;
-   }
-}
+#define radeon_opt_set_context_reg3(sctx, offset, reg, val1, val2, val3) do { \
+   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3); \
+   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x7) != 0x7 || \
+       sctx->tracked_regs.reg_value[reg] != __value1 || \
+       sctx->tracked_regs.reg_value[(reg) + 1] != __value2 || \
+       sctx->tracked_regs.reg_value[(reg) + 2] != __value3) { \
+      radeon_set_context_reg_seq(&sctx->gfx_cs, offset, 3); \
+      radeon_emit(cs, __value1); \
+      radeon_emit(cs, __value2); \
+      radeon_emit(cs, __value3); \
+      sctx->tracked_regs.reg_value[reg] = __value1; \
+      sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \
+      sctx->tracked_regs.reg_value[(reg) + 2] = __value3; \
+      sctx->tracked_regs.reg_saved |= 0x7ull << (reg); \
+   } \
+} while (0)
 
 /**
  * Set 4 consecutive registers if any registers value is different.
  */
-static inline void radeon_opt_set_context_reg4(struct si_context *sctx, unsigned offset,
-                                               enum si_tracked_reg reg, unsigned value1,
-                                               unsigned value2, unsigned value3, unsigned value4)
-{
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
-   if (((sctx->tracked_regs.reg_saved >> reg) & 0xf) != 0xf ||
-       sctx->tracked_regs.reg_value[reg] != value1 ||
-       sctx->tracked_regs.reg_value[reg + 1] != value2 ||
-       sctx->tracked_regs.reg_value[reg + 2] != value3 ||
-       sctx->tracked_regs.reg_value[reg + 3] != value4) {
-      radeon_set_context_reg_seq(cs, offset, 4);
-      radeon_emit(cs, value1);
-      radeon_emit(cs, value2);
-      radeon_emit(cs, value3);
-      radeon_emit(cs, value4);
-
-      sctx->tracked_regs.reg_value[reg] = value1;
-      sctx->tracked_regs.reg_value[reg + 1] = value2;
-      sctx->tracked_regs.reg_value[reg + 2] = value3;
-      sctx->tracked_regs.reg_value[reg + 3] = value4;
-      sctx->tracked_regs.reg_saved |= 0xfull << reg;
-   }
-}
+#define radeon_opt_set_context_reg4(sctx, offset, reg, val1, val2, val3, val4) do { \
+   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3), __value4 = (val4); \
+   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0xf) != 0xf || \
+       sctx->tracked_regs.reg_value[reg] != __value1 || \
+       sctx->tracked_regs.reg_value[(reg) + 1] != __value2 || \
+       sctx->tracked_regs.reg_value[(reg) + 2] != __value3 || \
+       sctx->tracked_regs.reg_value[(reg) + 3] != __value4) { \
+      radeon_set_context_reg_seq(&sctx->gfx_cs, offset, 4); \
+      radeon_emit(cs, __value1); \
+      radeon_emit(cs, __value2); \
+      radeon_emit(cs, __value3); \
+      radeon_emit(cs, __value4); \
+      sctx->tracked_regs.reg_value[reg] = __value1; \
+      sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \
+      sctx->tracked_regs.reg_value[(reg) + 2] = __value3; \
+      sctx->tracked_regs.reg_value[(reg) + 3] = __value4; \
+      sctx->tracked_regs.reg_saved |= 0xfull << (reg); \
+   } \
+} while (0)
 
 /**
  * Set consecutive registers if any registers value is different.
  */
-static inline void radeon_opt_set_context_regn(struct si_context *sctx, unsigned offset,
-                                               unsigned *value, unsigned *saved_val, unsigned num)
-{
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
-   for (unsigned i = 0; i < num; i++) {
-      if (saved_val[i] != value[i]) {
-         radeon_set_context_reg_seq(cs, offset, num);
-         for (unsigned j = 0; j < num; j++)
-            radeon_emit(cs, value[j]);
-
-         memcpy(saved_val, value, sizeof(uint32_t) * num);
-         break;
-      }
-   }
-}
+#define radeon_opt_set_context_regn(sctx, offset, value, saved_val, num) do { \
+   for (unsigned i = 0; i < (num); i++) { \
+      if ((saved_val)[i] != (value)[i]) { \
+         radeon_set_context_reg_seq(&(sctx)->gfx_cs, offset, num); \
+         for (unsigned j = 0; j < (num); j++) \
+            radeon_emit(cs, value[j]); \
+         memcpy(saved_val, value, sizeof(uint32_t) * (num)); \
+         break; \
+      } \
+   } \
+} while (0)
+
+#define radeon_set_privileged_config_reg(cs, reg, value) do { \
+   assert((reg) < CIK_UCONFIG_REG_OFFSET); \
+   radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); \
+   radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | \
+               COPY_DATA_DST_SEL(COPY_DATA_PERF)); \
+   radeon_emit(cs, value); \
+   radeon_emit(cs, 0); /* unused */ \
+   radeon_emit(cs, (reg) >> 2); \
+   radeon_emit(cs, 0); /* unused */ \
+} while (0)
+
+#define radeon_emit_32bit_pointer(sscreen, cs, va) do { \
+   radeon_emit(cs, va); \
+   assert((va) == 0 || ((va) >> 32) == sscreen->info.address32_hi); \
+} while (0)
+
+#define radeon_emit_one_32bit_pointer(sctx, desc, sh_base) do { \
+   unsigned sh_offset = (sh_base) + (desc)->shader_userdata_offset; \
+   radeon_set_sh_reg_seq(&sctx->gfx_cs, sh_offset, 1); \
+   radeon_emit_32bit_pointer(sctx->screen, cs, (desc)->gpu_address); \
+} while (0)
 
 /* This should be evaluated at compile time if all parameters are constants. */
 static ALWAYS_INLINE unsigned


@@ -349,6 +349,7 @@ void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf
 {
    uint64_t bc_va = sctx->border_color_buffer->gpu_address;
 
+   radeon_begin(cs);
    radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
    /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
     * renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */

@@ -404,6 +405,7 @@ void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf
       radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, 0);
       radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
    }
+   radeon_end();
 }
 
 static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_shader *shader,

@@ -505,6 +507,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
    radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, shader->bo, RADEON_USAGE_READ,
                              RADEON_PRIO_SHADER_BINARY);
 
+   radeon_begin(cs);
    radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
    radeon_emit(cs, shader_va >> 8);
    radeon_emit(cs, S_00B834_DATA(shader_va >> 40));

@@ -524,6 +527,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
    radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
                      S_00B860_WAVES(sctx->scratch_waves) |
                         S_00B860_WAVESIZE(sctx->max_seen_compute_scratch_bytes_per_wave >> 10));
+   radeon_end();
 
    sctx->cs_shader_state.emitted_program = program;
    sctx->cs_shader_state.offset = offset;

@@ -562,11 +566,13 @@ static void setup_scratch_rsrc_user_sgprs(struct si_context *sctx,
       }
    }
 
+   radeon_begin(cs);
    radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + (user_sgpr * 4), 4);
    radeon_emit(cs, scratch_dword0);
    radeon_emit(cs, scratch_dword1);
    radeon_emit(cs, scratch_dword2);
    radeon_emit(cs, scratch_dword3);
+   radeon_end();
 }
 
 static void si_setup_user_sgprs_co_v2(struct si_context *sctx, const amd_kernel_code_t *code_object,

@@ -589,6 +595,8 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx, const amd_kernel_
       user_sgpr += 4;
    }
 
+   radeon_begin(cs);
+
    if (AMD_HSA_BITS_GET(code_object->code_properties, AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR)) {
       struct dispatch_packet dispatch;
       unsigned dispatch_offset;

@@ -646,6 +654,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx, const amd_kernel_
          user_sgpr += 1;
       }
    }
+   radeon_end();
 }
 
 static bool si_upload_compute_input(struct si_context *sctx, const amd_kernel_code_t *code_object,

@@ -693,13 +702,18 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
                               12 * sel->info.uses_grid_size;
    unsigned cs_user_data_reg = block_size_reg + 12 * program->sel.info.uses_variable_block_size;
 
+   radeon_begin(cs);
+
    if (sel->info.uses_grid_size) {
       if (info->indirect) {
+         radeon_end();
+
         for (unsigned i = 0; i < 3; ++i) {
            si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_REG, NULL, (grid_size_reg >> 2) + i,
                            COPY_DATA_SRC_MEM, si_resource(info->indirect),
                            info->indirect_offset + 4 * i);
         }
+         radeon_begin_again(cs);
      } else {
         radeon_set_sh_reg_seq(cs, grid_size_reg, 3);
         radeon_emit(cs, info->grid[0]);

@@ -719,6 +733,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
      radeon_set_sh_reg_seq(cs, cs_user_data_reg, sel->info.base.cs.user_data_components_amd);
      radeon_emit_array(cs, sctx->cs_user_data, sel->info.base.cs.user_data_components_amd);
    }
+   radeon_end();
 }
 
 static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_grid_info *info)

@@ -734,6 +749,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
    if (sctx->chip_class >= GFX10 && waves_per_threadgroup == 1)
       threadgroups_per_cu = 2;
 
+   radeon_begin(cs);
    radeon_set_sh_reg(
       cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
       ac_get_compute_resource_limits(&sscreen->info, waves_per_threadgroup,

@@ -795,9 +811,10 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
    }
 
    if (unlikely(sctx->thread_trace_enabled && sctx->chip_class >= GFX9)) {
-      radeon_emit(&sctx->gfx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-      radeon_emit(&sctx->gfx_cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
+      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+      radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
    }
+   radeon_end();
 }
 
 static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)


@@ -1084,8 +1084,10 @@ si_prepare_prim_discard_or_split_draw(struct si_context *sctx, const struct pipe
    */
   if (!radeon_emitted(gfx_cs, sctx->initial_gfx_cs_size) &&
       gfx_cs->current.cdw + need_gfx_dw > gfx_cs->current.max_dw) {
+      radeon_begin(gfx_cs);
      radeon_emit(gfx_cs, PKT3(PKT3_NOP, 0, 0));
      radeon_emit(gfx_cs, 0);
+      radeon_end();
   }
 
   si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);

@@ -1184,6 +1186,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
       * TTM buffer moves in the kernel.
       */
      if (sctx->chip_class >= GFX10) {
+         radeon_begin(cs);
         radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
         radeon_emit(cs, 0);          /* CP_COHER_CNTL */
         radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */

@@ -1195,6 +1198,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
                         S_586_GLI_INV(V_586_GLI_ALL) | S_586_GLK_INV(1) | S_586_GLV_INV(1) |
                         S_586_GL1_INV(1) | S_586_GL2_INV(1) | S_586_GL2_WB(1) | S_586_GLM_INV(1) |
                         S_586_GLM_WB(1) | S_586_SEQ(V_586_SEQ_FORWARD));
+         radeon_end();
      } else {
         si_emit_surface_sync(sctx, cs,
                              S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) |

@@ -1211,6 +1215,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
      si_emit_initial_compute_regs(sctx, cs);
 
+      radeon_begin(cs);
      radeon_set_sh_reg(
         cs, R_00B860_COMPUTE_TMPRING_SIZE,
         S_00B860_WAVES(sctx->scratch_waves) | S_00B860_WAVESIZE(0)); /* no scratch */

@@ -1231,6 +1236,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
         radeon_emit(cs, 0);
         radeon_emit(cs, S_03107C_ENABLE(0));
      }
+      radeon_end();
 
      if (sctx->last_ib_barrier_buf) {
         assert(!sctx->last_ib_barrier_fence);

@@ -1349,6 +1355,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
       * in parallel with compute shaders.
       */
      if (first_dispatch) {
+         radeon_begin(cs);
         radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + gds_size / 4, 0));
         radeon_emit(cs, S_370_DST_SEL(V_370_GDS) | S_370_WR_CONFIRM(1));
         radeon_emit(cs, gds_offset);

@@ -1356,6 +1363,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
         radeon_emit(cs, 0); /* value to write */
         if (gds_size == 8)
            radeon_emit(cs, 0);
+         radeon_end();
      }
   }

@@ -1370,6 +1378,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
      assert(shader->config.scratch_bytes_per_wave == 0);
      assert(shader->config.num_vgprs * WAVES_PER_TG <= 256 * 4);
 
+      radeon_begin(cs);
      radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
      radeon_emit(cs, shader_va >> 8);
      radeon_emit(cs, S_00B834_DATA(shader_va >> 40));

@@ -1390,6 +1399,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
      radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
                        ac_get_compute_resource_limits(&sctx->screen->info, WAVES_PER_TG,
                                                       MAX_WAVES_PER_SH, THREADGROUPS_PER_CU));
+      radeon_end();
      sctx->compute_ib_last_shader = shader;
   }

@@ -1417,8 +1427,10 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
      sctx->compute_rewind_va = gfx_cs->gpu_address + (gfx_cs->current.cdw + 1) * 4;
 
      if (sctx->chip_class <= GFX7 || FORCE_REWIND_EMULATION) {
+         radeon_begin(gfx_cs);
         radeon_emit(gfx_cs, PKT3(PKT3_NOP, 0, 0));
         radeon_emit(gfx_cs, 0);
+         radeon_end();
 
         si_cp_wait_mem(
            sctx, gfx_cs,

@@ -1430,8 +1442,10 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
          */
         sctx->ws->cs_check_space(gfx_cs, 0, true);
      } else {
+         radeon_begin(gfx_cs);
         radeon_emit(gfx_cs, PKT3(PKT3_REWIND, 0, 0));
         radeon_emit(gfx_cs, 0);
+         radeon_end();
      }
   }

@@ -1441,12 +1455,16 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
      uint64_t index_va = out_indexbuf_va + start_prim * 12;
 
      /* Emit the draw packet into the gfx IB. */
+      radeon_begin(gfx_cs);
      radeon_emit(gfx_cs, PKT3(PKT3_DRAW_INDEX_2, 4, 0));
      radeon_emit(gfx_cs, num_prims * vertices_per_prim);
      radeon_emit(gfx_cs, index_va);
      radeon_emit(gfx_cs, index_va >> 32);
      radeon_emit(gfx_cs, 0);
      radeon_emit(gfx_cs, V_0287F0_DI_SRC_SEL_DMA);
+      radeon_end();
+
+      radeon_begin_again(cs);
 
      /* Continue with the compute IB. */
      if (start_prim == 0) {

@@ -1503,6 +1521,7 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
      radeon_emit(cs, S_00B800_COMPUTE_SHADER_EN(1) | S_00B800_PARTIAL_TG_EN(!!partial_block_size) |
                      S_00B800_ORDERED_APPEND_ENBL(VERTEX_COUNTER_GDS_MODE == 2) |
                      S_00B800_ORDER_MODE(0 /* launch in order */));
+      radeon_end();
 
      /* This is only for unordered append. Ordered append writes this from
       * the shader.


@@ -24,6 +24,7 @@
 
 #include "si_pipe.h"
 #include "sid.h"
+#include "si_build_pm4.h"
 
 /* Set this if you want the ME to wait until CP DMA is done.
  * It should be set on the last CP DMA packet. */

@@ -102,6 +103,8 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
                S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) | S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM);
    }
 
+   radeon_begin(cs);
+
    if (sctx->chip_class >= GFX7) {
       radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
       radeon_emit(cs, header);

@@ -130,6 +133,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
       radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
       radeon_emit(cs, 0);
    }
+   radeon_end();
 }
 
 void si_cp_dma_wait_for_idle(struct si_context *sctx, struct radeon_cmdbuf *cs)

@@ -428,6 +432,7 @@ void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf,
    }
 
    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
+   radeon_begin(cs);
    radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
    radeon_emit(cs, header);
    radeon_emit(cs, address);       /* SRC_ADDR_LO [31:0] */

@@ -435,6 +440,7 @@ void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf,
    radeon_emit(cs, address);       /* DST_ADDR_LO [31:0] */
    radeon_emit(cs, address >> 32); /* DST_ADDR_HI [31:0] */
    radeon_emit(cs, command);
+   radeon_end();
 }
 
 void si_test_gds(struct si_context *sctx)

@@ -495,11 +501,13 @@ void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, unsigned
    radeon_add_to_buffer_list(sctx, cs, buf, RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
 
    uint64_t va = buf->gpu_address + offset;
+   radeon_begin(cs);
    radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + size / 4, 0));
    radeon_emit(cs, S_370_DST_SEL(dst_sel) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine));
    radeon_emit(cs, va);
    radeon_emit(cs, va >> 32);
    radeon_emit_array(cs, (const uint32_t *)data, size / 4);
+   radeon_end();
 }
 
 void si_cp_copy_data(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned dst_sel,

@@ -517,10 +525,12 @@ void si_cp_copy_data(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned
    uint64_t dst_va = (dst ? dst->gpu_address : 0ull) + dst_offset;
    uint64_t src_va = (src ? src->gpu_address : 0ull) + src_offset;
 
+   radeon_begin(cs);
    radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
    radeon_emit(cs, COPY_DATA_SRC_SEL(src_sel) | COPY_DATA_DST_SEL(dst_sel) | COPY_DATA_WR_CONFIRM);
    radeon_emit(cs, src_va);
    radeon_emit(cs, src_va >> 32);
    radeon_emit(cs, dst_va);
    radeon_emit(cs, dst_va >> 32);
+   radeon_end();
 }


@@ -144,6 +144,15 @@ si_create_shadowing_ib_preamble(struct si_context *sctx)
    return pm4;
 }
 
+static void si_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
+                                     const uint32_t *values)
+{
+   radeon_begin(cs);
+   radeon_set_context_reg_seq(cs, reg, num);
+   radeon_emit_array(cs, values, num);
+   radeon_end();
+}
+
 void si_init_cp_reg_shadowing(struct si_context *sctx)
 {
    if (sctx->screen->info.mid_command_buffer_preemption_enabled ||

@@ -174,8 +183,7 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
       radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowed_regs,
                                 RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
       si_pm4_emit(sctx, shadowing_preamble);
-      ac_emulate_clear_state(&sctx->screen->info, &sctx->gfx_cs,
-                             radeon_set_context_reg_seq_array);
+      ac_emulate_clear_state(&sctx->screen->info, &sctx->gfx_cs, si_set_context_reg_array);
      si_pm4_emit(sctx, sctx->cs_preamble_state);
 
      /* The register values are shadowed, so we won't need to set them again. */


@@ -1930,82 +1930,59 @@ void si_shader_change_notify(struct si_context *sctx)
                          PIPE_SHADER_TESS_EVAL));
 }
 
-static void si_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset,
-                                        unsigned pointer_count)
-{
-   SI_CHECK_SHADOWED_REGS(sh_offset, pointer_count);
-   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count, 0));
-   radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
-}
-
-static void si_emit_shader_pointer_body(struct si_screen *sscreen, struct radeon_cmdbuf *cs,
-                                        uint64_t va)
-{
-   radeon_emit(cs, va);
-
-   assert(va == 0 || (va >> 32) == sscreen->info.address32_hi);
-}
-
-static void si_emit_shader_pointer(struct si_context *sctx, struct si_descriptors *desc,
-                                   unsigned sh_base)
-{
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-   unsigned sh_offset = sh_base + desc->shader_userdata_offset;
-
-   si_emit_shader_pointer_head(cs, sh_offset, 1);
-   si_emit_shader_pointer_body(sctx->screen, cs, desc->gpu_address);
-}
-
-static void si_emit_consecutive_shader_pointers(struct si_context *sctx, unsigned pointer_mask,
-                                                unsigned sh_base)
-{
-   if (!sh_base)
-      return;
-
-   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-   unsigned mask = sctx->shader_pointers_dirty & pointer_mask;
-
-   while (mask) {
-      int start, count;
-      u_bit_scan_consecutive_range(&mask, &start, &count);
-
-      struct si_descriptors *descs = &sctx->descriptors[start];
-      unsigned sh_offset = sh_base + descs->shader_userdata_offset;
-
-      si_emit_shader_pointer_head(cs, sh_offset, count);
-      for (int i = 0; i < count; i++)
-         si_emit_shader_pointer_body(sctx->screen, cs, descs[i].gpu_address);
-   }
-}
+#define si_emit_consecutive_shader_pointers(sctx, pointer_mask, sh_base) do { \
+   unsigned sh_reg_base = (sh_base); \
+   if (sh_reg_base) { \
+      unsigned mask = sctx->shader_pointers_dirty & (pointer_mask); \
+ \
+      while (mask) { \
+         int start, count; \
+         u_bit_scan_consecutive_range(&mask, &start, &count); \
+ \
+         struct si_descriptors *descs = &sctx->descriptors[start]; \
+         unsigned sh_offset = sh_reg_base + descs->shader_userdata_offset; \
+ \
+         radeon_set_sh_reg_seq(&sctx->gfx_cs, sh_offset, count); \
+         for (int i = 0; i < count; i++) \
+            radeon_emit_32bit_pointer(sctx->screen, cs, descs[i].gpu_address); \
+      } \
+   } \
+} while (0)
 
 static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs)
 {
+   radeon_begin(&sctx->gfx_cs);
+
    if (sctx->chip_class >= GFX10) {
-      si_emit_shader_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
+      radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
       /* HW VS stage only used in non-NGG mode. */
-      si_emit_shader_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
-      si_emit_shader_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
-      si_emit_shader_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
+      radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
+      radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
+      radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
+      radeon_end();
      return;
   } else if (sctx->chip_class == GFX9 && sctx->shadowed_regs) {
      /* We can't use the COMMON registers with register shadowing. */
-      si_emit_shader_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
-      si_emit_shader_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
-      si_emit_shader_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
-      si_emit_shader_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_LS_0);
+      radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
+      radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
+      radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
+      radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_LS_0);
+      radeon_end();
      return;
   } else if (sctx->chip_class == GFX9) {
      /* Broadcast it to all shader stages. */
-      si_emit_shader_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
+      radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
+      radeon_end();
      return;
   }
 
-   si_emit_shader_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
-   si_emit_shader_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
-   si_emit_shader_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
-   si_emit_shader_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
-   si_emit_shader_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
-   si_emit_shader_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_LS_0);
+   radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
+   radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
+   radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
+   radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
+   radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
+   radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_LS_0);
+   radeon_end();
 }
 
 void si_emit_graphics_shader_pointers(struct si_context *sctx)

@@ -2016,6 +1993,7 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx)
      si_emit_global_shader_pointers(sctx, &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
   }
 
+   radeon_begin(&sctx->gfx_cs);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
                                       sh_base[PIPE_SHADER_VERTEX]);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),

@@ -2030,8 +2008,6 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx)
   sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
 
   if (sctx->vertex_buffer_pointer_dirty && sctx->num_vertex_elements) {
-      struct radeon_cmdbuf *cs = &sctx->gfx_cs;
-
      /* Find the location of the VB descriptor pointer. */
      unsigned sh_dw_offset = SI_VS_NUM_USER_SGPR;
      if (sctx->chip_class >= GFX9) {

@@ -2042,22 +2018,22 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx)
      }
 
      unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + sh_dw_offset * 4;
-      si_emit_shader_pointer_head(cs, sh_offset, 1);
-      si_emit_shader_pointer_body(
+      radeon_set_sh_reg_seq(cs, sh_offset, 1);
+      radeon_emit_32bit_pointer(
         sctx->screen, cs, sctx->vb_descriptors_buffer->gpu_address + sctx->vb_descriptors_offset);
      sctx->vertex_buffer_pointer_dirty = false;
   }
 
   if (sctx->vertex_buffer_user_sgprs_dirty && sctx->num_vertex_elements &&
       sctx->screen->num_vbos_in_user_sgprs) {
-      struct radeon_cmdbuf *cs = &sctx->gfx_cs;
      unsigned num_desc = MIN2(sctx->num_vertex_elements, sctx->screen->num_vbos_in_user_sgprs);
      unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + SI_SGPR_VS_VB_DESCRIPTOR_FIRST * 4;
 
-      si_emit_shader_pointer_head(cs, sh_offset, num_desc * 4);
+      radeon_set_sh_reg_seq(cs, sh_offset, num_desc * 4);
      radeon_emit_array(cs, sctx->vb_descriptor_user_sgprs, num_desc * 4);
      sctx->vertex_buffer_user_sgprs_dirty = false;
   }
+   radeon_end();
 
   if (sctx->graphics_bindless_pointer_dirty) {
      si_emit_global_shader_pointers(sctx, &sctx->bindless_descriptors);

@@ -2071,12 +2047,13 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
   struct si_shader_selector *shader = &sctx->cs_shader_state.program->sel;
   unsigned base = R_00B900_COMPUTE_USER_DATA_0;
 
+   radeon_begin(cs);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
                                       R_00B900_COMPUTE_USER_DATA_0);
   sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);
 
   if (sctx->compute_bindless_pointer_dirty) {
-      si_emit_shader_pointer(sctx, &sctx->bindless_descriptors, base);
+      radeon_emit_one_32bit_pointer(sctx, &sctx->bindless_descriptors, base);
      sctx->compute_bindless_pointer_dirty = false;
   }

@@ -2085,7 +2062,7 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
   if (num_shaderbufs && sctx->compute_shaderbuf_sgprs_dirty) {
      struct si_descriptors *desc = si_const_and_shader_buffer_descriptors(sctx, PIPE_SHADER_COMPUTE);
 
-      si_emit_shader_pointer_head(cs, R_00B900_COMPUTE_USER_DATA_0 +
+      radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
                            shader->cs_shaderbufs_sgpr_index * 4,
                            num_shaderbufs * 4);

@@ -2100,7 +2077,7 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
   if (num_images && sctx->compute_image_sgprs_dirty) {
      struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, PIPE_SHADER_COMPUTE);
 
-      si_emit_shader_pointer_head(cs, R_00B900_COMPUTE_USER_DATA_0 +
+      radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
                            shader->cs_images_sgpr_index * 4,
                            shader->cs_images_num_sgprs);

@@ -2119,6 +2096,7 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
      sctx->compute_image_sgprs_dirty = false;
   }
+   radeon_end();
 }
 
 /* BINDLESS */


@@ -75,6 +75,8 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne
    unsigned sel = EOP_DST_SEL(dst_sel) | EOP_INT_SEL(int_sel) | EOP_DATA_SEL(data_sel);
    bool compute_ib = !ctx->has_graphics || cs == &ctx->prim_discard_compute_cs;
 
+   radeon_begin(cs);
+
    if (ctx->chip_class >= GFX9 || (compute_ib && ctx->chip_class >= GFX7)) {
       /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
        * counters) must immediately precede every timestamp event to

@@ -136,6 +138,8 @@ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigne
       radeon_emit(cs, 0); /* unused */
    }
 
+   radeon_end();
+
    if (buf) {
       radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
    }

@@ -154,6 +158,7 @@ unsigned si_cp_write_fence_dwords(struct si_screen *screen)
 void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, uint64_t va, uint32_t ref,
                     uint32_t mask, unsigned flags)
 {
+   radeon_begin(cs);
    radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
    radeon_emit(cs, WAIT_REG_MEM_MEM_SPACE(1) | flags);
    radeon_emit(cs, va);

@@ -161,6 +166,7 @@ void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, uint64_t v
    radeon_emit(cs, ref);  /* reference value */
    radeon_emit(cs, mask); /* mask */
    radeon_emit(cs, 4);    /* poll interval */
+   radeon_end();
 }
 
 static void si_add_fence_dependency(struct si_context *sctx, struct pipe_fence_handle *fence)


@ -110,8 +110,10 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h
/* Make sure compute shaders are idle before leaving the IB, so that /* Make sure compute shaders are idle before leaving the IB, so that
* the next IB doesn't overwrite GDS that might be in use. */ * the next IB doesn't overwrite GDS that might be in use. */
radeon_begin(compute_cs);
radeon_emit(compute_cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(compute_cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(compute_cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4)); radeon_emit(compute_cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
radeon_end();
/* Save the GDS prim restart counter if needed. */ /* Save the GDS prim restart counter if needed. */
if (ctx->preserve_prim_restart_gds_at_flush) { if (ctx->preserve_prim_restart_gds_at_flush) {
@ -559,6 +561,8 @@ void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, uns
assert(sctx->chip_class <= GFX9); assert(sctx->chip_class <= GFX9);
radeon_begin(cs);
if (sctx->chip_class == GFX9 || compute_ib) { if (sctx->chip_class == GFX9 || compute_ib) {
/* Flush caches and wait for the caches to assert idle. */ /* Flush caches and wait for the caches to assert idle. */
radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0)); radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
@ -576,6 +580,7 @@ void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, uns
radeon_emit(cs, 0); /* CP_COHER_BASE */ radeon_emit(cs, 0); /* CP_COHER_BASE */
radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
} }
radeon_end();
/* ACQUIRE_MEM has an implicit context roll if the current context /* ACQUIRE_MEM has an implicit context roll if the current context
* is busy. */ * is busy. */
@ -599,6 +604,8 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
/* We don't need these. */ /* We don't need these. */
assert(!(flags & (SI_CONTEXT_VGT_STREAMOUT_SYNC | SI_CONTEXT_FLUSH_AND_INV_DB_META))); assert(!(flags & (SI_CONTEXT_VGT_STREAMOUT_SYNC | SI_CONTEXT_FLUSH_AND_INV_DB_META)));
radeon_begin(cs);
if (flags & SI_CONTEXT_VGT_FLUSH) { if (flags & SI_CONTEXT_VGT_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
@ -686,6 +693,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
ctx->num_cs_flushes++; ctx->num_cs_flushes++;
ctx->compute_is_busy = false; ctx->compute_is_busy = false;
} }
radeon_end();
if (cb_db_event) { if (cb_db_event) {
struct si_resource* wait_mem_scratch = unlikely(ctx->ws->cs_is_secure(cs)) ? struct si_resource* wait_mem_scratch = unlikely(ctx->ws->cs_is_secure(cs)) ?
@ -729,6 +737,8 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
si_cp_wait_mem(ctx, cs, va, ctx->wait_mem_number, 0xffffffff, WAIT_REG_MEM_EQUAL); si_cp_wait_mem(ctx, cs, va, ctx->wait_mem_number, 0xffffffff, WAIT_REG_MEM_EQUAL);
} }
radeon_begin_again(cs);
/* Ignore fields that only modify the behavior of other fields. */ /* Ignore fields that only modify the behavior of other fields. */
if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) { if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
/* Flush caches and wait for the caches to assert idle. /* Flush caches and wait for the caches to assert idle.
@ -757,6 +767,7 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0));
} }
radeon_end();
ctx->flags = 0; ctx->flags = 0;
} }
@ -820,6 +831,8 @@ void si_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1); cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1);
} }
radeon_begin(cs);
if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) { if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
/* Flush CMASK/FMASK/DCC. SURFACE_SYNC will wait for idle. */ /* Flush CMASK/FMASK/DCC. SURFACE_SYNC will wait for idle. */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
@ -868,6 +881,8 @@ void si_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
} }
radeon_end();
/* GFX9: Wait for idle if we're flushing CB or DB. ACQUIRE_MEM doesn't /* GFX9: Wait for idle if we're flushing CB or DB. ACQUIRE_MEM doesn't
* wait for idle on GFX9. We have to use a TS event. * wait for idle on GFX9. We have to use a TS event.
*/ */
@ -934,8 +949,10 @@ void si_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
if (sctx->has_graphics && if (sctx->has_graphics &&
(cp_coher_cntl || (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_INV_VCACHE | (cp_coher_cntl || (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_INV_VCACHE |
SI_CONTEXT_INV_L2 | SI_CONTEXT_WB_L2)))) { SI_CONTEXT_INV_L2 | SI_CONTEXT_WB_L2)))) {
radeon_begin(cs);
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0); radeon_emit(cs, 0);
radeon_end();
} }
/* GFX6-GFX8 only: /* GFX6-GFX8 only:
@ -988,11 +1005,15 @@ void si_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
si_prim_discard_signal_next_compute_ib_start(sctx); si_prim_discard_signal_next_compute_ib_start(sctx);
if (flags & SI_CONTEXT_START_PIPELINE_STATS) { if (flags & SI_CONTEXT_START_PIPELINE_STATS) {
radeon_begin(cs);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0));
radeon_end();
} else if (flags & SI_CONTEXT_STOP_PIPELINE_STATS) { } else if (flags & SI_CONTEXT_STOP_PIPELINE_STATS) {
radeon_begin(cs);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0));
radeon_end();
} }
sctx->flags = 0; sctx->flags = 0;
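For reference, a minimal sketch (not part of this diff) of the bracketing pattern used by the flush functions above. The function name is hypothetical; the packet is one of the events emitted in this file.

    static void example_emit_event(struct si_context *sctx)
    {
       struct radeon_cmdbuf *cs = &sctx->gfx_cs;

       radeon_begin(cs);                               /* cache the dword count and buffer pointer */
       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));  /* packet header */
       radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0));
       radeon_end();                                   /* write the dword count back once */
    }

Every emit between the two brackets goes through the cached locals, so a missing radeon_end() would leave cs->current.cdw stale and drop the packets.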

View file

@ -723,16 +723,20 @@ static void si_pc_emit_instance(struct si_context *sctx, int se, int instance)
value |= S_030800_INSTANCE_BROADCAST_WRITES(1); value |= S_030800_INSTANCE_BROADCAST_WRITES(1);
} }
radeon_begin(cs);
radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, value); radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, value);
radeon_end();
} }
static void si_pc_emit_shaders(struct si_context *sctx, unsigned shaders) static void si_pc_emit_shaders(struct si_context *sctx, unsigned shaders)
{ {
struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct radeon_cmdbuf *cs = &sctx->gfx_cs;
radeon_begin(cs);
radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2, false); radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2, false);
radeon_emit(cs, shaders & 0x7f); radeon_emit(cs, shaders & 0x7f);
radeon_emit(cs, 0xffffffff); radeon_emit(cs, 0xffffffff);
radeon_end();
} }
static void si_pc_emit_select(struct si_context *sctx, struct si_pc_block *block, unsigned count, static void si_pc_emit_select(struct si_context *sctx, struct si_pc_block *block, unsigned count,
@ -749,6 +753,8 @@ static void si_pc_emit_select(struct si_context *sctx, struct si_pc_block *block
if (regs->layout & SI_PC_FAKE) if (regs->layout & SI_PC_FAKE)
return; return;
radeon_begin(cs);
if (layout_multi == SI_PC_MULTI_BLOCK) { if (layout_multi == SI_PC_MULTI_BLOCK) {
assert(!(regs->layout & SI_PC_REG_REVERSE)); assert(!(regs->layout & SI_PC_REG_REVERSE));
@ -826,6 +832,7 @@ static void si_pc_emit_select(struct si_context *sctx, struct si_pc_block *block
radeon_emit(cs, 0); radeon_emit(cs, 0);
} }
} }
radeon_end();
} }
static void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer, uint64_t va) static void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer, uint64_t va)
@ -835,12 +842,14 @@ static void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer
si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address, si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address,
COPY_DATA_IMM, NULL, 1); COPY_DATA_IMM, NULL, 1);
radeon_begin(cs);
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET)); S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING)); S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
radeon_end();
} }
/* Note: The buffer was already added in si_pc_emit_start, so we don't have to /* Note: The buffer was already added in si_pc_emit_start, so we don't have to
@ -853,6 +862,7 @@ static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer,
EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY); EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY);
si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL); si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL);
radeon_begin(cs);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
@ -860,6 +870,7 @@ static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer,
radeon_set_uconfig_reg( radeon_set_uconfig_reg(
cs, R_036020_CP_PERFMON_CNTL, cs, R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1)); S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1));
radeon_end();
} }
static void si_pc_emit_read(struct si_context *sctx, struct si_pc_block *block, unsigned count, static void si_pc_emit_read(struct si_context *sctx, struct si_pc_block *block, unsigned count,
@ -871,6 +882,8 @@ static void si_pc_emit_read(struct si_context *sctx, struct si_pc_block *block,
unsigned reg = regs->counter0_lo; unsigned reg = regs->counter0_lo;
unsigned reg_delta = 8; unsigned reg_delta = 8;
radeon_begin(cs);
if (!(regs->layout & SI_PC_FAKE)) { if (!(regs->layout & SI_PC_FAKE)) {
if (regs->layout & SI_PC_REG_REVERSE) if (regs->layout & SI_PC_REG_REVERSE)
reg_delta = -reg_delta; reg_delta = -reg_delta;
@ -901,6 +914,7 @@ static void si_pc_emit_read(struct si_context *sctx, struct si_pc_block *block,
va += sizeof(uint64_t); va += sizeof(uint64_t);
} }
} }
radeon_end();
} }
static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery) static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery)
@ -921,6 +935,8 @@ static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery
void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit) void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit)
{ {
radeon_begin(&sctx->gfx_cs);
if (sctx->chip_class >= GFX10) { if (sctx->chip_class >= GFX10) {
radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL, radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL,
S_037390_PERFMON_CLOCK_STATE(inhibit)); S_037390_PERFMON_CLOCK_STATE(inhibit));
@ -928,6 +944,7 @@ void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, b
radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL, radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL,
S_0372FC_PERFMON_CLOCK_STATE(inhibit)); S_0372FC_PERFMON_CLOCK_STATE(inhibit));
} }
radeon_end();
} }
static void si_pc_query_resume(struct si_context *sctx, struct si_query *squery) static void si_pc_query_resume(struct si_context *sctx, struct si_query *squery)

View file

@ -116,7 +116,9 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY); RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
} }
radeon_begin(cs);
radeon_emit_array(cs, state->pm4, state->ndw); radeon_emit_array(cs, state->pm4, state->ndw);
radeon_end();
if (state->atom.emit) if (state->atom.emit)
state->atom.emit(sctx); state->atom.emit(sctx);

View file

@ -25,6 +25,7 @@
*/ */
#include "si_query.h" #include "si_query.h"
#include "si_build_pm4.h"
#include "amd/common/sid.h" #include "amd/common/sid.h"
#include "si_pipe.h" #include "si_pipe.h"
@ -771,10 +772,12 @@ static unsigned event_type_for_stream(unsigned stream)
static void emit_sample_streamout(struct radeon_cmdbuf *cs, uint64_t va, unsigned stream) static void emit_sample_streamout(struct radeon_cmdbuf *cs, uint64_t va, unsigned stream)
{ {
radeon_begin(cs);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(stream)) | EVENT_INDEX(3)); radeon_emit(cs, EVENT_TYPE(event_type_for_stream(stream)) | EVENT_INDEX(3));
radeon_emit(cs, va); radeon_emit(cs, va);
radeon_emit(cs, va >> 32); radeon_emit(cs, va >> 32);
radeon_end();
} }
static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_hw *query, static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_hw *query,
@ -785,12 +788,15 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h
switch (query->b.type) { switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
radeon_begin(cs);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1)); radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va); radeon_emit(cs, va);
radeon_emit(cs, va >> 32); radeon_emit(cs, va >> 32);
radeon_end();
break; break;
}
case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS: case PIPE_QUERY_SO_STATISTICS:
@ -805,12 +811,15 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h
si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
EOP_DATA_SEL_TIMESTAMP, NULL, va, 0, query->b.type); EOP_DATA_SEL_TIMESTAMP, NULL, va, 0, query->b.type);
break; break;
case PIPE_QUERY_PIPELINE_STATISTICS: case PIPE_QUERY_PIPELINE_STATISTICS: {
radeon_begin(cs);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va); radeon_emit(cs, va);
radeon_emit(cs, va >> 32); radeon_emit(cs, va >> 32);
radeon_end();
break; break;
}
default: default:
assert(0); assert(0);
} }
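A condensed sketch (illustration only) of why the occlusion and pipeline-statistics cases above gained braces: radeon_begin() declares block-local variables, and a switch case needs its own scope to hold them. The values come from the surrounding query-emit function.

    switch (query->b.type) {
    case PIPE_QUERY_OCCLUSION_COUNTER: {   /* braces give radeon_begin()'s locals a scope */
       radeon_begin(cs);
       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
       radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
       radeon_emit(cs, va);
       radeon_emit(cs, va >> 32);
       radeon_end();
       break;
    }
    default:
       assert(0);
    }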
@ -846,15 +855,18 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw
switch (query->b.type) { switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
va += 8; va += 8;
radeon_begin(cs);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1)); radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va); radeon_emit(cs, va);
radeon_emit(cs, va >> 32); radeon_emit(cs, va >> 32);
radeon_end();
fence_va = va + sctx->screen->info.max_render_backends * 16 - 8; fence_va = va + sctx->screen->info.max_render_backends * 16 - 8;
break; break;
}
case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS: case PIPE_QUERY_SO_STATISTICS:
@ -879,10 +891,12 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw
unsigned sample_size = (query->result_size - 8) / 2; unsigned sample_size = (query->result_size - 8) / 2;
va += sample_size; va += sample_size;
radeon_begin(cs);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va); radeon_emit(cs, va);
radeon_emit(cs, va >> 32); radeon_emit(cs, va >> 32);
radeon_end();
fence_va = va + sample_size; fence_va = va + sample_size;
break; break;
@ -934,6 +948,8 @@ static void emit_set_predicate(struct si_context *ctx, struct si_resource *buf,
{ {
struct radeon_cmdbuf *cs = &ctx->gfx_cs; struct radeon_cmdbuf *cs = &ctx->gfx_cs;
radeon_begin(cs);
if (ctx->chip_class >= GFX9) { if (ctx->chip_class >= GFX9) {
radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0)); radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
radeon_emit(cs, op); radeon_emit(cs, op);
@ -944,6 +960,8 @@ static void emit_set_predicate(struct si_context *ctx, struct si_resource *buf,
radeon_emit(cs, va); radeon_emit(cs, va);
radeon_emit(cs, op | ((va >> 32) & 0xFF)); radeon_emit(cs, op | ((va >> 32) & 0xFF));
} }
radeon_end();
radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, buf, RADEON_USAGE_READ, RADEON_PRIO_QUERY); radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, buf, RADEON_USAGE_READ, RADEON_PRIO_QUERY);
} }

View file

@ -35,22 +35,6 @@ static void
si_emit_spi_config_cntl(struct si_context* sctx, si_emit_spi_config_cntl(struct si_context* sctx,
struct radeon_cmdbuf *cs, bool enable); struct radeon_cmdbuf *cs, bool enable);
static inline void
radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs,
unsigned reg,
unsigned value)
{
assert(reg < CIK_UCONFIG_REG_OFFSET);
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
COPY_DATA_DST_SEL(COPY_DATA_PERF));
radeon_emit(cs, value);
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, reg >> 2);
radeon_emit(cs, 0); /* unused */
}
static bool static bool
si_thread_trace_init_bo(struct si_context *sctx) si_thread_trace_init_bo(struct si_context *sctx)
{ {
@ -89,6 +73,8 @@ si_emit_thread_trace_start(struct si_context* sctx,
uint32_t shifted_size = sctx->thread_trace->buffer_size >> SQTT_BUFFER_ALIGN_SHIFT; uint32_t shifted_size = sctx->thread_trace->buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
unsigned max_se = sscreen->info.max_se; unsigned max_se = sscreen->info.max_se;
radeon_begin(cs);
for (unsigned se = 0; se < max_se; se++) { for (unsigned se = 0; se < max_se; se++) {
uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo); uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
uint64_t data_va = ac_thread_trace_get_data_va(sctx->thread_trace, va, se); uint64_t data_va = ac_thread_trace_get_data_va(sctx->thread_trace, va, se);
@ -220,6 +206,7 @@ si_emit_thread_trace_start(struct si_context* sctx,
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0));
} }
radeon_end();
} }
static const uint32_t gfx9_thread_trace_info_regs[] = static const uint32_t gfx9_thread_trace_info_regs[] =
@ -258,6 +245,8 @@ si_copy_thread_trace_info_regs(struct si_context* sctx,
uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo); uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
uint64_t info_va = ac_thread_trace_get_info_va(va, se_index); uint64_t info_va = ac_thread_trace_get_info_va(va, se_index);
radeon_begin(cs);
/* Copy back the info struct one DWORD at a time. */ /* Copy back the info struct one DWORD at a time. */
for (unsigned i = 0; i < 3; i++) { for (unsigned i = 0; i < 3; i++) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
@ -269,6 +258,7 @@ si_copy_thread_trace_info_regs(struct si_context* sctx,
radeon_emit(cs, (info_va + i * 4)); radeon_emit(cs, (info_va + i * 4));
radeon_emit(cs, (info_va + i * 4) >> 32); radeon_emit(cs, (info_va + i * 4) >> 32);
} }
radeon_end();
} }
@ -280,6 +270,8 @@ si_emit_thread_trace_stop(struct si_context *sctx,
{ {
unsigned max_se = sctx->screen->info.max_se; unsigned max_se = sctx->screen->info.max_se;
radeon_begin(cs);
/* Stop the thread trace with a different event based on the queue. */ /* Stop the thread trace with a different event based on the queue. */
if (queue_family_index == RING_COMPUTE) { if (queue_family_index == RING_COMPUTE) {
radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
@ -291,8 +283,11 @@ si_emit_thread_trace_stop(struct si_context *sctx,
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
radeon_end();
for (unsigned se = 0; se < max_se; se++) { for (unsigned se = 0; se < max_se; se++) {
radeon_begin(cs);
/* Target SEi and SH0. */ /* Target SEi and SH0. */
radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
S_030800_SE_INDEX(se) | S_030800_SE_INDEX(se) |
@ -335,15 +330,18 @@ si_emit_thread_trace_stop(struct si_context *sctx,
radeon_emit(cs, S_030CE8_BUSY(1)); /* mask */ radeon_emit(cs, S_030CE8_BUSY(1)); /* mask */
radeon_emit(cs, 4); /* poll interval */ radeon_emit(cs, 4); /* poll interval */
} }
radeon_end();
si_copy_thread_trace_info_regs(sctx, cs, se); si_copy_thread_trace_info_regs(sctx, cs, se);
} }
/* Restore global broadcasting. */ /* Restore global broadcasting. */
radeon_begin_again(cs);
radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
S_030800_SE_BROADCAST_WRITES(1) | S_030800_SE_BROADCAST_WRITES(1) |
S_030800_SH_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
S_030800_INSTANCE_BROADCAST_WRITES(1)); S_030800_INSTANCE_BROADCAST_WRITES(1));
radeon_end();
} }
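A sketch (hypothetical function name, not part of the diff) of the radeon_begin_again() usage above: after radeon_end(), a helper that manages its own begin/end pair runs, and emission is then reopened without redeclaring the locals introduced by the first radeon_begin() in the same scope.

    static void example_stop_and_restore(struct si_context *sctx,
                                         struct radeon_cmdbuf *cs, unsigned se)
    {
       radeon_begin(cs);
       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
       radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
       radeon_end();

       /* This helper opens and closes its own radeon_begin()/radeon_end() pair. */
       si_copy_thread_trace_info_regs(sctx, cs, se);

       /* Reopen emission, reusing the locals declared by the first radeon_begin(). */
       radeon_begin_again(cs);
       radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
                              S_030800_SE_BROADCAST_WRITES(1) |
                              S_030800_SH_BROADCAST_WRITES(1) |
                              S_030800_INSTANCE_BROADCAST_WRITES(1));
       radeon_end();
    }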
static void static void
@ -351,6 +349,8 @@ si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf
{ {
struct radeon_winsys *ws = sctx->ws; struct radeon_winsys *ws = sctx->ws;
radeon_begin(cs);
switch (family) { switch (family) {
case RING_GFX: case RING_GFX:
radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
@ -362,6 +362,7 @@ si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf
radeon_emit(cs, 0); radeon_emit(cs, 0);
break; break;
} }
radeon_end();
ws->cs_add_buffer(cs, ws->cs_add_buffer(cs,
sctx->thread_trace->bo, sctx->thread_trace->bo,
@ -390,6 +391,9 @@ static void
si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *cs) si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *cs)
{ {
struct radeon_winsys *ws = sctx->ws; struct radeon_winsys *ws = sctx->ws;
radeon_begin(cs);
switch (family) { switch (family) {
case RING_GFX: case RING_GFX:
radeon_emit(sctx->thread_trace->stop_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); radeon_emit(sctx->thread_trace->stop_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
@ -401,6 +405,8 @@ si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *
radeon_emit(sctx->thread_trace->stop_cs[family], 0); radeon_emit(sctx->thread_trace->stop_cs[family], 0);
break; break;
} }
radeon_end();
ws->cs_add_buffer(cs, ws->cs_add_buffer(cs,
sctx->thread_trace->bo, sctx->thread_trace->bo,
RADEON_USAGE_READWRITE, RADEON_USAGE_READWRITE,
@ -643,6 +649,8 @@ si_emit_thread_trace_userdata(struct si_context* sctx,
{ {
const uint32_t *dwords = (uint32_t *)data; const uint32_t *dwords = (uint32_t *)data;
radeon_begin(cs);
while (num_dwords > 0) { while (num_dwords > 0) {
uint32_t count = MIN2(num_dwords, 2); uint32_t count = MIN2(num_dwords, 2);
@ -655,12 +663,15 @@ si_emit_thread_trace_userdata(struct si_context* sctx,
dwords += count; dwords += count;
num_dwords -= count; num_dwords -= count;
} }
radeon_end();
} }
static void static void
si_emit_spi_config_cntl(struct si_context* sctx, si_emit_spi_config_cntl(struct si_context* sctx,
struct radeon_cmdbuf *cs, bool enable) struct radeon_cmdbuf *cs, bool enable)
{ {
radeon_begin(cs);
if (sctx->chip_class >= GFX9) { if (sctx->chip_class >= GFX9) {
uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) | uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) |
S_031100_EXP_PRIORITY_ORDER(3) | S_031100_EXP_PRIORITY_ORDER(3) |
@ -677,6 +688,7 @@ si_emit_spi_config_cntl(struct si_context* sctx,
S_009100_ENABLE_SQG_TOP_EVENTS(enable) | S_009100_ENABLE_SQG_TOP_EVENTS(enable) |
S_009100_ENABLE_SQG_BOP_EVENTS(enable)); S_009100_ENABLE_SQG_BOP_EVENTS(enable));
} }
radeon_end();
} }
void void

View file

@ -91,11 +91,13 @@ static void si_emit_cb_render_state(struct si_context *sctx)
if (sctx->screen->dpbb_allowed && sctx->last_cb_target_mask != cb_target_mask) { if (sctx->screen->dpbb_allowed && sctx->last_cb_target_mask != cb_target_mask) {
sctx->last_cb_target_mask = cb_target_mask; sctx->last_cb_target_mask = cb_target_mask;
radeon_begin(cs);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
radeon_end();
} }
unsigned initial_cdw = cs->current.cdw; radeon_begin(cs);
radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK, radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK,
cb_target_mask); cb_target_mask);
@ -256,8 +258,7 @@ static void si_emit_cb_render_state(struct si_context *sctx)
radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT, radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT,
sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control); sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control);
} }
if (initial_cdw != cs->current.cdw) radeon_end_update_context_roll(sctx);
sctx->context_roll = true;
} }
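A condensed sketch (hypothetical function name) of the bookkeeping change visible above: instead of saving cs->current.cdw into initial_cdw and comparing afterwards, the block is closed with radeon_end_update_context_roll(), which sets sctx->context_roll only if packets were actually added.

    static void example_emit_state(struct si_context *sctx, unsigned cb_target_mask)
    {
       radeon_begin(&sctx->gfx_cs);
       radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK,
                                  cb_target_mask);
       /* Sets sctx->context_roll = true only if the register write was not elided. */
       radeon_end_update_context_roll(sctx);
    }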
/* /*
@ -689,8 +690,10 @@ static void si_emit_blend_color(struct si_context *sctx)
{ {
struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct radeon_cmdbuf *cs = &sctx->gfx_cs;
radeon_begin(cs);
radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
radeon_emit_array(cs, (uint32_t *)sctx->blend_color.state.color, 4); radeon_emit_array(cs, (uint32_t *)sctx->blend_color.state.color, 4);
radeon_end();
} }
/* /*
@ -721,8 +724,10 @@ static void si_emit_clip_state(struct si_context *sctx)
{ {
struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct radeon_cmdbuf *cs = &sctx->gfx_cs;
radeon_begin(cs);
radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6 * 4); radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6 * 4);
radeon_emit_array(cs, (uint32_t *)sctx->clip_state.state.ucp, 6 * 4); radeon_emit_array(cs, (uint32_t *)sctx->clip_state.state.ucp, 6 * 4);
radeon_end();
} }
static void si_emit_clip_regs(struct si_context *sctx) static void si_emit_clip_regs(struct si_context *sctx)
@ -747,7 +752,6 @@ static void si_emit_clip_regs(struct si_context *sctx)
clipdist_mask &= rs->clip_plane_enable; clipdist_mask &= rs->clip_plane_enable;
culldist_mask |= clipdist_mask; culldist_mask |= clipdist_mask;
unsigned initial_cdw = sctx->gfx_cs.current.cdw;
unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((vs_out_mask & 0x0F) != 0) | unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((vs_out_mask & 0x0F) != 0) |
S_02881C_VS_OUT_CCDIST1_VEC_ENA((vs_out_mask & 0xF0) != 0) | S_02881C_VS_OUT_CCDIST1_VEC_ENA((vs_out_mask & 0xF0) != 0) |
S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3 && S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3 &&
@ -755,6 +759,8 @@ static void si_emit_clip_regs(struct si_context *sctx)
S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) | S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
clipdist_mask | (culldist_mask << 8); clipdist_mask | (culldist_mask << 8);
radeon_begin(&sctx->gfx_cs);
if (sctx->chip_class >= GFX10) { if (sctx->chip_class >= GFX10) {
radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL, radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, pa_cl_cntl, SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, pa_cl_cntl,
@ -765,9 +771,7 @@ static void si_emit_clip_regs(struct si_context *sctx)
} }
radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL, radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL,
rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space)); rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space));
radeon_end_update_context_roll(sctx);
if (initial_cdw != sctx->gfx_cs.current.cdw)
sctx->context_roll = true;
} }
/* /*
@ -1048,6 +1052,7 @@ static void si_emit_stencil_ref(struct si_context *sctx)
struct pipe_stencil_ref *ref = &sctx->stencil_ref.state; struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part; struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
radeon_begin(cs);
radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) | radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
S_028430_STENCILMASK(dsa->valuemask[0]) | S_028430_STENCILMASK(dsa->valuemask[0]) |
@ -1056,6 +1061,7 @@ static void si_emit_stencil_ref(struct si_context *sctx)
S_028434_STENCILMASK_BF(dsa->valuemask[1]) | S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
S_028434_STENCILOPVAL_BF(1)); S_028434_STENCILOPVAL_BF(1));
radeon_end();
} }
static void si_set_stencil_ref(struct pipe_context *ctx, const struct pipe_stencil_ref state) static void si_set_stencil_ref(struct pipe_context *ctx, const struct pipe_stencil_ref state)
@ -1334,7 +1340,6 @@ static void si_emit_db_render_state(struct si_context *sctx)
{ {
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned db_shader_control, db_render_control, db_count_control; unsigned db_shader_control, db_render_control, db_count_control;
unsigned initial_cdw = sctx->gfx_cs.current.cdw;
/* DB_RENDER_CONTROL */ /* DB_RENDER_CONTROL */
if (sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled) { if (sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled) {
@ -1374,6 +1379,7 @@ static void si_emit_db_render_state(struct si_context *sctx)
} }
} }
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL, radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL,
db_render_control, db_count_control); db_render_control, db_count_control);
@ -1427,9 +1433,7 @@ static void si_emit_db_render_state(struct si_context *sctx)
S_028064_VRS_OVERRIDE_RATE_Y(0)); S_028064_VRS_OVERRIDE_RATE_Y(0));
} }
} }
radeon_end_update_context_roll(sctx);
if (initial_cdw != sctx->gfx_cs.current.cdw)
sctx->context_roll = true;
} }
/* /*
@ -2909,6 +2913,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
struct si_surface *cb = NULL; struct si_surface *cb = NULL;
unsigned cb_color_info = 0; unsigned cb_color_info = 0;
radeon_begin(cs);
/* Colorbuffers. */ /* Colorbuffers. */
for (i = 0; i < nr_cbufs; i++) { for (i = 0; i < nr_cbufs; i++) {
uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, cb_dcc_base; uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, cb_dcc_base;
@ -3260,6 +3266,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
} }
radeon_end();
si_update_display_dcc_dirty(sctx); si_update_display_dcc_dirty(sctx);
@ -3292,6 +3299,8 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx)
si_emit_sample_locations(cs, nr_samples); si_emit_sample_locations(cs, nr_samples);
} }
radeon_begin(cs);
if (sctx->family >= CHIP_POLARIS10) { if (sctx->family >= CHIP_POLARIS10) {
unsigned small_prim_filter_cntl = unsigned small_prim_filter_cntl =
S_028830_SMALL_PRIM_FILTER_ENABLE(1) | S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
@ -3323,6 +3332,7 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx)
radeon_opt_set_context_reg( radeon_opt_set_context_reg(
sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_PRIM_FILTER_CNTL, sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
radeon_end();
} }
static bool si_out_of_order_rasterization(struct si_context *sctx) static bool si_out_of_order_rasterization(struct si_context *sctx)
@ -3501,7 +3511,7 @@ static void si_emit_msaa_config(struct si_context *sctx)
} }
} }
unsigned initial_cdw = cs->current.cdw; radeon_begin(cs);
/* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */ /* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */
radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL, radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL,
@ -3512,7 +3522,7 @@ static void si_emit_msaa_config(struct si_context *sctx)
radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1, radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1,
sc_mode_cntl_1); sc_mode_cntl_1);
if (initial_cdw != cs->current.cdw) { if (radeon_packets_added()) {
sctx->context_roll = true; sctx->context_roll = true;
/* GFX9: Flush DFSM when the AA mode changes. */ /* GFX9: Flush DFSM when the AA mode changes. */
@ -3521,6 +3531,7 @@ static void si_emit_msaa_config(struct si_context *sctx)
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
} }
} }
radeon_end();
} }
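A short sketch (condensed from the function above, not new code in the diff) of the radeon_packets_added() check: it can be queried while the pair is still open, so follow-up packets such as the GFX9 DFSM flush are emitted in the same block; the guarding chip checks are elided here.

    radeon_begin(cs);
    radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1,
                               SI_TRACKED_PA_SC_MODE_CNTL_1, sc_mode_cntl_1);

    if (radeon_packets_added()) {
       sctx->context_roll = true;
       /* Follow-up packets can be added in the same open block. */
       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
       radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
    }
    radeon_end();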
void si_update_ps_iter_samples(struct si_context *sctx) void si_update_ps_iter_samples(struct si_context *sctx)
@ -4509,9 +4520,11 @@ static void si_emit_sample_mask(struct si_context *sctx)
assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 || assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
(mask & 1 && sctx->blitter->running)); (mask & 1 && sctx->blitter->running));
radeon_begin(cs);
radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
radeon_emit(cs, mask | (mask << 16)); radeon_emit(cs, mask | (mask << 16));
radeon_emit(cs, mask | (mask << 16)); radeon_emit(cs, mask | (mask << 16));
radeon_end();
} }
static void si_delete_sampler_state(struct pipe_context *ctx, void *state) static void si_delete_sampler_state(struct pipe_context *ctx, void *state)

View file

@ -404,7 +404,7 @@ static void gfx10_get_bin_sizes(struct si_context *sctx, unsigned cb_target_enab
static void si_emit_dpbb_disable(struct si_context *sctx) static void si_emit_dpbb_disable(struct si_context *sctx)
{ {
unsigned initial_cdw = sctx->gfx_cs.current.cdw; radeon_begin(&sctx->gfx_cs);
if (sctx->chip_class >= GFX10) { if (sctx->chip_class >= GFX10) {
struct uvec2 bin_size = {}; struct uvec2 bin_size = {};
@ -441,8 +441,7 @@ static void si_emit_dpbb_disable(struct si_context *sctx)
radeon_opt_set_context_reg( radeon_opt_set_context_reg(
sctx, db_dfsm_control, SI_TRACKED_DB_DFSM_CONTROL, sctx, db_dfsm_control, SI_TRACKED_DB_DFSM_CONTROL,
S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
if (initial_cdw != sctx->gfx_cs.current.cdw) radeon_end_update_context_roll(sctx);
sctx->context_roll = true;
sctx->last_binning_enabled = false; sctx->last_binning_enabled = false;
} }
@ -526,7 +525,7 @@ void si_emit_dpbb_state(struct si_context *sctx)
if (bin_size.y >= 32) if (bin_size.y >= 32)
bin_size_extend.y = util_logbase2(bin_size.y) - 5; bin_size_extend.y = util_logbase2(bin_size.y) - 5;
unsigned initial_cdw = sctx->gfx_cs.current.cdw; radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg( radeon_opt_set_context_reg(
sctx, R_028C44_PA_SC_BINNER_CNTL_0, SI_TRACKED_PA_SC_BINNER_CNTL_0, sctx, R_028C44_PA_SC_BINNER_CNTL_0, SI_TRACKED_PA_SC_BINNER_CNTL_0,
S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | S_028C44_BIN_SIZE_X(bin_size.x == 16) | S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | S_028C44_BIN_SIZE_X(bin_size.x == 16) |
@ -546,8 +545,7 @@ void si_emit_dpbb_state(struct si_context *sctx)
radeon_opt_set_context_reg( radeon_opt_set_context_reg(
sctx, db_dfsm_control, SI_TRACKED_DB_DFSM_CONTROL, sctx, db_dfsm_control, SI_TRACKED_DB_DFSM_CONTROL,
S_028060_PUNCHOUT_MODE(punchout_mode) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); S_028060_PUNCHOUT_MODE(punchout_mode) | S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
if (initial_cdw != sctx->gfx_cs.current.cdw) radeon_end_update_context_roll(sctx);
sctx->context_roll = true;
sctx->last_binning_enabled = true; sctx->last_binning_enabled = true;
} }

View file

@ -399,6 +399,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
assert(ls_current->config.lds_size == 0); assert(ls_current->config.lds_size == 0);
struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct radeon_cmdbuf *cs = &sctx->gfx_cs;
radeon_begin(cs);
if (sctx->chip_class >= GFX9) { if (sctx->chip_class >= GFX9) {
unsigned hs_rsrc2 = ls_current->config.rsrc2; unsigned hs_rsrc2 = ls_current->config.rsrc2;
@ -443,6 +444,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 2); radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 2);
radeon_emit(cs, offchip_layout); radeon_emit(cs, offchip_layout);
radeon_emit(cs, ring_va); radeon_emit(cs, ring_va);
radeon_end();
unsigned ls_hs_config = unsigned ls_hs_config =
S_028B58_NUM_PATCHES(*num_patches) | S_028B58_NUM_PATCHES(*num_patches) |
@ -450,13 +452,14 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp); S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
if (sctx->last_ls_hs_config != ls_hs_config) { if (sctx->last_ls_hs_config != ls_hs_config) {
radeon_begin(cs);
if (sctx->chip_class >= GFX7) { if (sctx->chip_class >= GFX7) {
radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config); radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
} else { } else {
radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config); radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
} }
radeon_end_update_context_roll(sctx);
sctx->last_ls_hs_config = ls_hs_config; sctx->last_ls_hs_config = ls_hs_config;
sctx->context_roll = true;
} }
} }
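A condensed sketch (illustration only) of the structure above: once the first pair is closed, a nested block may open a fresh radeon_begin()/radeon_end() pair whose locals live only inside that block. Identifiers mirror the tess-state code above.

    radeon_begin(cs);
    radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 2);
    radeon_emit(cs, offchip_layout);
    radeon_emit(cs, ring_va);
    radeon_end();

    if (sctx->last_ls_hs_config != ls_hs_config) {
       /* New scope, new begin/end pair. */
       radeon_begin(cs);
       radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
       radeon_end_update_context_roll(sctx);
       sctx->last_ls_hs_config = ls_hs_config;
    }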
@ -734,7 +737,8 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct radeon_cmdbuf *cs = &sctx->gfx_cs;
enum pipe_prim_type rast_prim = sctx->current_rast_prim; enum pipe_prim_type rast_prim = sctx->current_rast_prim;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned initial_cdw = cs->current.cdw;
radeon_begin(cs);
if (unlikely(si_is_line_stipple_enabled(sctx))) { if (unlikely(si_is_line_stipple_enabled(sctx))) {
/* For lines, reset the stipple pattern at each primitive. Otherwise, /* For lines, reset the stipple pattern at each primitive. Otherwise,
@ -756,8 +760,10 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
sctx->last_gs_out_prim = gs_out_prim; sctx->last_gs_out_prim = gs_out_prim;
} }
if (GFX_VERSION == GFX9 && initial_cdw != cs->current.cdw) if (GFX_VERSION == GFX9)
sctx->context_roll = true; radeon_end_update_context_roll(sctx);
else
radeon_end();
if (NGG) { if (NGG) {
struct si_shader *hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current; struct si_shader *hw_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current;
@ -797,6 +803,7 @@ static void si_emit_vs_state(struct si_context *sctx, unsigned index_size)
/* For the API vertex shader (VS_STATE_INDEXED, LS_OUT_*). */ /* For the API vertex shader (VS_STATE_INDEXED, LS_OUT_*). */
unsigned vs_base = si_get_user_data_base(GFX_VERSION, HAS_TESS, HAS_GS, NGG, unsigned vs_base = si_get_user_data_base(GFX_VERSION, HAS_TESS, HAS_GS, NGG,
PIPE_SHADER_VERTEX); PIPE_SHADER_VERTEX);
radeon_begin(cs);
radeon_set_sh_reg(cs, vs_base + SI_SGPR_VS_STATE_BITS * 4, radeon_set_sh_reg(cs, vs_base + SI_SGPR_VS_STATE_BITS * 4,
sctx->current_vs_state); sctx->current_vs_state);
@ -815,6 +822,7 @@ static void si_emit_vs_state(struct si_context *sctx, unsigned index_size)
radeon_set_sh_reg(cs, R_00B230_SPI_SHADER_USER_DATA_GS_0 + SI_SGPR_VS_STATE_BITS * 4, radeon_set_sh_reg(cs, R_00B230_SPI_SHADER_USER_DATA_GS_0 + SI_SGPR_VS_STATE_BITS * 4,
sctx->current_vs_state); sctx->current_vs_state);
} }
radeon_end();
sctx->last_vs_state = sctx->current_vs_state; sctx->last_vs_state = sctx->current_vs_state;
} }
@ -845,14 +853,18 @@ static void si_emit_ia_multi_vgt_param(struct si_context *sctx,
/* Draw state. */ /* Draw state. */
if (ia_multi_vgt_param != sctx->last_multi_vgt_param) { if (ia_multi_vgt_param != sctx->last_multi_vgt_param) {
radeon_begin(cs);
if (GFX_VERSION == GFX9) if (GFX_VERSION == GFX9)
radeon_set_uconfig_reg_idx(cs, sctx->screen, R_030960_IA_MULTI_VGT_PARAM, 4, radeon_set_uconfig_reg_idx(cs, sctx->screen, GFX_VERSION,
ia_multi_vgt_param); R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param);
else if (GFX_VERSION >= GFX7) else if (GFX_VERSION >= GFX7)
radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
else else
radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param); radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
radeon_end();
sctx->last_multi_vgt_param = ia_multi_vgt_param; sctx->last_multi_vgt_param = ia_multi_vgt_param;
} }
} }
@ -897,7 +909,11 @@ static void gfx10_emit_ge_cntl(struct si_context *sctx, unsigned num_patches)
ge_cntl |= S_03096C_PACKET_TO_ONE_PA(si_is_line_stipple_enabled(sctx)); ge_cntl |= S_03096C_PACKET_TO_ONE_PA(si_is_line_stipple_enabled(sctx));
if (ge_cntl != sctx->last_multi_vgt_param) { if (ge_cntl != sctx->last_multi_vgt_param) {
radeon_set_uconfig_reg(&sctx->gfx_cs, R_03096C_GE_CNTL, ge_cntl); struct radeon_cmdbuf *cs = &sctx->gfx_cs;
radeon_begin(cs);
radeon_set_uconfig_reg(cs, R_03096C_GE_CNTL, ge_cntl);
radeon_end();
sctx->last_multi_vgt_param = ge_cntl; sctx->last_multi_vgt_param = ge_cntl;
} }
} }
@ -919,13 +935,15 @@ static void si_emit_draw_registers(struct si_context *sctx,
(sctx, indirect, prim, num_patches, instance_count, primitive_restart, (sctx, indirect, prim, num_patches, instance_count, primitive_restart,
min_vertex_count, vertices_per_patch); min_vertex_count, vertices_per_patch);
radeon_begin(cs);
if (prim != sctx->last_prim) { if (prim != sctx->last_prim) {
unsigned vgt_prim = si_conv_pipe_prim(prim); unsigned vgt_prim = si_conv_pipe_prim(prim);
if (GFX_VERSION >= GFX10) if (GFX_VERSION >= GFX10)
radeon_set_uconfig_reg(cs, R_030908_VGT_PRIMITIVE_TYPE, vgt_prim); radeon_set_uconfig_reg(cs, R_030908_VGT_PRIMITIVE_TYPE, vgt_prim);
else if (GFX_VERSION >= GFX7) else if (GFX_VERSION >= GFX7)
radeon_set_uconfig_reg_idx(cs, sctx->screen, R_030908_VGT_PRIMITIVE_TYPE, 1, vgt_prim); radeon_set_uconfig_reg_idx(cs, sctx->screen, GFX_VERSION, R_030908_VGT_PRIMITIVE_TYPE, 1, vgt_prim);
else else
radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, vgt_prim); radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, vgt_prim);
@ -947,14 +965,17 @@ static void si_emit_draw_registers(struct si_context *sctx,
if (GFX_VERSION == GFX9) if (GFX_VERSION == GFX9)
sctx->context_roll = true; sctx->context_roll = true;
} }
radeon_end();
} }
#define EMIT_SQTT_END_DRAW do { \ #define EMIT_SQTT_END_DRAW do { \
if (GFX_VERSION >= GFX9 && unlikely(sctx->thread_trace_enabled)) { \ if (GFX_VERSION >= GFX9 && unlikely(sctx->thread_trace_enabled)) { \
radeon_begin(&sctx->gfx_cs); \
radeon_emit(&sctx->gfx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); \ radeon_emit(&sctx->gfx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); \
radeon_emit(&sctx->gfx_cs, \ radeon_emit(&sctx->gfx_cs, \
EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | \ EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | \
EVENT_INDEX(0)); \ EVENT_INDEX(0)); \
radeon_end(); \
} \ } \
} while (0) } while (0)
@ -979,7 +1000,10 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
if (indirect && indirect->count_from_stream_output) { if (indirect && indirect->count_from_stream_output) {
struct si_streamout_target *t = (struct si_streamout_target *)indirect->count_from_stream_output; struct si_streamout_target *t = (struct si_streamout_target *)indirect->count_from_stream_output;
radeon_begin(cs);
radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw); radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
radeon_end();
si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_REG, NULL, si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_REG, NULL,
R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2, COPY_DATA_SRC_MEM, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2, COPY_DATA_SRC_MEM,
t->buf_filled_size, t->buf_filled_size_offset); t->buf_filled_size, t->buf_filled_size_offset);
@ -990,6 +1014,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
uint32_t index_max_size = 0; uint32_t index_max_size = 0;
uint64_t index_va = 0; uint64_t index_va = 0;
radeon_begin(cs);
/* draw packet */ /* draw packet */
if (index_size) { if (index_size) {
/* Register shadowing doesn't shadow INDEX_TYPE. */ /* Register shadowing doesn't shadow INDEX_TYPE. */
@ -1017,7 +1043,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
} }
if (GFX_VERSION >= GFX9) { if (GFX_VERSION >= GFX9) {
radeon_set_uconfig_reg_idx(cs, sctx->screen, R_03090C_VGT_INDEX_TYPE, 2, index_type); radeon_set_uconfig_reg_idx(cs, sctx->screen, GFX_VERSION,
R_03090C_VGT_INDEX_TYPE, 2, index_type);
} else { } else {
radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
radeon_emit(cs, index_type); radeon_emit(cs, index_type);
@ -1032,8 +1059,10 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
/* Skip draw calls with 0-sized index buffers. /* Skip draw calls with 0-sized index buffers.
* They cause a hang on some chips, like Navi10-14. * They cause a hang on some chips, like Navi10-14.
*/ */
if (!index_max_size) if (!index_max_size) {
radeon_end();
return; return;
}
index_va = si_resource(indexbuf)->gpu_address + index_offset; index_va = si_resource(indexbuf)->gpu_address + index_offset;
@ -1173,6 +1202,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
if (index_size) { if (index_size) {
if (ALLOW_PRIM_DISCARD_CS && dispatch_prim_discard_cs) { if (ALLOW_PRIM_DISCARD_CS && dispatch_prim_discard_cs) {
radeon_end();
for (unsigned i = 0; i < num_draws; i++) { for (unsigned i = 0; i < num_draws; i++) {
uint64_t va = index_va + draws[0].start * original_index_size; uint64_t va = index_va + draws[0].start * original_index_size;
@ -1238,6 +1269,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(cs, draws[i].count); radeon_emit(cs, draws[i].count);
radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX); radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX);
} }
radeon_end();
EMIT_SQTT_END_DRAW; EMIT_SQTT_END_DRAW;
return; return;
} }
@ -1265,6 +1298,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
sctx->last_base_vertex = draws[num_draws - 1].start; sctx->last_base_vertex = draws[num_draws - 1].start;
} }
} }
radeon_end();
EMIT_SQTT_END_DRAW; EMIT_SQTT_END_DRAW;
} }
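A small sketch (not from the diff) of the rule the zero-sized index buffer check above follows: any early return taken between radeon_begin() and radeon_end() has to close the pair first so cs->current.cdw is written back.

    radeon_begin(cs);
    /* ... index type and draw state packets ... */

    if (!index_max_size) {
       radeon_end();   /* flush the cached dword count before bailing out */
       return;
    }

    /* ... remaining draw packets ... */
    radeon_end();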
@ -2181,8 +2215,10 @@ void si_trace_emit(struct si_context *sctx)
si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf, 0, 4, V_370_MEM, V_370_ME, &trace_id); si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf, 0, 4, V_370_MEM, V_370_ME, &trace_id);
radeon_begin(cs);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id)); radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id));
radeon_end();
if (sctx->log) if (sctx->log)
u_log_flush(sctx->log); u_log_flush(sctx->log);

View file

@ -150,6 +150,7 @@ static void si_get_sample_position(struct pipe_context *ctx, unsigned sample_cou
static void si_emit_max_4_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority, static void si_emit_max_4_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority,
uint32_t sample_locs) uint32_t sample_locs)
{ {
radeon_begin(cs);
radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
radeon_emit(cs, centroid_priority); radeon_emit(cs, centroid_priority);
radeon_emit(cs, centroid_priority >> 32); radeon_emit(cs, centroid_priority >> 32);
@ -157,11 +158,13 @@ static void si_emit_max_4_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroi
radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs); radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs);
radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs); radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs);
radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs); radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs);
radeon_end();
} }
static void si_emit_max_16_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority, static void si_emit_max_16_sample_locs(struct radeon_cmdbuf *cs, uint64_t centroid_priority,
const uint32_t *sample_locs, unsigned num_samples) const uint32_t *sample_locs, unsigned num_samples)
{ {
radeon_begin(cs);
radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
radeon_emit(cs, centroid_priority); radeon_emit(cs, centroid_priority);
radeon_emit(cs, centroid_priority >> 32); radeon_emit(cs, centroid_priority >> 32);
@ -171,6 +174,7 @@ static void si_emit_max_16_sample_locs(struct radeon_cmdbuf *cs, uint64_t centro
radeon_emit_array(cs, sample_locs, 4); radeon_emit_array(cs, sample_locs, 4);
radeon_emit_array(cs, sample_locs, 4); radeon_emit_array(cs, sample_locs, 4);
radeon_emit_array(cs, sample_locs, num_samples == 8 ? 2 : 4); radeon_emit_array(cs, sample_locs, num_samples == 8 ? 2 : 4);
radeon_end();
} }
void si_emit_sample_locations(struct radeon_cmdbuf *cs, int nr_samples) void si_emit_sample_locations(struct radeon_cmdbuf *cs, int nr_samples)

View file

@ -566,11 +566,10 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
static void si_emit_shader_es(struct si_context *sctx) static void si_emit_shader_es(struct si_context *sctx)
{ {
struct si_shader *shader = sctx->queued.named.es->shader; struct si_shader *shader = sctx->queued.named.es->shader;
unsigned initial_cdw = sctx->gfx_cs.current.cdw;
if (!shader) if (!shader)
return; return;
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE, radeon_opt_set_context_reg(sctx, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
SI_TRACKED_VGT_ESGS_RING_ITEMSIZE, SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
shader->selector->esgs_itemsize / 4); shader->selector->esgs_itemsize / 4);
@ -583,9 +582,7 @@ static void si_emit_shader_es(struct si_context *sctx)
radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
shader->vgt_vertex_reuse_block_cntl); shader->vgt_vertex_reuse_block_cntl);
radeon_end_update_context_roll(sctx);
if (initial_cdw != sctx->gfx_cs.current.cdw)
sctx->context_roll = true;
} }
static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
@ -729,11 +726,11 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *
static void si_emit_shader_gs(struct si_context *sctx) static void si_emit_shader_gs(struct si_context *sctx)
{ {
struct si_shader *shader = sctx->queued.named.gs->shader; struct si_shader *shader = sctx->queued.named.gs->shader;
unsigned initial_cdw = sctx->gfx_cs.current.cdw;
if (!shader) if (!shader)
return; return;
radeon_begin(&sctx->gfx_cs);
/* R_028A60_VGT_GSVS_RING_OFFSET_1, R_028A64_VGT_GSVS_RING_OFFSET_2 /* R_028A60_VGT_GSVS_RING_OFFSET_1, R_028A64_VGT_GSVS_RING_OFFSET_2
* R_028A68_VGT_GSVS_RING_OFFSET_3 */ * R_028A68_VGT_GSVS_RING_OFFSET_3 */
radeon_opt_set_context_reg3( radeon_opt_set_context_reg3(
@ -782,9 +779,7 @@ static void si_emit_shader_gs(struct si_context *sctx)
SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
shader->vgt_vertex_reuse_block_cntl); shader->vgt_vertex_reuse_block_cntl);
} }
radeon_end_update_context_roll(sctx);
if (initial_cdw != sctx->gfx_cs.current.cdw)
sctx->context_roll = true;
} }
static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
@ -931,6 +926,8 @@ static void gfx10_emit_ge_pc_alloc(struct si_context *sctx, unsigned value)
sctx->tracked_regs.reg_value[reg] != value) { sctx->tracked_regs.reg_value[reg] != value) {
struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct radeon_cmdbuf *cs = &sctx->gfx_cs;
radeon_begin(cs);
if (sctx->chip_class == GFX10) { if (sctx->chip_class == GFX10) {
/* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */ /* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
@ -938,6 +935,7 @@ static void gfx10_emit_ge_pc_alloc(struct si_context *sctx, unsigned value)
} }
radeon_set_uconfig_reg(cs, R_030980_GE_PC_ALLOC, value); radeon_set_uconfig_reg(cs, R_030980_GE_PC_ALLOC, value);
radeon_end();
sctx->tracked_regs.reg_saved |= 0x1ull << reg; sctx->tracked_regs.reg_saved |= 0x1ull << reg;
sctx->tracked_regs.reg_value[reg] = value; sctx->tracked_regs.reg_value[reg] = value;
@ -945,9 +943,9 @@ static void gfx10_emit_ge_pc_alloc(struct si_context *sctx, unsigned value)
} }
/* Common tail code for NGG primitive shaders. */ /* Common tail code for NGG primitive shaders. */
static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader *shader, static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader *shader)
unsigned initial_cdw)
{ {
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(sctx, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, radeon_opt_set_context_reg(sctx, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP, SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP,
shader->ctx_reg.ngg.ge_max_output_per_subgroup); shader->ctx_reg.ngg.ge_max_output_per_subgroup);
@ -975,9 +973,7 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader
radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL, radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, shader->pa_cl_vs_out_cntl, SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, shader->pa_cl_vs_out_cntl,
SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK); SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
radeon_end_update_context_roll(sctx);
if (initial_cdw != sctx->gfx_cs.current.cdw)
sctx->context_roll = true;
/* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */ /* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */
gfx10_emit_ge_pc_alloc(sctx, shader->ctx_reg.ngg.ge_pc_alloc); gfx10_emit_ge_pc_alloc(sctx, shader->ctx_reg.ngg.ge_pc_alloc);
@ -986,56 +982,55 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader
static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx) static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx)
{ {
struct si_shader *shader = sctx->queued.named.gs->shader; struct si_shader *shader = sctx->queued.named.gs->shader;
unsigned initial_cdw = sctx->gfx_cs.current.cdw;
if (!shader) if (!shader)
return; return;
gfx10_emit_shader_ngg_tail(sctx, shader, initial_cdw); gfx10_emit_shader_ngg_tail(sctx, shader);
} }
static void gfx10_emit_shader_ngg_tess_nogs(struct si_context *sctx) static void gfx10_emit_shader_ngg_tess_nogs(struct si_context *sctx)
{ {
struct si_shader *shader = sctx->queued.named.gs->shader; struct si_shader *shader = sctx->queued.named.gs->shader;
unsigned initial_cdw = sctx->gfx_cs.current.cdw;
if (!shader) if (!shader)
return; return;
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM, radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM,
shader->vgt_tf_param); shader->vgt_tf_param);
radeon_end_update_context_roll(sctx);
gfx10_emit_shader_ngg_tail(sctx, shader, initial_cdw); gfx10_emit_shader_ngg_tail(sctx, shader);
} }
static void gfx10_emit_shader_ngg_notess_gs(struct si_context *sctx) static void gfx10_emit_shader_ngg_notess_gs(struct si_context *sctx)
{ {
struct si_shader *shader = sctx->queued.named.gs->shader; struct si_shader *shader = sctx->queued.named.gs->shader;
unsigned initial_cdw = sctx->gfx_cs.current.cdw;
if (!shader) if (!shader)
return; return;
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(sctx, R_028B38_VGT_GS_MAX_VERT_OUT, SI_TRACKED_VGT_GS_MAX_VERT_OUT, radeon_opt_set_context_reg(sctx, R_028B38_VGT_GS_MAX_VERT_OUT, SI_TRACKED_VGT_GS_MAX_VERT_OUT,
shader->ctx_reg.ngg.vgt_gs_max_vert_out); shader->ctx_reg.ngg.vgt_gs_max_vert_out);
radeon_end_update_context_roll(sctx);
gfx10_emit_shader_ngg_tail(sctx, shader, initial_cdw); gfx10_emit_shader_ngg_tail(sctx, shader);
} }
static void gfx10_emit_shader_ngg_tess_gs(struct si_context *sctx) static void gfx10_emit_shader_ngg_tess_gs(struct si_context *sctx)
{ {
struct si_shader *shader = sctx->queued.named.gs->shader; struct si_shader *shader = sctx->queued.named.gs->shader;
unsigned initial_cdw = sctx->gfx_cs.current.cdw;
if (!shader) if (!shader)
return; return;
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(sctx, R_028B38_VGT_GS_MAX_VERT_OUT, SI_TRACKED_VGT_GS_MAX_VERT_OUT, radeon_opt_set_context_reg(sctx, R_028B38_VGT_GS_MAX_VERT_OUT, SI_TRACKED_VGT_GS_MAX_VERT_OUT,
shader->ctx_reg.ngg.vgt_gs_max_vert_out); shader->ctx_reg.ngg.vgt_gs_max_vert_out);
radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM, radeon_opt_set_context_reg(sctx, R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM,
shader->vgt_tf_param); shader->vgt_tf_param);
radeon_end_update_context_roll(sctx);
gfx10_emit_shader_ngg_tail(sctx, shader, initial_cdw); gfx10_emit_shader_ngg_tail(sctx, shader);
} }
unsigned si_get_input_prim(const struct si_shader_selector *gs) unsigned si_get_input_prim(const struct si_shader_selector *gs)
@ -1308,11 +1303,10 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
static void si_emit_shader_vs(struct si_context *sctx) static void si_emit_shader_vs(struct si_context *sctx)
{ {
struct si_shader *shader = sctx->queued.named.vs->shader; struct si_shader *shader = sctx->queued.named.vs->shader;
unsigned initial_cdw = sctx->gfx_cs.current.cdw;
if (!shader) if (!shader)
return; return;
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(sctx, R_028A40_VGT_GS_MODE, SI_TRACKED_VGT_GS_MODE, radeon_opt_set_context_reg(sctx, R_028A40_VGT_GS_MODE, SI_TRACKED_VGT_GS_MODE,
shader->ctx_reg.vs.vgt_gs_mode); shader->ctx_reg.vs.vgt_gs_mode);
radeon_opt_set_context_reg(sctx, R_028A84_VGT_PRIMITIVEID_EN, SI_TRACKED_VGT_PRIMITIVEID_EN, radeon_opt_set_context_reg(sctx, R_028A84_VGT_PRIMITIVEID_EN, SI_TRACKED_VGT_PRIMITIVEID_EN,
@ -1356,9 +1350,7 @@ static void si_emit_shader_vs(struct si_context *sctx)
SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, shader->pa_cl_vs_out_cntl, SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, shader->pa_cl_vs_out_cntl,
SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK); SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
} }
radeon_end_update_context_roll(sctx);
if (initial_cdw != sctx->gfx_cs.current.cdw)
sctx->context_roll = true;
/* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */ /* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */
if (sctx->chip_class >= GFX10) if (sctx->chip_class >= GFX10)
@ -1536,11 +1528,10 @@ static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
static void si_emit_shader_ps(struct si_context *sctx) static void si_emit_shader_ps(struct si_context *sctx)
{ {
struct si_shader *shader = sctx->queued.named.ps->shader; struct si_shader *shader = sctx->queued.named.ps->shader;
unsigned initial_cdw = sctx->gfx_cs.current.cdw;
if (!shader) if (!shader)
return; return;
radeon_begin(&sctx->gfx_cs);
/* R_0286CC_SPI_PS_INPUT_ENA, R_0286D0_SPI_PS_INPUT_ADDR*/ /* R_0286CC_SPI_PS_INPUT_ENA, R_0286D0_SPI_PS_INPUT_ADDR*/
radeon_opt_set_context_reg2(sctx, R_0286CC_SPI_PS_INPUT_ENA, SI_TRACKED_SPI_PS_INPUT_ENA, radeon_opt_set_context_reg2(sctx, R_0286CC_SPI_PS_INPUT_ENA, SI_TRACKED_SPI_PS_INPUT_ENA,
shader->ctx_reg.ps.spi_ps_input_ena, shader->ctx_reg.ps.spi_ps_input_ena,
@ -1558,9 +1549,7 @@ static void si_emit_shader_ps(struct si_context *sctx)
radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK, SI_TRACKED_CB_SHADER_MASK, radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK, SI_TRACKED_CB_SHADER_MASK,
shader->ctx_reg.ps.cb_shader_mask); shader->ctx_reg.ps.cb_shader_mask);
radeon_end_update_context_roll(sctx);
if (initial_cdw != sctx->gfx_cs.current.cdw)
sctx->context_roll = true;
} }
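The vs/ps hunks above show the replacement pattern most directly: the hand-rolled dword snapshot and compare around the radeon_opt_set_context_reg calls turns into a begin/end bracket. A condensed before/after fragment (the callback name and shader parameter are illustrative; the register, tracked-register id, and radeon_* helpers are the ones appearing in the hunks, so this is a sketch rather than standalone code):

    /* Before: roll detection by comparing the dword count. */
    static void emit_state_old(struct si_context *sctx, struct si_shader *shader)
    {
       unsigned initial_cdw = sctx->gfx_cs.current.cdw;

       radeon_opt_set_context_reg(sctx, R_028A40_VGT_GS_MODE, SI_TRACKED_VGT_GS_MODE,
                                  shader->ctx_reg.vs.vgt_gs_mode);

       if (initial_cdw != sctx->gfx_cs.current.cdw)
          sctx->context_roll = true;
    }

    /* After: the bracket owns the cursor and the roll check. */
    static void emit_state_new(struct si_context *sctx, struct si_shader *shader)
    {
       radeon_begin(&sctx->gfx_cs);
       radeon_opt_set_context_reg(sctx, R_028A40_VGT_GS_MODE, SI_TRACKED_VGT_GS_MODE,
                                  shader->ctx_reg.vs.vgt_gs_mode);
       radeon_end_update_context_roll(sctx);
    }

Since radeon_opt_set_context_reg only appends packets when the value actually changed, radeon_end_update_context_roll(sctx) reaches the same decision the explicit compare did.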
static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
@ -3371,12 +3360,10 @@ static void si_emit_spi_map(struct si_context *sctx)
/* R_028644_SPI_PS_INPUT_CNTL_0 */ /* R_028644_SPI_PS_INPUT_CNTL_0 */
/* Dota 2: Only ~16% of SPI map updates set different values. */ /* Dota 2: Only ~16% of SPI map updates set different values. */
/* Talos: Only ~9% of SPI map updates set different values. */ /* Talos: Only ~9% of SPI map updates set different values. */
unsigned initial_cdw = sctx->gfx_cs.current.cdw; radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0, spi_ps_input_cntl, radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0, spi_ps_input_cntl,
sctx->tracked_regs.spi_ps_input_cntl, num_interp); sctx->tracked_regs.spi_ps_input_cntl, num_interp);
radeon_end_update_context_roll(sctx);
if (initial_cdw != sctx->gfx_cs.current.cdw)
sctx->context_roll = true;
} }
/** /**
@ -3405,6 +3392,8 @@ static void si_cs_preamble_add_vgt_flush(struct si_context *sctx)
*/ */
static void si_emit_vgt_flush(struct radeon_cmdbuf *cs) static void si_emit_vgt_flush(struct radeon_cmdbuf *cs)
{ {
radeon_begin(cs);
/* This is required before VGT_FLUSH. */ /* This is required before VGT_FLUSH. */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
@ -3412,6 +3401,7 @@ static void si_emit_vgt_flush(struct radeon_cmdbuf *cs)
/* VGT_FLUSH is required even if VGT is idle. It resets VGT pointers. */ /* VGT_FLUSH is required even if VGT is idle. It resets VGT pointers. */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
radeon_end();
} }
/* Initialize state related to ESGS / GSVS ring buffers */ /* Initialize state related to ESGS / GSVS ring buffers */
@ -3505,6 +3495,8 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
si_emit_vgt_flush(cs); si_emit_vgt_flush(cs);
radeon_begin(cs);
/* Set the GS registers. */ /* Set the GS registers. */
if (sctx->esgs_ring) { if (sctx->esgs_ring) {
assert(sctx->chip_class <= GFX8); assert(sctx->chip_class <= GFX8);
@ -3515,6 +3507,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
radeon_set_uconfig_reg(cs, R_030904_VGT_GSVS_RING_SIZE, radeon_set_uconfig_reg(cs, R_030904_VGT_GSVS_RING_SIZE,
sctx->gsvs_ring->width0 / 256); sctx->gsvs_ring->width0 / 256);
} }
radeon_end();
return true; return true;
} }
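si_emit_vgt_flush() now brackets its own packets, so callers such as si_update_gs_ring_buffers() must not hold a scope open across the call; they open their own bracket only after the flush helper returns and close it before returning. Condensed from the hunk above (only the gsvs_ring branch shown; this is a fragment, not a full function):

    si_emit_vgt_flush(cs);

    radeon_begin(cs);
    if (sctx->gsvs_ring) {
       radeon_set_uconfig_reg(cs, R_030904_VGT_GSVS_RING_SIZE,
                              sctx->gsvs_ring->width0 / 256);
    }
    radeon_end();

    return true;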
@ -3789,6 +3782,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
si_emit_vgt_flush(cs); si_emit_vgt_flush(cs);
/* Set tessellation registers. */ /* Set tessellation registers. */
radeon_begin(cs);
radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4)); S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4));
radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, factor_va >> 8); radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, factor_va >> 8);
@ -3801,6 +3795,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
} }
radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
sctx->screen->vgt_hs_offchip_param); sctx->screen->vgt_hs_offchip_param);
radeon_end();
return; return;
} }
@ -4153,7 +4148,9 @@ static void si_emit_scratch_state(struct si_context *sctx)
{ {
struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct radeon_cmdbuf *cs = &sctx->gfx_cs;
radeon_begin(cs);
radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE, sctx->spi_tmpring_size); radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE, sctx->spi_tmpring_size);
radeon_end();
if (sctx->scratch_buffer) { if (sctx->scratch_buffer) {
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->scratch_buffer, RADEON_USAGE_READWRITE, radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->scratch_buffer, RADEON_USAGE_READWRITE,


@ -221,6 +221,8 @@ static void gfx10_emit_streamout_begin(struct si_context *sctx)
last_target = i; last_target = i;
} }
radeon_begin(cs);
for (unsigned i = 0; i < sctx->streamout.num_targets; i++) { for (unsigned i = 0; i < sctx->streamout.num_targets; i++) {
if (!t[i]) if (!t[i])
continue; continue;
@ -246,6 +248,7 @@ static void gfx10_emit_streamout_begin(struct si_context *sctx)
radeon_emit(cs, 0); radeon_emit(cs, 0);
radeon_emit(cs, S_414_BYTE_COUNT_GFX9(4) | S_414_DISABLE_WR_CONFIRM_GFX9(i != last_target)); radeon_emit(cs, S_414_BYTE_COUNT_GFX9(4) | S_414_DISABLE_WR_CONFIRM_GFX9(i != last_target));
} }
radeon_end();
sctx->streamout.begin_emitted = true; sctx->streamout.begin_emitted = true;
} }
@ -275,6 +278,8 @@ static void si_flush_vgt_streamout(struct si_context *sctx)
struct radeon_cmdbuf *cs = &sctx->gfx_cs; struct radeon_cmdbuf *cs = &sctx->gfx_cs;
unsigned reg_strmout_cntl; unsigned reg_strmout_cntl;
radeon_begin(cs);
/* The register is at different places on different ASICs. */ /* The register is at different places on different ASICs. */
if (sctx->chip_class >= GFX7) { if (sctx->chip_class >= GFX7) {
reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
@ -295,6 +300,7 @@ static void si_flush_vgt_streamout(struct si_context *sctx)
radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */ radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */
radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */ radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */
radeon_emit(cs, 4); /* poll interval */ radeon_emit(cs, 4); /* poll interval */
radeon_end();
} }
static void si_emit_streamout_begin(struct si_context *sctx) static void si_emit_streamout_begin(struct si_context *sctx)
@ -306,6 +312,8 @@ static void si_emit_streamout_begin(struct si_context *sctx)
si_flush_vgt_streamout(sctx); si_flush_vgt_streamout(sctx);
radeon_begin(cs);
for (i = 0; i < sctx->streamout.num_targets; i++) { for (i = 0; i < sctx->streamout.num_targets; i++) {
if (!t[i]) if (!t[i])
continue; continue;
@ -344,6 +352,7 @@ static void si_emit_streamout_begin(struct si_context *sctx)
radeon_emit(cs, 0); /* unused */ radeon_emit(cs, 0); /* unused */
} }
} }
radeon_end();
sctx->streamout.begin_emitted = true; sctx->streamout.begin_emitted = true;
} }
@ -362,6 +371,8 @@ void si_emit_streamout_end(struct si_context *sctx)
si_flush_vgt_streamout(sctx); si_flush_vgt_streamout(sctx);
radeon_begin(cs);
for (i = 0; i < sctx->streamout.num_targets; i++) { for (i = 0; i < sctx->streamout.num_targets; i++) {
if (!t[i]) if (!t[i])
continue; continue;
@ -383,10 +394,10 @@ void si_emit_streamout_end(struct si_context *sctx)
* buffer bound. This ensures that the primitives-emitted query * buffer bound. This ensures that the primitives-emitted query
* won't increment. */ * won't increment. */
radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 0); radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 0);
sctx->context_roll = true;
t[i]->buf_filled_size_valid = true; t[i]->buf_filled_size_valid = true;
} }
radeon_end_update_context_roll(sctx);
sctx->streamout.begin_emitted = false; sctx->streamout.begin_emitted = false;
} }
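In si_emit_streamout_end() the per-target `sctx->context_roll = true;` disappears from the loop body; a single radeon_end_update_context_roll(sctx) after the loop records the roll once, based on whether the bracket appended any packets at all. Condensed shape of the new loop (the copy-data packet in the middle is elided; names follow the hunk above):

    radeon_begin(cs);
    for (i = 0; i < sctx->streamout.num_targets; i++) {
       if (!t[i])
          continue;
       /* ...copy the filled size, then clear the buffer-size register... */
       radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 0);
       t[i]->buf_filled_size_valid = true;
    }
    radeon_end_update_context_roll(sctx);   /* one check instead of one per target */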
@ -402,6 +413,7 @@ static void si_emit_streamout_enable(struct si_context *sctx)
{ {
assert(!sctx->screen->use_ngg_streamout); assert(!sctx->screen->use_ngg_streamout);
radeon_begin(&sctx->gfx_cs);
radeon_set_context_reg_seq(&sctx->gfx_cs, R_028B94_VGT_STRMOUT_CONFIG, 2); radeon_set_context_reg_seq(&sctx->gfx_cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
radeon_emit(&sctx->gfx_cs, S_028B94_STREAMOUT_0_EN(si_get_strmout_en(sctx)) | radeon_emit(&sctx->gfx_cs, S_028B94_STREAMOUT_0_EN(si_get_strmout_en(sctx)) |
S_028B94_RAST_STREAM(0) | S_028B94_RAST_STREAM(0) |
@ -410,6 +422,7 @@ static void si_emit_streamout_enable(struct si_context *sctx)
S_028B94_STREAMOUT_3_EN(si_get_strmout_en(sctx))); S_028B94_STREAMOUT_3_EN(si_get_strmout_en(sctx)));
radeon_emit(&sctx->gfx_cs, radeon_emit(&sctx->gfx_cs,
sctx->streamout.hw_enabled_mask & sctx->streamout.enabled_stream_buffers_mask); sctx->streamout.hw_enabled_mask & sctx->streamout.enabled_stream_buffers_mask);
radeon_end();
} }
static void si_set_streamout_enable(struct si_context *sctx, bool enable) static void si_set_streamout_enable(struct si_context *sctx, bool enable)


@ -103,8 +103,10 @@ static void si_emit_cull_state(struct si_context *sctx)
/* This will end up in SGPR6 as (value << 8), shifted by the hw. */ /* This will end up in SGPR6 as (value << 8), shifted by the hw. */
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->small_prim_cull_info_buf, radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->small_prim_cull_info_buf,
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER); RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
radeon_begin(&sctx->gfx_cs);
radeon_set_sh_reg(&sctx->gfx_cs, R_00B220_SPI_SHADER_PGM_LO_GS, radeon_set_sh_reg(&sctx->gfx_cs, R_00B220_SPI_SHADER_PGM_LO_GS,
sctx->small_prim_cull_info_address >> 8); sctx->small_prim_cull_info_address >> 8);
radeon_end();
/* Set VS_STATE.SMALL_PRIM_PRECISION for NGG culling. /* Set VS_STATE.SMALL_PRIM_PRECISION for NGG culling.
* *
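The cull-state hunk writes a persistent SH register rather than a context register, so it closes with a plain radeon_end(); in this series the context-roll variant follows context-register writes only, which matches the existing comment that non-context registers don't cause a context roll. Condensed from the hunk above:

    radeon_begin(&sctx->gfx_cs);
    radeon_set_sh_reg(&sctx->gfx_cs, R_00B220_SPI_SHADER_PGM_LO_GS,
                      sctx->small_prim_cull_info_address >> 8);
    radeon_end();   /* SH register: no context-roll bookkeeping needed */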
@ -213,18 +215,22 @@ static void si_emit_one_scissor(struct si_context *ctx, struct radeon_cmdbuf *cs
if (scissor) if (scissor)
si_clip_scissor(&final, scissor); si_clip_scissor(&final, scissor);
radeon_begin(cs);
/* Workaround for a hw bug on GFX6 that occurs when PA_SU_HARDWARE_- /* Workaround for a hw bug on GFX6 that occurs when PA_SU_HARDWARE_-
* SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. * SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0.
*/ */
if (ctx->chip_class == GFX6 && (final.maxx == 0 || final.maxy == 0)) { if (ctx->chip_class == GFX6 && (final.maxx == 0 || final.maxy == 0)) {
radeon_emit(cs, S_028250_TL_X(1) | S_028250_TL_Y(1) | S_028250_WINDOW_OFFSET_DISABLE(1)); radeon_emit(cs, S_028250_TL_X(1) | S_028250_TL_Y(1) | S_028250_WINDOW_OFFSET_DISABLE(1));
radeon_emit(cs, S_028254_BR_X(1) | S_028254_BR_Y(1)); radeon_emit(cs, S_028254_BR_X(1) | S_028254_BR_Y(1));
radeon_end();
return; return;
} }
radeon_emit(cs, S_028250_TL_X(final.minx) | S_028250_TL_Y(final.miny) | radeon_emit(cs, S_028250_TL_X(final.minx) | S_028250_TL_Y(final.miny) |
S_028250_WINDOW_OFFSET_DISABLE(1)); S_028250_WINDOW_OFFSET_DISABLE(1));
radeon_emit(cs, S_028254_BR_X(final.maxx) | S_028254_BR_Y(final.maxy)); radeon_emit(cs, S_028254_BR_X(final.maxx) | S_028254_BR_Y(final.maxy));
radeon_end();
} }
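Because dwords written inside a bracket are only committed back to cs->current.cdw by radeon_end(), every exit path has to close the scope; that is why the GFX6 zero-area workaround in si_emit_one_scissor() gains its own radeon_end() before returning, and why si_emit_window_rectangles() below grows braces around its early return. Condensed shape (a fragment, using the names from the hunk above):

    radeon_begin(cs);
    if (ctx->chip_class == GFX6 && (final.maxx == 0 || final.maxy == 0)) {
       radeon_emit(cs, S_028250_TL_X(1) | S_028250_TL_Y(1) | S_028250_WINDOW_OFFSET_DISABLE(1));
       radeon_emit(cs, S_028254_BR_X(1) | S_028254_BR_Y(1));
       radeon_end();   /* commit the two dwords before bailing out */
       return;
    }
    /* ...emit the real scissor rectangle, then radeon_end() on the normal path... */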
#define MAX_PA_SU_HARDWARE_SCREEN_OFFSET 8176 #define MAX_PA_SU_HARDWARE_SCREEN_OFFSET 8176
@ -350,7 +356,7 @@ static void si_emit_guardband(struct si_context *ctx)
* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ * R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ
* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ * R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ
*/ */
unsigned initial_cdw = ctx->gfx_cs.current.cdw; radeon_begin(&ctx->gfx_cs);
radeon_opt_set_context_reg4(ctx, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, radeon_opt_set_context_reg4(ctx, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ,
SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, fui(guardband_y), fui(discard_y), SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, fui(guardband_y), fui(discard_y),
fui(guardband_x), fui(discard_x)); fui(guardband_x), fui(discard_x));
@ -362,8 +368,7 @@ static void si_emit_guardband(struct si_context *ctx)
ctx, R_028BE4_PA_SU_VTX_CNTL, SI_TRACKED_PA_SU_VTX_CNTL, ctx, R_028BE4_PA_SU_VTX_CNTL, SI_TRACKED_PA_SU_VTX_CNTL,
S_028BE4_PIX_CENTER(rs->half_pixel_center) | S_028BE4_PIX_CENTER(rs->half_pixel_center) |
S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH + vp_as_scissor.quant_mode)); S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH + vp_as_scissor.quant_mode));
if (initial_cdw != ctx->gfx_cs.current.cdw) radeon_end_update_context_roll(ctx);
ctx->context_roll = true;
} }
static void si_emit_scissors(struct si_context *ctx) static void si_emit_scissors(struct si_context *ctx)
@ -376,7 +381,10 @@ static void si_emit_scissors(struct si_context *ctx)
if (!ctx->vs_writes_viewport_index) { if (!ctx->vs_writes_viewport_index) {
struct si_signed_scissor *vp = &ctx->viewports.as_scissor[0]; struct si_signed_scissor *vp = &ctx->viewports.as_scissor[0];
radeon_begin(cs);
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2); radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
radeon_end();
si_emit_one_scissor(ctx, cs, vp, scissor_enabled ? &states[0] : NULL); si_emit_one_scissor(ctx, cs, vp, scissor_enabled ? &states[0] : NULL);
return; return;
} }
@ -384,7 +392,10 @@ static void si_emit_scissors(struct si_context *ctx)
/* All registers in the array need to be updated if any of them is changed. /* All registers in the array need to be updated if any of them is changed.
* This is a hardware requirement. * This is a hardware requirement.
*/ */
radeon_begin(cs);
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, SI_MAX_VIEWPORTS * 2); radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, SI_MAX_VIEWPORTS * 2);
radeon_end();
for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++) { for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++) {
si_emit_one_scissor(ctx, cs, &ctx->viewports.as_scissor[i], si_emit_one_scissor(ctx, cs, &ctx->viewports.as_scissor[i],
scissor_enabled ? &states[i] : NULL); scissor_enabled ? &states[i] : NULL);
@ -477,12 +488,14 @@ static void si_emit_one_viewport(struct si_context *ctx, struct pipe_viewport_st
{ {
struct radeon_cmdbuf *cs = &ctx->gfx_cs; struct radeon_cmdbuf *cs = &ctx->gfx_cs;
radeon_begin(cs);
radeon_emit(cs, fui(state->scale[0])); radeon_emit(cs, fui(state->scale[0]));
radeon_emit(cs, fui(state->translate[0])); radeon_emit(cs, fui(state->translate[0]));
radeon_emit(cs, fui(state->scale[1])); radeon_emit(cs, fui(state->scale[1]));
radeon_emit(cs, fui(state->translate[1])); radeon_emit(cs, fui(state->translate[1]));
radeon_emit(cs, fui(state->scale[2])); radeon_emit(cs, fui(state->scale[2]));
radeon_emit(cs, fui(state->translate[2])); radeon_emit(cs, fui(state->translate[2]));
radeon_end();
} }
static void si_emit_viewports(struct si_context *ctx) static void si_emit_viewports(struct si_context *ctx)
@ -492,7 +505,10 @@ static void si_emit_viewports(struct si_context *ctx)
/* The simple case: Only 1 viewport is active. */ /* The simple case: Only 1 viewport is active. */
if (!ctx->vs_writes_viewport_index) { if (!ctx->vs_writes_viewport_index) {
radeon_begin(cs);
radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6); radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
radeon_end();
si_emit_one_viewport(ctx, &states[0]); si_emit_one_viewport(ctx, &states[0]);
return; return;
} }
@ -500,7 +516,10 @@ static void si_emit_viewports(struct si_context *ctx)
/* All registers in the array need to be updated if any of them is changed. /* All registers in the array need to be updated if any of them is changed.
* This is a hardware requirement. * This is a hardware requirement.
*/ */
radeon_begin(cs);
radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE + 0, SI_MAX_VIEWPORTS * 6); radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE + 0, SI_MAX_VIEWPORTS * 6);
radeon_end();
for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++) for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++)
si_emit_one_viewport(ctx, &states[i]); si_emit_one_viewport(ctx, &states[i]);
} }
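A packet header and its payload do not have to share a bracket: radeon_end() stores the cursor back into cs->current.cdw and the next radeon_begin() resumes from there, so si_emit_viewports() can emit the SET_CONTEXT_REG header in its own scope and let si_emit_one_viewport() append the six payload dwords in another. Condensed from the single-viewport path above:

    radeon_begin(cs);
    radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
    radeon_end();

    /* si_emit_one_viewport() opens its own bracket and appends the 6 dwords. */
    si_emit_one_viewport(ctx, &states[0]);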
@ -528,21 +547,25 @@ static void si_emit_depth_ranges(struct si_context *ctx)
if (!ctx->vs_writes_viewport_index) { if (!ctx->vs_writes_viewport_index) {
si_viewport_zmin_zmax(&states[0], clip_halfz, window_space, &zmin, &zmax); si_viewport_zmin_zmax(&states[0], clip_halfz, window_space, &zmin, &zmax);
radeon_begin(cs);
radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2); radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
radeon_emit(cs, fui(zmin)); radeon_emit(cs, fui(zmin));
radeon_emit(cs, fui(zmax)); radeon_emit(cs, fui(zmax));
radeon_end();
return; return;
} }
/* All registers in the array need to be updated if any of them is changed. /* All registers in the array need to be updated if any of them is changed.
* This is a hardware requirement. * This is a hardware requirement.
*/ */
radeon_begin(cs);
radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, SI_MAX_VIEWPORTS * 2); radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, SI_MAX_VIEWPORTS * 2);
for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++) { for (unsigned i = 0; i < SI_MAX_VIEWPORTS; i++) {
si_viewport_zmin_zmax(&states[i], clip_halfz, window_space, &zmin, &zmax); si_viewport_zmin_zmax(&states[i], clip_halfz, window_space, &zmin, &zmax);
radeon_emit(cs, fui(zmin)); radeon_emit(cs, fui(zmin));
radeon_emit(cs, fui(zmax)); radeon_emit(cs, fui(zmax));
} }
radeon_end();
} }
static void si_emit_viewport_states(struct si_context *ctx) static void si_emit_viewport_states(struct si_context *ctx)
@ -631,16 +654,20 @@ static void si_emit_window_rectangles(struct si_context *sctx)
else else
rule = outside[num_rectangles - 1]; rule = outside[num_rectangles - 1];
radeon_begin(cs);
radeon_opt_set_context_reg(sctx, R_02820C_PA_SC_CLIPRECT_RULE, SI_TRACKED_PA_SC_CLIPRECT_RULE, radeon_opt_set_context_reg(sctx, R_02820C_PA_SC_CLIPRECT_RULE, SI_TRACKED_PA_SC_CLIPRECT_RULE,
rule); rule);
if (num_rectangles == 0) if (num_rectangles == 0) {
radeon_end();
return; return;
}
radeon_set_context_reg_seq(cs, R_028210_PA_SC_CLIPRECT_0_TL, num_rectangles * 2); radeon_set_context_reg_seq(cs, R_028210_PA_SC_CLIPRECT_0_TL, num_rectangles * 2);
for (unsigned i = 0; i < num_rectangles; i++) { for (unsigned i = 0; i < num_rectangles; i++) {
radeon_emit(cs, S_028210_TL_X(rects[i].minx) | S_028210_TL_Y(rects[i].miny)); radeon_emit(cs, S_028210_TL_X(rects[i].minx) | S_028210_TL_Y(rects[i].miny));
radeon_emit(cs, S_028214_BR_X(rects[i].maxx) | S_028214_BR_Y(rects[i].maxy)); radeon_emit(cs, S_028214_BR_X(rects[i].maxx) | S_028214_BR_Y(rects[i].maxy));
} }
radeon_end();
} }
static void si_set_window_rectangles(struct pipe_context *ctx, bool include, static void si_set_window_rectangles(struct pipe_context *ctx, bool include,