radv: Add stricter space checks.

The check against max_dw meant that none of the checks triggered
reliably when we had an issue. Use a stricter reserved-dw measure to
increase the probability of catching issues.

Adds a radeon_check_space call in some places after cs_create; they
previously relied on the minimum cs size, which was sufficient before
but would now trigger the stricter checks.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20152>
This commit is contained in:
Bas Nieuwenhuizen 2022-12-05 00:54:14 +01:00
parent 4c204db0a7
commit 7893040f80
8 changed files with 33 additions and 13 deletions

View file

@ -34,8 +34,10 @@
/* Ensure the command stream has room for at least `needed` more dwords.
 *
 * Grows the CS when the remaining capacity (max_dw - cdw) is too small,
 * then records the reservation in reserved_dw so that the emission-time
 * asserts (which compare against reserved_dw rather than max_dw) can
 * verify every emitted dword was explicitly accounted for beforehand.
 * Returns the dword count the CS will have once the reserved space is
 * fully written.
 */
static inline unsigned
radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned needed)
{
/* Invariant: everything already emitted must have been reserved. */
assert(cs->cdw <= cs->reserved_dw);
if (cs->max_dw - cs->cdw < needed)
ws->cs_grow(cs, needed);
/* MAX2: an earlier, larger reservation must not be shrunk. */
cs->reserved_dw = MAX2(cs->reserved_dw, cs->cdw + needed);
return cs->cdw + needed;
}
@ -43,7 +45,7 @@ static inline void
radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(cs->cdw + 2 + num <= cs->reserved_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
@ -60,7 +62,7 @@ static inline void
radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(cs->cdw + 2 + num <= cs->reserved_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
@ -77,7 +79,7 @@ static inline void
radeon_set_context_reg_idx(struct radeon_cmdbuf *cs, unsigned reg, unsigned idx, unsigned value)
{
assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
assert(cs->cdw + 3 <= cs->max_dw);
assert(cs->cdw + 3 <= cs->reserved_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
radeon_emit(cs, value);
@ -87,7 +89,7 @@ static inline void
radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(cs->cdw + 2 + num <= cs->reserved_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
@ -105,7 +107,7 @@ radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_
unsigned reg, unsigned idx, unsigned value)
{
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
assert(cs->cdw + 3 <= cs->max_dw);
assert(cs->cdw + 3 <= cs->reserved_dw);
assert(idx);
unsigned opcode = PKT3_SET_SH_REG_INDEX;
@ -121,7 +123,7 @@ static inline void
radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(cs->cdw + 2 + num <= cs->reserved_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
@ -131,7 +133,7 @@ static inline void
radeon_set_uconfig_reg_seq_perfctr(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(cs->cdw + 2 + num <= cs->reserved_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 1));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
@ -149,7 +151,7 @@ radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct ra
unsigned reg, unsigned idx, unsigned value)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw + 3 <= cs->max_dw);
assert(cs->cdw + 3 <= cs->reserved_dw);
assert(idx);
unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
@ -167,7 +169,7 @@ radeon_set_perfctr_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, unsigne
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw + 3 <= cs->max_dw);
assert(cs->cdw + 3 <= cs->reserved_dw);
/*
* On GFX10, there is a bug with the ME implementation of its content addressable memory (CAM),
@ -186,7 +188,7 @@ static inline void
radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
assert(reg < CIK_UCONFIG_REG_OFFSET);
assert(cs->cdw + 6 <= cs->max_dw);
assert(cs->cdw + 6 <= cs->reserved_dw);
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF));

View file

@ -103,7 +103,7 @@ radv_compute_generate_pm4(const struct radv_device *device, struct radv_compute_
struct radv_shader *shader = pipeline->base.shaders[MESA_SHADER_COMPUTE];
struct radeon_cmdbuf *cs = &pipeline->base.cs;
cs->max_dw = pdevice->rad_info.gfx_level >= GFX10 ? 19 : 16;
cs->reserved_dw = cs->max_dw = pdevice->rad_info.gfx_level >= GFX10 ? 19 : 16;
cs->buf = malloc(cs->max_dw * 4);
radv_pipeline_emit_hw_cs(pdevice, cs, shader);

View file

@ -3751,8 +3751,8 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi
struct radeon_cmdbuf *ctx_cs = &pipeline->base.ctx_cs;
struct radeon_cmdbuf *cs = &pipeline->base.cs;
cs->max_dw = 64;
ctx_cs->max_dw = 256;
cs->reserved_dw = cs->max_dw = 64;
ctx_cs->reserved_dw = ctx_cs->max_dw = 256;
cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw));
ctx_cs->buf = cs->buf + cs->max_dw;

View file

@ -1064,6 +1064,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
goto fail;
}
radeon_check_space(ws, cs, 512);
dest_cs[i] = cs;
if (scratch_bo)

View file

@ -114,6 +114,7 @@ struct radeon_cmdbuf {
* store and reload them between buf writes. */
uint64_t cdw; /* Number of used dwords. */
uint64_t max_dw; /* Maximum number of dwords. */
uint64_t reserved_dw; /* Number of dwords reserved through radeon_check_space() */
uint32_t *buf; /* The base pointer of the chunk. */
};

View file

@ -687,6 +687,8 @@ radv_begin_sqtt(struct radv_queue *queue)
if (!cs)
return false;
radeon_check_space(ws, cs, 256);
switch (family) {
case RADV_QUEUE_GENERAL:
radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
@ -756,6 +758,8 @@ radv_end_sqtt(struct radv_queue *queue)
if (!cs)
return false;
radeon_check_space(ws, cs, 256);
switch (family) {
case RADV_QUEUE_GENERAL:
radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));

View file

@ -657,6 +657,8 @@ cik_create_gfx_config(struct radv_device *device)
if (!cs)
return;
radeon_check_space(device->ws, cs, 512);
si_emit_graphics(device, cs);
while (cs->cdw & 7) {

View file

@ -388,6 +388,7 @@ radv_amdgpu_cs_grow(struct radeon_cmdbuf *_cs, size_t min_size)
cs->base.buf = (uint32_t *)cs->ib_mapped;
cs->base.cdw = 0;
cs->base.reserved_dw = 0;
cs->base.max_dw = ib_size / 4 - 4;
}
@ -397,6 +398,8 @@ radv_amdgpu_cs_finalize(struct radeon_cmdbuf *_cs)
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
enum amd_ip_type ip_type = cs->hw_ip;
assert(cs->base.cdw <= cs->base.reserved_dw);
uint32_t ib_pad_dw_mask = MAX2(3, cs->ws->info.ib_pad_dw_mask[ip_type]);
uint32_t nop_packet = get_nop_packet(cs);
@ -442,6 +445,7 @@ radv_amdgpu_cs_reset(struct radeon_cmdbuf *_cs)
{
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
cs->base.cdw = 0;
cs->base.reserved_dw = 0;
cs->status = VK_SUCCESS;
for (unsigned i = 0; i < cs->num_buffers; ++i) {
@ -670,6 +674,8 @@ radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cm
if (parent->base.cdw + 4 > parent->base.max_dw)
radv_amdgpu_cs_grow(&parent->base, 4);
parent->base.reserved_dw = MAX2(parent->base.reserved_dw, parent->base.cdw + 4);
/* Not setting the CHAIN bit will launch an IB2. */
radeon_emit(&parent->base, PKT3(PKT3_INDIRECT_BUFFER, 2, 0));
radeon_emit(&parent->base, child->ib.ib_mc_address);
@ -686,6 +692,8 @@ radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cm
if (parent->base.cdw + ib->cdw > parent->base.max_dw)
radv_amdgpu_cs_grow(&parent->base, ib->cdw);
parent->base.reserved_dw = MAX2(parent->base.reserved_dw, parent->base.cdw + ib->cdw);
mapped = ws->base.buffer_map(ib->bo);
if (!mapped) {
parent->status = VK_ERROR_OUT_OF_HOST_MEMORY;
@ -704,6 +712,8 @@ radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cm
if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
radv_amdgpu_cs_grow(&parent->base, child->base.cdw);
parent->base.reserved_dw = MAX2(parent->base.reserved_dw, parent->base.cdw + child->base.cdw);
memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
parent->base.cdw += child->base.cdw;
}