radv: move conditional rendering state to radv_cond_render_state

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41152>
This commit is contained in:
Samuel Pitoiset 2026-04-24 10:58:45 +02:00 committed by Marge Bot
parent 57ecb1c1ec
commit 9feb722b31
5 changed files with 101 additions and 92 deletions

View file

@ -339,6 +339,7 @@ radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_cond_render_state *cond_render = &cmd_buffer->state.cond_render;
bool old_predicating = false;
uint64_t pred_offset;
VkPipelineLayout layout;
@ -378,10 +379,10 @@ radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *
if (pred_offset) {
pred_offset += 8 * subresourceRange->baseMipLevel;
old_predicating = cmd_buffer->state.predicating;
old_predicating = cond_render->enabled;
radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
cmd_buffer->state.predicating = true;
cond_render->enabled = true;
}
radv_meta_bind_graphics_pipeline(cmd_buffer, pipeline);
@ -408,17 +409,15 @@ radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *
if (pred_offset) {
pred_offset += 8 * subresourceRange->baseMipLevel;
cmd_buffer->state.predicating = old_predicating;
cond_render->enabled = old_predicating;
radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);
if (cmd_buffer->state.predication_type != -1) {
if (cond_render->type != -1) {
/* Restore previous conditional rendering user state. */
const uint64_t pred_va = pdev->info.has_32bit_predication ? cmd_buffer->state.user_predication_va
: cmd_buffer->state.emulated_predication_va;
const uint64_t pred_va = pdev->info.has_32bit_predication ? cond_render->user_va : cond_render->emulated_va;
radv_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type,
cmd_buffer->state.predication_op, pred_va);
radv_emit_set_predication_state(cmd_buffer, cond_render->type, cond_render->op, pred_va);
}
}

View file

@ -2714,7 +2714,7 @@ radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer, struct radv_shader
struct radv_cmd_stream *cs = radv_get_pm4_cs(cmd_buffer);
const uint64_t va = radv_shader_get_va(shader);
radv_cs_cp_dma_prefetch(device, cs, va, shader->code_size, cmd_buffer->state.predicating);
radv_cs_cp_dma_prefetch(device, cs, va, shader->code_size, cmd_buffer->state.cond_render.enabled);
}
ALWAYS_INLINE static void
@ -5049,7 +5049,7 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image
/* Use the fastest way when both aspects are used. */
ASSERTED unsigned cdw_end =
radv_cs_write_data_head(device, cs, V_371_MICRO_ENGINE, va, 2 * level_count, cmd_buffer->state.predicating);
radv_cs_write_data_head(device, cs, V_371_MICRO_ENGINE, va, 2 * level_count, cmd_buffer->state.cond_render.enabled);
radeon_begin(cs);
@ -5074,13 +5074,13 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image
value = ds_clear_value.stencil;
}
radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 1, &value, cmd_buffer->state.predicating);
radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 1, &value, cmd_buffer->state.cond_render.enabled);
}
}
if (cmd_buffer->qf == RADV_QUEUE_GENERAL && pdev->info.has_load_ctx_reg_pkt) {
radeon_check_space(device->ws, cs->b, 2);
ac_emit_cp_pfp_sync_me(cs->b, cmd_buffer->state.predicating);
ac_emit_cp_pfp_sync_me(cs->b, cmd_buffer->state.cond_render.enabled);
}
}
@ -5097,7 +5097,7 @@ radv_update_hiz_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *
const uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
ASSERTED unsigned cdw_end =
radv_cs_write_data_head(device, cs, V_371_PREFETCH_PARSER, va, level_count, cmd_buffer->state.predicating);
radv_cs_write_data_head(device, cs, V_371_PREFETCH_PARSER, va, level_count, cmd_buffer->state.cond_render.enabled);
radeon_begin(cs);
for (uint32_t l = 0; l < level_count; l++)
@ -5124,7 +5124,7 @@ radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct ra
uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
ASSERTED unsigned cdw_end =
radv_cs_write_data_head(device, cs, V_371_PREFETCH_PARSER, va, level_count, cmd_buffer->state.predicating);
radv_cs_write_data_head(device, cs, V_371_PREFETCH_PARSER, va, level_count, cmd_buffer->state.cond_render.enabled);
radeon_begin(cs);
@ -5315,7 +5315,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_im
uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel);
ASSERTED unsigned cdw_end =
radv_cs_write_data_head(device, cs, V_371_MICRO_ENGINE, va, 2 * level_count, cmd_buffer->state.predicating);
radv_cs_write_data_head(device, cs, V_371_MICRO_ENGINE, va, 2 * level_count, cmd_buffer->state.cond_render.enabled);
radeon_begin(cs);
@ -5329,7 +5329,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_im
if (cmd_buffer->qf == RADV_QUEUE_GENERAL && pdev->info.has_load_ctx_reg_pkt) {
radeon_check_space(device->ws, cs->b, 2);
ac_emit_cp_pfp_sync_me(cs->b, cmd_buffer->state.predicating);
ac_emit_cp_pfp_sync_me(cs->b, cmd_buffer->state.cond_render.enabled);
}
} else {
/* Some default value we can set in the update. */
@ -5387,10 +5387,10 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_i
uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
if (pdev->info.has_load_ctx_reg_pkt) {
ac_emit_cp_load_context_reg_index(cs->b, reg, 2, va, cmd_buffer->state.predicating);
ac_emit_cp_load_context_reg_index(cs->b, reg, 2, va, cmd_buffer->state.cond_render.enabled);
} else {
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_REG, va, reg >> 2, AC_CP_COPY_DATA_COUNT_SEL,
cmd_buffer->state.predicating);
cmd_buffer->state.cond_render.enabled);
}
}
@ -7805,7 +7805,7 @@ radv_init_default_state(struct radv_cmd_buffer *cmd_buffer)
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
cmd_buffer->state.predication_type = -1;
cmd_buffer->state.cond_render.type = -1;
if (cmd_buffer->qf == RADV_QUEUE_GENERAL) {
vk_dynamic_graphics_state_init(&cmd_buffer->state.dynamic.vk);
@ -7853,7 +7853,7 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
cmd_buffer->state.mec_inv_pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
cmd_buffer->state.cond_render.mec_inv_pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
}
if (pdev->info.gfx_level >= GFX9 && cmd_buffer->qf == RADV_QUEUE_GENERAL) {
@ -10729,12 +10729,12 @@ radv_cs_emit_compute_predication(const struct radv_device *device, struct radv_c
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!state->predicating)
if (!state->cond_render.enabled)
return;
uint64_t va = state->user_predication_va;
uint64_t va = state->cond_render.user_va;
if (!state->predication_type) {
if (!state->cond_render.type) {
/* Invert the condition the first time it is needed. */
if (!*inv_emitted) {
*inv_emitted = true;
@ -10796,7 +10796,7 @@ radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_cou
struct radv_cmd_stream *cs = cmd_buffer->cs;
radeon_begin(cs);
radeon_emit(PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating));
radeon_emit(PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.cond_render.enabled));
radeon_emit(vertex_count);
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
radeon_end();
@ -10819,7 +10819,7 @@ radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t in
struct radv_cmd_stream *cs = cmd_buffer->cs;
radeon_begin(cs);
radeon_emit(PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.predicating));
radeon_emit(PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.cond_render.enabled));
radeon_emit(max_index_count);
radeon_emit(index_va);
radeon_emit(index_va >> 32);
@ -10846,7 +10846,7 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index
uint32_t base_reg = cmd_buffer->state.vtx_base_sgpr;
uint32_t vertex_offset_reg, start_instance_reg = 0, draw_id_reg = 0;
const bool sqtt_en = !!device->sqtt.bo;
bool predicating = cmd_buffer->state.predicating;
bool predicating = cmd_buffer->state.cond_render.enabled;
assert(base_reg);
radv_invalidate_vertex_draw_state(cmd_buffer);
@ -10895,7 +10895,7 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3
const struct radv_shader *mesh_shader = cmd_buffer->state.shaders[MESA_SHADER_MESH];
struct radv_cmd_stream *cs = cmd_buffer->cs;
uint32_t base_reg = cmd_buffer->state.vtx_base_sgpr;
bool predicating = cmd_buffer->state.predicating;
bool predicating = cmd_buffer->state.cond_render.enabled;
const bool sqtt_en = !!device->sqtt.bo;
assert(base_reg || (!cmd_buffer->state.uses_drawid && !mesh_shader->info.cs.uses_grid_size));
@ -10936,7 +10936,7 @@ radv_cs_emit_dispatch_taskmesh_direct_ace_packet(const struct radv_device *devic
const uint32_t x, const uint32_t y, const uint32_t z)
{
const struct radv_shader *task_shader = cmd_state->shaders[MESA_SHADER_TASK];
const bool predicating = cmd_state->predicating;
const bool predicating = cmd_state->cond_render.enabled;
const uint32_t dispatch_initiator =
device->dispatch_initiator_task | S_00B800_CS_W32_EN(task_shader->info.wave_size == 32);
const uint32_t ring_entry_reg = radv_get_user_sgpr(task_shader, AC_UD_TASK_RING_ENTRY);
@ -10992,7 +10992,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(const struct radv_device *device, cons
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *mesh_shader = cmd_state->shaders[MESA_SHADER_MESH];
const bool predicating = cmd_state->predicating;
const bool predicating = cmd_state->cond_render.enabled;
const uint32_t ring_entry_reg = radv_get_user_sgpr(mesh_shader, AC_UD_TASK_RING_ENTRY);
@ -11268,7 +11268,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct
}
if (device->sqtt.bo)
radv_emit_thread_trace_marker(device, cmd_buffer->cs, cmd_buffer->state.predicating);
radv_emit_thread_trace_marker(device, cmd_buffer->cs, cmd_buffer->state.cond_render.enabled);
}
ALWAYS_INLINE static void
@ -11307,7 +11307,7 @@ radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct r
}
if (device->sqtt.bo)
radv_emit_thread_trace_marker(device, cmd_buffer->cs, cmd_buffer->state.predicating);
radv_emit_thread_trace_marker(device, cmd_buffer->cs, cmd_buffer->state.cond_render.enabled);
}
static void
@ -11317,7 +11317,7 @@ radv_cs_emit_mesh_dispatch_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x
struct radv_cmd_stream *cs = cmd_buffer->cs;
radeon_begin(cs);
radeon_emit(PKT3(PKT3_DISPATCH_MESH_DIRECT, 3, cmd_buffer->state.predicating));
radeon_emit(PKT3(PKT3_DISPATCH_MESH_DIRECT, 3, cmd_buffer->state.cond_render.enabled));
radeon_emit(x);
radeon_emit(y);
radeon_emit(z);
@ -11359,7 +11359,7 @@ radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x
}
if (device->sqtt.bo)
radv_emit_thread_trace_marker(device, cmd_buffer->cs, cmd_buffer->state.predicating);
radv_emit_thread_trace_marker(device, cmd_buffer->cs, cmd_buffer->state.cond_render.enabled);
}
static void
@ -11410,8 +11410,8 @@ radv_emit_direct_taskmesh_draw_packets(const struct radv_device *device, struct
const unsigned ace_predication_size = num_views * 6; /* DISPATCH_TASKMESH_DIRECT_ACE size */
radv_emit_userdata_task(cmd_state, ace_cs, x, y, z);
radv_cs_emit_compute_predication(device, cmd_state, ace_cs, cmd_state->mec_inv_pred_va,
&cmd_state->mec_inv_pred_emitted, ace_predication_size);
radv_cs_emit_compute_predication(device, cmd_state, ace_cs, cmd_state->cond_render.mec_inv_pred_va,
&cmd_state->cond_render.mec_inv_pred_emitted, ace_predication_size);
if (!view_mask) {
radv_cs_emit_dispatch_taskmesh_direct_ace_packet(device, cmd_state, ace_cs, x, y, z);
@ -11426,7 +11426,7 @@ radv_emit_direct_taskmesh_draw_packets(const struct radv_device *device, struct
}
if (device->sqtt.bo)
radv_emit_thread_trace_marker(device, ace_cs, cmd_state->predicating);
radv_emit_thread_trace_marker(device, ace_cs, cmd_state->cond_render.enabled);
}
static void
@ -11459,8 +11459,8 @@ radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struc
ace_predication_size += 2 * 5 + 6 + 6 * num_views;
}
radv_cs_emit_compute_predication(device, cmd_state, ace_cs, cmd_state->mec_inv_pred_va,
&cmd_state->mec_inv_pred_emitted, ace_predication_size);
radv_cs_emit_compute_predication(device, cmd_state, ace_cs, cmd_state->cond_render.mec_inv_pred_va,
&cmd_state->cond_render.mec_inv_pred_emitted, ace_predication_size);
if (workaround_cond_va) {
ac_emit_cp_cond_exec(ace_cs->b, pdev->info.gfx_level, info->count_va,
@ -11518,7 +11518,7 @@ radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct
}
if (device->sqtt.bo && !use_multi)
radv_emit_thread_trace_marker(device, cmd_buffer->cs, cmd_buffer->state.predicating);
radv_emit_thread_trace_marker(device, cmd_buffer->cs, cmd_buffer->state.cond_render.enabled);
}
static uint64_t
@ -13788,7 +13788,7 @@ radv_dgc_execute_ib(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommand
radeon_check_space(device->ws, cs->b, 64);
device->ws->cs_chain_dgc_ib(cs->b, main_ib_va, cmdbuf_size >> 2, main_trailer_va, cmd_buffer->state.predicating);
device->ws->cs_chain_dgc_ib(cs->b, main_ib_va, cmdbuf_size >> 2, main_trailer_va, cmd_buffer->state.cond_render.enabled);
if (task_shader) {
const uint32_t ace_cmdbuf_size = radv_get_indirect_ace_cmdbuf_size(pGeneratedCommandsInfo);
@ -13798,7 +13798,7 @@ radv_dgc_execute_ib(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommand
assert(ace_cs->b);
device->ws->cs_chain_dgc_ib(ace_cs->b, ace_ib_va, ace_cmdbuf_size >> 2, ace_trailer_va,
cmd_buffer->state.predicating);
cmd_buffer->state.cond_render.enabled);
}
}
@ -13847,16 +13847,16 @@ radv_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, VkBool32 isPr
*/
const bool suspend_conditional_rendering =
(cmd_buffer->qf == RADV_QUEUE_COMPUTE || radv_dgc_get_shader(pipeline_info, eso_info, MESA_SHADER_TASK));
const bool old_predicating = cmd_buffer->state.predicating;
const bool old_predicating = cmd_buffer->state.cond_render.enabled;
if (suspend_conditional_rendering && cmd_buffer->state.predicating) {
cmd_buffer->state.predicating = false;
if (suspend_conditional_rendering && cmd_buffer->state.cond_render.enabled) {
cmd_buffer->state.cond_render.enabled = false;
}
radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo, cmd_buffer, old_predicating);
if (suspend_conditional_rendering) {
cmd_buffer->state.predicating = old_predicating;
cmd_buffer->state.cond_render.enabled = old_predicating;
}
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2;
@ -13908,7 +13908,7 @@ radv_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, VkBool32 isPr
if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
radeon_check_space(device->ws, cs->b, 2);
ac_emit_cp_pfp_sync_me(cs->b, cmd_buffer->state.predicating);
ac_emit_cp_pfp_sync_me(cs->b, cmd_buffer->state.cond_render.enabled);
}
/* The Vulkan spec 1.4.349 says:
@ -13977,7 +13977,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned dispatch_initiator = device->dispatch_initiator;
struct radeon_winsys *ws = device->ws;
bool predicating = cmd_buffer->state.predicating;
bool predicating = cmd_buffer->state.cond_render.enabled;
struct radv_cmd_stream *cs = radv_get_pm4_cs(cmd_buffer);
const uint32_t grid_size_offset = radv_get_user_sgpr_loc(compute_shader, AC_UD_CS_GRID_SIZE);
@ -14043,8 +14043,8 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
const unsigned ace_predication_size =
4 /* DISPATCH_INDIRECT */ + (needs_align32_workaround ? 6 * 3 /* 3x COPY_DATA */ : 0);
radv_cs_emit_compute_predication(device, &cmd_buffer->state, cs, cmd_buffer->state.mec_inv_pred_va,
&cmd_buffer->state.mec_inv_pred_emitted, ace_predication_size);
radv_cs_emit_compute_predication(device, &cmd_buffer->state, cs, cmd_buffer->state.cond_render.mec_inv_pred_va,
&cmd_buffer->state.cond_render.mec_inv_pred_emitted, ace_predication_size);
if (needs_align32_workaround) {
const uint64_t unaligned_va = indirect_va;
@ -14075,8 +14075,8 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
radv_emit_indirect_buffer(cs, info->indirect_va, true);
if (cmd_buffer->qf == RADV_QUEUE_COMPUTE) {
radv_cs_emit_compute_predication(device, &cmd_buffer->state, cs, cmd_buffer->state.mec_inv_pred_va,
&cmd_buffer->state.mec_inv_pred_emitted, 3 /* PKT3_DISPATCH_INDIRECT */);
radv_cs_emit_compute_predication(device, &cmd_buffer->state, cs, cmd_buffer->state.cond_render.mec_inv_pred_va,
&cmd_buffer->state.cond_render.mec_inv_pred_emitted, 3 /* PKT3_DISPATCH_INDIRECT */);
predicating = false;
}
@ -14162,8 +14162,8 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
}
if (cmd_buffer->qf == RADV_QUEUE_COMPUTE) {
radv_cs_emit_compute_predication(device, &cmd_buffer->state, cs, cmd_buffer->state.mec_inv_pred_va,
&cmd_buffer->state.mec_inv_pred_emitted, 5 /* DISPATCH_DIRECT size */);
radv_cs_emit_compute_predication(device, &cmd_buffer->state, cs, cmd_buffer->state.cond_render.mec_inv_pred_va,
&cmd_buffer->state.cond_render.mec_inv_pred_emitted, 5 /* DISPATCH_DIRECT size */);
predicating = false;
}
@ -14191,7 +14191,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
}
if (device->sqtt.bo)
radv_emit_thread_trace_marker(device, cmd_buffer->cs, cmd_buffer->state.predicating);
radv_emit_thread_trace_marker(device, cmd_buffer->cs, cmd_buffer->state.cond_render.enabled);
assert(cs->b->cdw <= cdw_max);
}
@ -15480,6 +15480,7 @@ radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_cond_render_state *cond_render = &cmd_buffer->state.cond_render;
struct radv_cmd_stream *cs = cmd_buffer->cs;
unsigned pred_op = PREDICATION_OP_BOOL32;
uint64_t emulated_va = 0;
@ -15542,17 +15543,19 @@ radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va
}
/* Store conditional rendering user info. */
cmd_buffer->state.predicating = true;
cmd_buffer->state.predication_type = draw_visible;
cmd_buffer->state.predication_op = pred_op;
cmd_buffer->state.user_predication_va = va;
cmd_buffer->state.emulated_predication_va = emulated_va;
cmd_buffer->state.mec_inv_pred_emitted = false;
cond_render->enabled = true;
cond_render->type = draw_visible;
cond_render->op = pred_op;
cond_render->user_va = va;
cond_render->emulated_va = emulated_va;
cond_render->mec_inv_pred_emitted = false;
}
void
radv_end_conditional_rendering(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_cond_render_state *cond_render = &cmd_buffer->state.cond_render;
if (cmd_buffer->qf == RADV_QUEUE_GENERAL) {
radv_emit_set_predication_state(cmd_buffer, false, 0, 0);
} else {
@ -15560,12 +15563,12 @@ radv_end_conditional_rendering(struct radv_cmd_buffer *cmd_buffer)
}
/* Reset conditional rendering user info. */
cmd_buffer->state.predicating = false;
cmd_buffer->state.predication_type = -1;
cmd_buffer->state.predication_op = 0;
cmd_buffer->state.user_predication_va = 0;
cmd_buffer->state.emulated_predication_va = 0;
cmd_buffer->state.mec_inv_pred_emitted = false;
cond_render->enabled = false;
cond_render->type = -1;
cond_render->op = 0;
cond_render->user_va = 0;
cond_render->emulated_va = 0;
cond_render->mec_inv_pred_emitted = false;
}
/* VK_EXT_conditional_rendering */

View file

@ -334,12 +334,23 @@ struct radv_index_buffer_state {
uint32_t max_index_count;
};
/* Conditional rendering (VK_EXT_conditional_rendering) state, grouped so it can
 * be saved/suspended/restored as a unit.
 */
struct radv_cond_render_state {
uint64_t user_va; /* User predication VA. */
uint64_t emulated_va; /* Emulated VA if no 32-bit predication support. */
uint64_t mec_inv_pred_va; /* For inverted predication when using MEC. */
int8_t type; /* -1: disabled, 0: normal, 1: inverted */
uint8_t op; /* 32-bit or 64-bit predicate value */
bool enabled; /* Whether conditional rendering predication is currently active. */
bool enabled_save; /* Saved value of "enabled" while conditional rendering is suspended. */
bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */
bool suspended; /* Whether user conditional rendering is currently suspended. */
};
struct radv_cmd_state {
/* Vertex descriptors */
uint64_t vb_va;
unsigned vb_size;
bool predicating;
uint64_t dirty_dynamic;
uint64_t dirty;
@ -359,6 +370,7 @@ struct radv_cmd_state {
struct radv_dynamic_state dynamic;
struct radv_streamout_state streamout;
struct radv_index_buffer_state index_buffer;
struct radv_cond_render_state cond_render;
struct radv_rendering_state render;
@ -398,16 +410,6 @@ struct radv_cmd_state {
/* Whether any images that are not L2 coherent are dirty from the CB. */
bool rb_noncoherent_dirty;
/* Conditional rendering info. */
uint8_t predication_op; /* 32-bit or 64-bit predicate value */
int predication_type; /* -1: disabled, 0: normal, 1: inverted */
uint64_t user_predication_va; /* User predication VA. */
uint64_t emulated_predication_va; /* Emulated VA if no 32-bit predication support. */
uint64_t mec_inv_pred_va; /* For inverted predication when using MEC. */
bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */
bool saved_user_cond_render;
bool is_user_cond_render_suspended;
/* Inheritance info. */
VkQueryPipelineStatisticFlags inherited_pipeline_statistics;
bool inherited_occlusion_queries;
@ -849,20 +851,24 @@ void radv_upload_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
static inline void
radv_suspend_conditional_rendering(struct radv_cmd_buffer *cmd_buffer)
{
assert(!cmd_buffer->state.is_user_cond_render_suspended);
struct radv_cond_render_state *cond_render = &cmd_buffer->state.cond_render;
cmd_buffer->state.saved_user_cond_render = cmd_buffer->state.predicating;
cmd_buffer->state.predicating = false;
cmd_buffer->state.is_user_cond_render_suspended = true;
assert(!cond_render->suspended);
cond_render->enabled_save = cond_render->enabled;
cond_render->enabled = false;
cond_render->suspended = true;
}
static inline void
radv_resume_conditional_rendering(struct radv_cmd_buffer *cmd_buffer)
{
assert(cmd_buffer->state.is_user_cond_render_suspended);
struct radv_cond_render_state *cond_render = &cmd_buffer->state.cond_render;
cmd_buffer->state.predicating = cmd_buffer->state.saved_user_cond_render;
cmd_buffer->state.is_user_cond_render_suspended = false;
assert(cond_render->suspended);
cond_render->enabled = cond_render->enabled_save;
cond_render->suspended = false;
}
#endif /* RADV_CMD_BUFFER_H */

View file

@ -105,10 +105,10 @@ static void
radv_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src_va, unsigned size, unsigned flags)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_cond_render_state *cond_render = &cmd_buffer->state.cond_render;
struct radv_cmd_stream *cs = cmd_buffer->cs;
bool predicating = cmd_buffer->state.predicating;
radv_cs_emit_cp_dma(device, cs, predicating, dst_va, src_va, size, flags);
radv_cs_emit_cp_dma(device, cs, cond_render->enabled, dst_va, src_va, size, flags);
/* CP DMA is executed in ME, but index buffers are read by PFP.
* This ensures that ME (CP DMA) is idle before PFP starts fetching
@ -117,7 +117,7 @@ radv_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t s
*/
if (flags & CP_DMA_SYNC) {
if (cmd_buffer->qf == RADV_QUEUE_GENERAL) {
ac_emit_cp_pfp_sync_me(cs->b, cmd_buffer->state.predicating);
ac_emit_cp_pfp_sync_me(cs->b, cond_render->enabled);
}
/* CP will see the sync flag and wait for all DMAs to complete. */
@ -190,8 +190,9 @@ void
radv_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_cond_render_state *cond_render = &cmd_buffer->state.cond_render;
radv_cs_cp_dma_prefetch(device, cmd_buffer->cs, va, size, cmd_buffer->state.predicating);
radv_cs_cp_dma_prefetch(device, cmd_buffer->cs, va, size, cond_render->enabled);
if (radv_device_fault_detection_enabled(device))
radv_cmd_buffer_trace_emit(cmd_buffer);

View file

@ -3077,7 +3077,7 @@ radv_use_dgc_predication(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
* would be uninitialized).
*/
return cmd_buffer->qf == RADV_QUEUE_GENERAL && !radv_dgc_get_shader(pipeline_info, eso_info, MESA_SHADER_TASK) &&
pGeneratedCommandsInfo->sequenceCountAddress != 0 && !cmd_buffer->state.predicating;
pGeneratedCommandsInfo->sequenceCountAddress != 0 && !cmd_buffer->state.cond_render.enabled;
}
VKAPI_ATTR void VKAPI_CALL
@ -3088,7 +3088,7 @@ radv_CmdPreprocessGeneratedCommandsEXT(VkCommandBuffer commandBuffer,
VK_FROM_HANDLE(radv_cmd_buffer, state_cmd_buffer, stateCommandBuffer);
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
const bool execution_is_predicating = state_cmd_buffer->state.predicating;
const bool execution_is_predicating = state_cmd_buffer->state.cond_render.enabled;
assert(layout->vk.usage & VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EXPLICIT_PREPROCESS_BIT_EXT);
@ -3118,8 +3118,8 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
if (cond_render_enabled) {
params->predicating = true;
params->predication_va = state_cmd_buffer->state.user_predication_va;
params->predication_type = state_cmd_buffer->state.predication_type;
params->predication_va = state_cmd_buffer->state.cond_render.user_va;
params->predication_type = state_cmd_buffer->state.cond_render.type;
}
if (ies) {