radv: Refactor WRITE_DATA helper function.

Create a version of this function that takes a CS and queue family.
Move it to radv_cs.h so it can be called from multiple other files.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25770>
This commit is contained in:
Timur Kristóf 2023-10-18 14:29:55 +02:00 committed by Marge Bot
parent 1b988af0ad
commit 107473162e
5 changed files with 44 additions and 25 deletions

View file

@ -277,18 +277,10 @@ radv_queue_family_to_ring(const struct radv_physical_device *physical_device, en
}
static void
radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, unsigned count,
const uint32_t *data)
radv_write_data(struct radv_cmd_buffer *cmd_buffer, const unsigned engine_sel, const uint64_t va, const unsigned count,
const uint32_t *data, const bool predicating)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
radeon_check_space(cmd_buffer->device->ws, cs, 4 + count);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit_array(cs, data, count);
radv_cs_write_data(cmd_buffer->device, cmd_buffer->cs, cmd_buffer->qf, engine_sel, va, count, data, predicating);
}
static void
@ -296,7 +288,7 @@ radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, ui
{
uint32_t *zeroes = alloca(size);
memset(zeroes, 0, size);
radv_emit_write_data_packet(cmd_buffer, engine_sel, va, size / 4, zeroes);
radv_write_data(cmd_buffer, engine_sel, va, size / 4, zeroes, false);
}
static void
@ -554,7 +546,7 @@ radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
va += 4;
++cmd_buffer->state.trace_id;
radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id);
radv_write_data(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id, false);
radeon_check_space(cmd_buffer->device->ws, cs, 2);
@ -769,7 +761,7 @@ radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pip
data[0] = pipeline_address;
data[1] = pipeline_address >> 32;
radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data);
radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false);
}
static void
@ -785,7 +777,7 @@ radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, uint64_t vb_ptr
data[0] = vb_ptr;
data[1] = vb_ptr >> 32;
radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data);
radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false);
}
static void
@ -802,7 +794,7 @@ radv_save_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader
data[0] = prolog_address;
data[1] = prolog_address >> 32;
radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data);
radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false);
}
void
@ -832,7 +824,7 @@ radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bi
data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32;
}
radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data);
radv_write_data(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data, false);
}
const struct radv_userdata_info *

View file

@ -224,4 +224,31 @@ radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, cons
radeon_emit(cs, 4); /* poll interval */
}
/**
 * Emit the header of a PKT3_WRITE_DATA packet that targets memory.
 *
 * Reserves space for the whole packet (4 header dwords plus `count` payload
 * dwords) and emits the PKT3 header, the control dword and the destination
 * address. The caller is responsible for emitting exactly `count` payload
 * dwords afterwards.
 *
 * \param device       Device whose winsys is used to reserve command stream space.
 * \param cs           Command stream the packet is emitted into.
 * \param qf           Queue family of the command stream; only GENERAL and
 *                     COMPUTE are accepted (asserted).
 * \param engine_sel   Value placed in the ENGINE_SEL field of the control dword.
 * \param va           Destination address; emitted as low dword then high dword.
 * \param count        Number of payload dwords that will follow the header.
 * \param predicating  Predicate bit of the PKT3 header.
 *
 * \return The value returned by radeon_check_space(), i.e. the cdw expected
 *         at the end of the packet, so the caller can assert it.
 */
ALWAYS_INLINE static unsigned
radv_cs_write_data_head(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
                        const unsigned engine_sel, const uint64_t va, const unsigned count, const bool predicating)
{
   assert(qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE);

   /* Return the correct cdw at the end of the packet so the caller can assert it. */
   const unsigned cdw_end = radeon_check_space(device->ws, cs, 4 + count);

   /* Honor the caller's predication request instead of hard-coding it off. */
   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, predicating));
   radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel));
   radeon_emit(cs, va);
   radeon_emit(cs, va >> 32);

   return cdw_end;
}
ALWAYS_INLINE static void
radv_cs_write_data(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
const unsigned engine_sel, const uint64_t va, const unsigned count, const uint32_t *dwords,
const bool predicating)
{
ASSERTED const unsigned cdw_end = radv_cs_write_data_head(device, cs, qf, engine_sel, va, count, predicating);
radeon_emit_array(cs, dwords, count);
assert(cs->cdw == cdw_end);
}
#endif /* RADV_CS_H */

View file

@ -2068,7 +2068,7 @@ unsigned radv_get_default_max_sample_dist(int log_samples);
void radv_device_init_msaa(struct radv_device *device);
VkResult radv_device_init_vrs_state(struct radv_device *device);
void radv_emit_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm);
void radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm);
void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects);

View file

@ -1777,11 +1777,11 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
/* generated prim counter */
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
radv_emit_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
/* written prim counter */
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 8);
radv_emit_write_data_imm(cs, V_370_ME, va + 12, 0x80000000);
radv_cs_write_data_imm(cs, V_370_ME, va + 12, 0x80000000);
/* Record that the command buffer needs GDS. */
cmd_buffer->gds_needed = true;
@ -1802,7 +1802,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query always use GDS. */
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
radv_emit_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
/* Record that the command buffer needs GDS. */
cmd_buffer->gds_needed = true;
@ -1938,11 +1938,11 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
/* generated prim counter */
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
radv_emit_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
/* written prim counter */
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 24);
radv_emit_write_data_imm(cs, V_370_ME, va + 28, 0x80000000);
radv_cs_write_data_imm(cs, V_370_ME, va + 28, 0x80000000);
cmd_buffer->state.active_prims_xfb_gds_queries--;
@ -1960,7 +1960,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query always use GDS. */
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
radv_emit_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
cmd_buffer->state.active_prims_gen_gds_queries--;

View file

@ -2010,7 +2010,7 @@ radv_device_init_msaa(struct radv_device *device)
}
void
radv_emit_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm)
radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm)
{
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel));