mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
nvk: Add a MME based CmdCopyQueryPoolResults implementation
This adds an MME-based approach to the query copy that allows us to avoid switching subchannels when possible. Signed-off-by: Mary Guillemard <mary@mary.zone>
This commit is contained in:
parent
1a48288455
commit
1aaeb207dc
4 changed files with 157 additions and 4 deletions
|
|
@ -556,6 +556,14 @@ mme_load(struct mme_builder *b)
|
|||
UNREACHABLE("Unsupported GPU class");
|
||||
}
|
||||
|
||||
static inline struct mme_value64
|
||||
mme_load_value64(struct mme_builder *b)
|
||||
{
|
||||
struct mme_value lo = mme_load(b);
|
||||
struct mme_value hi = mme_load(b);
|
||||
return mme_value64(lo, hi);
|
||||
}
|
||||
|
||||
static inline struct mme_value64
|
||||
mme_load_addr64(struct mme_builder *b)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ static const nvk_mme_builder_func mme_builders[NVK_MME_COUNT] = {
|
|||
[NVK_MME_SET_VIEWPORT_MIN_MAX_Z] = nvk_mme_set_viewport_min_max_z,
|
||||
[NVK_MME_SET_Z_CLAMP] = nvk_mme_set_z_clamp,
|
||||
[NVK_MME_SET_STATISTICS_COUNTERS] = nvk_mme_set_statistics_counters,
|
||||
[NVK_MME_COPY_QUERIES] = nvk_mme_copy_queries,
|
||||
};
|
||||
|
||||
static const struct nvk_mme_test_case *mme_tests[NVK_MME_COUNT] = {
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ enum nvk_mme {
|
|||
NVK_MME_SET_VIEWPORT_MIN_MAX_Z,
|
||||
NVK_MME_SET_Z_CLAMP,
|
||||
NVK_MME_SET_STATISTICS_COUNTERS,
|
||||
NVK_MME_COPY_QUERIES,
|
||||
|
||||
NVK_MME_COUNT,
|
||||
};
|
||||
|
|
@ -252,6 +253,7 @@ void nvk_mme_set_conservative_raster_state(struct mme_builder *b);
|
|||
void nvk_mme_set_viewport_min_max_z(struct mme_builder *b);
|
||||
void nvk_mme_set_z_clamp(struct mme_builder *b);
|
||||
void nvk_mme_set_statistics_counters(struct mme_builder *b);
|
||||
void nvk_mme_copy_queries(struct mme_builder *b);
|
||||
|
||||
uint32_t nvk_mme_tess_params(mesa_shader_stage stage,
|
||||
enum nak_ts_domain domain,
|
||||
|
|
|
|||
|
|
@ -1068,6 +1068,8 @@ nvk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
|
|||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(nvk_query_pool, pool, queryPool);
|
||||
VK_FROM_HANDLE(nvk_buffer, dst_buffer, dstBuffer);
|
||||
const struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
|
||||
const struct nvk_physical_device *pdev = nvk_device_physical(dev);
|
||||
|
||||
if (unlikely(!queryCount))
|
||||
return;
|
||||
|
|
@ -1077,7 +1079,7 @@ nvk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
|
|||
uint64_t avail_addr = nvk_query_available_addr(pool, firstQuery + i);
|
||||
|
||||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 5);
|
||||
__push_mthd(p, SUBC_NV9097, NV906F_SEMAPHOREA);
|
||||
__push_mthd(p, nvk_cmd_buffer_last_subchannel(cmd), NV906F_SEMAPHOREA);
|
||||
P_NV906F_SEMAPHOREA(p, avail_addr >> 32);
|
||||
P_NV906F_SEMAPHOREB(p, (avail_addr & UINT32_MAX) >> 2);
|
||||
P_NV906F_SEMAPHOREC(p, 1);
|
||||
|
|
@ -1089,9 +1091,49 @@ nvk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
|
|||
}
|
||||
}
|
||||
|
||||
uint64_t dst_addr = vk_buffer_address(&dst_buffer->vk, dstOffset);
|
||||
nvk_meta_copy_query_pool_results(cmd, pool, firstQuery, queryCount,
|
||||
dst_addr, stride, flags);
|
||||
const uint64_t dst_addr = vk_buffer_address(&dst_buffer->vk, dstOffset);
|
||||
|
||||
/* Allow to use MME for copy only if we have a small amount of queries on
|
||||
* Turing+. We also ensure it doesn't cause a switch to 3D subchannel on
|
||||
* Turing as it's missing MME on compute.
|
||||
*/
|
||||
const bool should_use_mme_copy =
|
||||
queryCount <= 5 && pdev->info.cls_eng3d >= TURING_A &&
|
||||
(nvk_cmd_buffer_last_subchannel(cmd) != SUBC_NV90C0 ||
|
||||
pdev->info.cls_compute >= AMPERE_COMPUTE_B);
|
||||
|
||||
if (!should_use_mme_copy) {
|
||||
nvk_meta_copy_query_pool_results(cmd, pool, firstQuery, queryCount,
|
||||
dst_addr, stride, flags);
|
||||
} else {
|
||||
uint64_t report_addr = nvk_query_report_addr(pool, firstQuery);
|
||||
const uint64_t available_addr = nvk_query_available_addr(pool, firstQuery);
|
||||
|
||||
if (pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP)
|
||||
report_addr += offsetof(struct nvk_query_report, timestamp);
|
||||
else
|
||||
report_addr += offsetof(struct nvk_query_report, value);
|
||||
|
||||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 14);
|
||||
if (nvk_cmd_buffer_last_subchannel(cmd) == SUBC_NV90C0 &&
|
||||
pdev->info.cls_compute >= AMPERE_COMPUTE_B)
|
||||
P_1INC(p, NVC7C0, CALL_MME_MACRO(NVK_MME_COPY_QUERIES));
|
||||
else
|
||||
P_1INC(p, NVC597, CALL_MME_MACRO(NVK_MME_COPY_QUERIES));
|
||||
P_INLINE_DATA(p, report_addr >> 32);
|
||||
P_INLINE_DATA(p, report_addr);
|
||||
P_INLINE_DATA(p, available_addr >> 32);
|
||||
P_INLINE_DATA(p, available_addr);
|
||||
P_INLINE_DATA(p, nvk_query_available_stride_B(pool));
|
||||
P_INLINE_DATA(p, vk_query_pool_report_count(&pool->vk));
|
||||
P_INLINE_DATA(p, pool->query_stride);
|
||||
P_INLINE_DATA(p, queryCount);
|
||||
P_INLINE_DATA(p, dst_addr >> 32);
|
||||
P_INLINE_DATA(p, dst_addr);
|
||||
P_INLINE_DATA(p, stride >> 32);
|
||||
P_INLINE_DATA(p, stride);
|
||||
P_INLINE_DATA(p, flags);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1152,3 +1194,103 @@ const struct nvk_mme_test_case nvk_mme_set_statistics_counters_tests[] = {{
|
|||
{NV9097_SET_STATISTICS_COUNTER, 0x100},
|
||||
{}},
|
||||
}, {}};
|
||||
|
||||
/* Writes a single query result (either 32-bit or 64-bit, depending on
 * VK_QUERY_RESULT_64_BIT in flags) to dst_addr + idx * result_size.
 *
 * This helper is quite convoluted because we only have 4 registers to work
 * with when writing a report result, so both branches recompute the
 * destination address locally and free their temporaries as soon as possible.
 *
 * Note that flags and result_64_bit are tested with a pair of complementary
 * mme_if()s rather than an if/else because the MME builder only provides
 * single-armed conditionals here.
 */
static void
nvk_mme_write_query(struct mme_builder *b,
                    struct mme_value64 dst_addr,
                    struct mme_value idx,
                    struct mme_value flags,
                    struct mme_value64 result)
{
   struct mme_value result_64_bit = mme_and(b, flags, mme_imm(VK_QUERY_RESULT_64_BIT));

   /* 64-bit path: each result is 8 bytes, so the byte offset is idx << 3 */
   mme_if(b, ine, result_64_bit, mme_zero()) {
      struct mme_value report_offset = mme_sll(b, idx, mme_imm(3));
      struct mme_value64 report_addr =
         mme_add64(b, dst_addr, mme_value64(report_offset, mme_zero()));
      mme_free_reg(b, report_offset);

      /* Store low and high words as two 32-bit writes, 4 bytes apart */
      mme_store_global(b, report_addr, result.lo);

      mme_add64_to(b, report_addr, report_addr, mme_imm64(4));
      mme_store_global(b, report_addr, result.hi);
      mme_free_reg64(b, report_addr);
   }

   /* 32-bit path: each result is 4 bytes (idx << 2); only the low word is
    * written, the high word of result is discarded.
    */
   mme_if(b, ieq, result_64_bit, mme_zero()) {
      struct mme_value report_offset = mme_sll(b, idx, mme_imm(2));
      struct mme_value64 report_addr =
         mme_add64(b, dst_addr, mme_value64(report_offset, mme_zero()));
      mme_free_reg(b, report_offset);

      mme_store_global(b, report_addr, result.lo);
      mme_free_reg64(b, report_addr);
   }
   mme_free_reg(b, result_64_bit);
}
|
||||
|
||||
/* MME macro implementing vkCmdCopyQueryPoolResults on the GPU.
 *
 * Parameters are consumed from the macro parameter FIFO in the exact order
 * pushed by nvk_CmdCopyQueryPoolResults: report address (hi/lo pair),
 * availability address (hi/lo pair), availability stride, report count,
 * query stride, query count, destination address (hi/lo pair), destination
 * stride (hi/lo pair), and finally the VkQueryResultFlags.
 *
 * Requires Turing+ (mme_tu104_read_fifoed); on older classes this macro is
 * simply empty and the caller is expected to take the meta-copy path.
 */
void
nvk_mme_copy_queries(struct mme_builder *b)
{
   if (b->devinfo->cls_eng3d < TURING_A)
      return;

   struct mme_value64 report_addr = mme_load_addr64(b);
   struct mme_value64 available_addr = mme_load_addr64(b);
   struct mme_value available_stride = mme_load(b);
   struct mme_value report_count = mme_load(b);
   struct mme_value query_stride = mme_load(b);
   struct mme_value query_count = mme_load(b);
   struct mme_value64 dst_addr = mme_load_addr64(b);
   struct mme_value64 dst_stride = mme_load_addr64(b);
   struct mme_value flags = mme_load(b);

   /* Loop over each query until query_count reaches zero */
   mme_while(b, ine, query_count, mme_zero()) {
      /* Read the availability dword for this query through the MME FIFO and
       * determine whether results need to be written: they are written when
       * the query is available, or unconditionally when
       * VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set.
       */
      mme_tu104_read_fifoed(b, available_addr, mme_imm(1));
      struct mme_value available = mme_load(b);
      struct mme_value write_results =
         mme_and(b, flags, mme_imm(VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
      mme_or_to(b, write_results, write_results, available);

      mme_if(b, ine, write_results, mme_zero()) {
         /* r iterates over the reports (query values) of this query */
         struct mme_value r = mme_mov(b, mme_zero());
         mme_while(b, ine, r, report_count) {
            /* Set up an MME FIFO read of the report. We only have 7
             * registers to work with, so we aggressively free registers
             * as soon as their value is no longer needed.
             */
            STATIC_ASSERT(sizeof(struct nvk_query_report) % 2 == 0);
            /* Byte offset of report r: r * sizeof(struct nvk_query_report),
             * done as a shift since the struct size is a power of two.
             */
            struct mme_value current_report_offs = mme_sll(
               b, r, mme_imm(util_logbase2(sizeof(struct nvk_query_report))));
            struct mme_value64 current_report_addr = mme_add64(
               b, report_addr, mme_value64(current_report_offs, mme_zero()));
            /* Fetch two dwords: the 64-bit report value (lo then hi) */
            mme_tu104_read_fifoed(b, current_report_addr, mme_imm(2));
            mme_free_reg(b, current_report_offs);
            mme_free_reg64(b, current_report_addr);

            struct mme_value64 report = mme_load_value64(b);
            nvk_mme_write_query(b, dst_addr, r, flags, report);
            mme_free_reg64(b, report);

            mme_add_to(b, r, r, mme_imm(1));
         }
      }
      mme_free_reg(b, write_results);

      /* Finally, write the availability value after the results (at index
       * report_count) when VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set.
       */
      struct mme_value with_availability =
         mme_and(b, flags, mme_imm(VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
      mme_if(b, ine, with_availability, mme_zero()) {
         nvk_mme_write_query(b, dst_addr, report_count, flags,
                             mme_value64(available, mme_zero()));
      }
      mme_free_reg(b, with_availability);
      mme_free_reg(b, available);

      /* Advance to the next query: source report/availability pointers move
       * by their per-query strides (32-bit strides zero-extended to 64-bit),
       * the destination by the caller-provided 64-bit stride.
       */
      mme_sub_to(b, query_count, query_count, mme_imm(1));
      mme_add64_to(b, report_addr, report_addr, mme_value64(query_stride, mme_zero()));
      mme_add64_to(b, available_addr, available_addr, mme_value64(available_stride, mme_zero()));
      mme_add64_to(b, dst_addr, dst_addr, dst_stride);
   }
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue