nvk: Implement support for non graphics timestamp

Now that we have a unified layout for timestamp, we can implement
timestamp writes on DMA and Compute sub channels.

This also expose timestamp on non graphics queues.

Signed-off-by: Mary Guillemard <mary@mary.zone>
This commit is contained in:
Mary Guillemard 2026-05-05 14:16:00 +02:00
parent 1aaeb207dc
commit 7c329f1f9e
2 changed files with 59 additions and 13 deletions

View file

@ -1759,10 +1759,11 @@ nvk_GetPhysicalDeviceQueueFamilyProperties2(
vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
p->queueFamilyProperties.queueCount = queue_family->queue_count;
if (queue_family->queue_flags & VK_QUEUE_GRAPHICS_BIT) {
if (queue_family->queue_flags &
(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT |
VK_QUEUE_TRANSFER_BIT)) {
p->queueFamilyProperties.timestampValidBits = 64;
} else {
/* TODO: Timestamps on non-graphics queues */
p->queueFamilyProperties.timestampValidBits = 0;
}
p->queueFamilyProperties.minImageTransferGranularity =

View file

@ -16,6 +16,7 @@
#include "vk_common_entrypoints.h"
#include "vk_meta.h"
#include "vk_pipeline.h"
#include "vk_synchronization.h"
#include "cl/nvk_query.h"
#include "compiler/nir/nir.h"
@ -26,6 +27,8 @@
#include "nv_push_cl906f.h"
#include "nv_push_cl9097.h"
#include "nv_push_cl90b5.h"
#include "nv_push_cl90c0.h"
#include "nv_push_cla0c0.h"
#include "nv_push_clc597.h"
#include "nv_push_clc7c0.h"
@ -404,18 +407,60 @@ nvk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
assert(pool->layout == NVK_QUERY_POOL_LAYOUT_TIMESTAMP_PACKED);
struct nv_push *p = nvk_cmd_buffer_push(cmd, 5);
uint64_t report_addr = nvk_query_report_addr(pool, query);
P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
P_NV9097_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
P_NV9097_SET_REPORT_SEMAPHORE_B(p, report_addr);
P_NV9097_SET_REPORT_SEMAPHORE_C(p, 1);
P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
.operation = OPERATION_RELEASE,
.release = RELEASE_AFTER_ALL_PRECEEDING_WRITES_COMPLETE,
.pipeline_location = vk_stage_flags_to_nv9097_pipeline_location(stage),
.structure_size = STRUCTURE_SIZE_FOUR_WORDS,
});
uint8_t subc = nvk_cmd_buffer_last_subchannel(cmd);
if (subc == SUBC_NV9097) {
struct nv_push *p = nvk_cmd_buffer_push(cmd, 7);
P_IMMD(p, NV9097, FLUSH_PENDING_WRITES, 0);
P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
P_NV9097_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
P_NV9097_SET_REPORT_SEMAPHORE_B(p, report_addr);
P_NV9097_SET_REPORT_SEMAPHORE_C(p, 1);
P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
.operation = OPERATION_RELEASE,
.release = RELEASE_AFTER_ALL_PRECEEDING_WRITES_COMPLETE,
.pipeline_location = vk_stage_flags_to_nv9097_pipeline_location(stage),
.structure_size = STRUCTURE_SIZE_FOUR_WORDS,
});
} else if (subc == SUBC_NV90C0) {
struct nv_push *p = nvk_cmd_buffer_push(cmd, 7);
/* Compute SET_REPORT_SEMAPHORE_D doesn't provide a pipeline location
* meaning that we need to handle first synchronization scope here.
*
* Considering that if we are on the compute subchannel, we only really
* need to wait on anything that runs on compute.
*/
if (vk_expand_src_stage_flags2(stage) &
(VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT |
VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_RESOLVE_BIT |
VK_PIPELINE_STAGE_2_BLIT_BIT))
P_IMMD(p, NV90C0, WAIT_FOR_IDLE, 0);
P_MTHD(p, NV90C0, SET_REPORT_SEMAPHORE_A);
P_NV90C0_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
P_NV90C0_SET_REPORT_SEMAPHORE_B(p, report_addr);
P_NV90C0_SET_REPORT_SEMAPHORE_C(p, 1);
P_NV90C0_SET_REPORT_SEMAPHORE_D(p, {
.operation = OPERATION_RELEASE,
.structure_size = STRUCTURE_SIZE_FOUR_WORDS,
});
} else {
assert(subc == SUBC_NV90B5);
struct nv_push *p = nvk_cmd_buffer_push(cmd, 6);
P_MTHD(p, NV90B5, SET_SEMAPHORE_A);
P_NV90B5_SET_SEMAPHORE_A(p, report_addr >> 32);
P_NV90B5_SET_SEMAPHORE_B(p, report_addr);
P_NV90B5_SET_SEMAPHORE_PAYLOAD(p, 1);
P_IMMD(p, NV90B5, LAUNCH_DMA, {
.data_transfer_type = DATA_TRANSFER_TYPE_NONE,
.semaphore_type = SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE,
.flush_enable = FLUSH_ENABLE_TRUE,
/* Note: FLUSH_TYPE=SYS implicitly for NVC3B5+ */
});
}
/* From the Vulkan spec:
*