hk: implement timestamps
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32434>
commit 3f5086016b
parent 55cd987de2

8 changed files with 260 additions and 56 deletions
@@ -245,3 +245,11 @@ struct agx_va *agx_va_alloc(struct agx_device *dev, uint64_t size_B,
                             uint64_t align_B, enum agx_va_flags flags,
                             uint64_t fixed_va);
 void agx_va_free(struct agx_device *dev, struct agx_va *va);
+
+static inline bool
+agx_supports_timestamps(const struct agx_device *dev)
+{
+   /* TODO: Ungate virtio once virglrenderer supports the timestamp uapi */
+   return !dev->is_virtio &&
+          (dev->params.feat_compat & DRM_ASAHI_FEAT_USER_TIMESTAMPS);
+}
@@ -29,7 +29,12 @@ libagx_copy_query(global uint32_t *availability, global uint64_t *results,
    uint i = get_global_id(0);
    uint64_t dst = dst_addr + (((uint64_t)i) * dst_stride);
    uint32_t query = first_query + i;
-   bool available = availability[query];
+
+   bool available;
+   if (availability)
+      available = availability[query];
+   else
+      available = (results[query] != LIBAGX_QUERY_UNAVAILABLE);
 
    if (available || partial) {
       /* For occlusion queries, results[] points to the device global heap. We
@@ -109,6 +114,17 @@ libagx_write_u32s(constant struct libagx_imm_write *p)
    *(p[id].address) = p[id].value;
 }
 
+/*
+ * We set the source as volatile since the caching situation around timestamps
+ * is a bit unclear. It might not be necessary but - absent hardware/firmware
+ * documentation - this gives me peace of mind.
+ */
+KERNEL(1)
+libagx_copy_timestamp(global uint64_t *dest, volatile global uint64_t *src)
+{
+   *dest = *src;
+}
+
 KERNEL(1)
 libagx_write_u32(global uint32_t *address, uint32_t value)
 {
@@ -24,3 +24,5 @@ struct libagx_imm_write {
    GLOBAL(uint32_t) address;
    uint32_t value;
 };
+
+#define LIBAGX_QUERY_UNAVAILABLE (uint64_t)((int64_t)-1)
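Note on the sentinel: timestamp query pools have no separate availability buffer, so availability is carried in-band. A slot seeded with all-ones cannot be confused with a real timestamp, so "still all-ones" means "not yet written". A minimal sketch of the resulting check (hypothetical helper; only LIBAGX_QUERY_UNAVAILABLE comes from the diff):

    #include <stdbool.h>
    #include <stdint.h>

    #define LIBAGX_QUERY_UNAVAILABLE (uint64_t)((int64_t)-1)

    /* In-band availability: a timestamp slot counts as written once it holds
     * anything other than the all-ones sentinel. */
    static bool
    timestamp_slot_available(const uint64_t *results, uint32_t query)
    {
       return results[query] != LIBAGX_QUERY_UNAVAILABLE;
    }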
@@ -300,6 +300,19 @@ struct hk_scratch_req {
    bool preamble;
 };
 
+/*
+ * Represents a firmware timestamp request. Handle is a kernel timestamp object
+ * handle, not a GEM handle.
+ *
+ * The kernel/firmware uses the handle/offset_B to write. We use the address to
+ * read the results back. We could deduplicate this, but this is convenient.
+ */
+struct agx_timestamp_req {
+   uint64_t addr;
+   uint32_t handle;
+   uint32_t offset_B;
+};
+
 /*
  * hk_cs represents a single control stream, to be enqueued either to the
  * CDM or VDM for compute/3D respectively.
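The same 8-byte slot is thus described twice: (handle, offset_B) is what the kernel needs in order to write it, while addr is what the GPU and CPU use to read it back. A sketch of the invariant tying the two views together (make_req, base_va, and the per-slot stride are illustrative, not from the commit):

    #include <stdint.h>

    struct agx_timestamp_req {
       uint64_t addr;     /* GPU VA of the slot, for reading the result back */
       uint32_t handle;   /* kernel timestamp-object handle, not a GEM handle */
       uint32_t offset_B; /* byte offset of the slot within that object */
    };

    /* Both views name the same memory: for an object mapped at base_va,
     * req.addr == base_va + req.offset_B must hold. */
    static struct agx_timestamp_req
    make_req(uint64_t base_va, uint32_t handle, uint32_t slot)
    {
       uint32_t offset_B = slot * sizeof(uint64_t);
       return (struct agx_timestamp_req){
          .addr = base_va + offset_B,
          .handle = handle,
          .offset_B = offset_B,
       };
    }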
@@ -353,6 +366,14 @@ struct hk_cs {
       uint32_t calls, cmds, flushes;
    } stats;
 
+   /* Timestamp writes. Currently just compute end / fragment end. We could
+    * flesh this out later if we want finer info. (We will, but it's not
+    * required for conformance.)
+    */
+   struct {
+      struct agx_timestamp_req end;
+   } timestamp;
+
    /* Remaining state is for graphics only, ignored for compute */
    struct agx_tilebuffer_layout tib;
@@ -23,6 +23,7 @@
 #include "util/simple_mtx.h"
 #include "vulkan/vulkan_core.h"
 #include "vulkan/wsi/wsi_common.h"
+#include "unstable_asahi_drm.h"
 #include "vk_drm_syncobj.h"
 #include "vk_shader_module.h"
 
@@ -714,7 +715,7 @@ hk_get_device_properties(const struct agx_device *dev,
       .sampledImageStencilSampleCounts = sample_counts,
       .storageImageSampleCounts = sample_counts,
       .maxSampleMaskWords = 1,
-      .timestampComputeAndGraphics = false,
+      .timestampComputeAndGraphics = agx_supports_timestamps(dev),
       .timestampPeriod = 1,
       .maxClipDistances = 8,
       .maxCullDistances = 8,
@@ -1378,7 +1379,8 @@ hk_GetPhysicalDeviceQueueFamilyProperties2(
    {
       p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
       p->queueFamilyProperties.queueCount = queue_family->queue_count;
-      p->queueFamilyProperties.timestampValidBits = 0; // TODO 64;
+      p->queueFamilyProperties.timestampValidBits =
+         agx_supports_timestamps(&pdev->dev) ? 64 : 0;
       p->queueFamilyProperties.minImageTransferGranularity =
         (VkExtent3D){1, 1, 1};
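With timestampPeriod = 1 and timestampValidBits = 64, a returned tick is already a nanosecond count and no masking is needed. For reference, the generic conversion an application performs from these two properties (a sketch of the Vulkan-side math, not driver code):

    #include <stdint.h>

    /* Convert a timestamp query result to nanoseconds using the properties
     * advertised above. With period = 1.0 and validBits = 64 this is the
     * identity. */
    static double
    ticks_to_ns(uint64_t ticks, float timestamp_period, uint32_t valid_bits)
    {
       if (valid_bits < 64)
          ticks &= (1ull << valid_bits) - 1; /* bits above valid_bits are undefined */
       return (double)ticks * timestamp_period;
    }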
@@ -31,6 +31,12 @@ struct hk_query_report {
    uint64_t value;
 };
 
+static inline bool
+hk_has_available(const struct hk_query_pool *pool)
+{
+   return pool->vk.query_type != VK_QUERY_TYPE_TIMESTAMP;
+}
+
 static uint16_t *
 hk_pool_oq_index_ptr(const struct hk_query_pool *pool)
 {
@@ -55,6 +61,22 @@ hk_reports_per_query(struct hk_query_pool *pool)
    }
 }
 
+static void
+hk_flush_if_timestamp(struct hk_cmd_buffer *cmd, struct hk_query_pool *pool)
+{
+   struct hk_device *dev = hk_cmd_buffer_device(cmd);
+
+   /* There might not be a barrier between the timestamp write and the copy
+    * otherwise, but we need one to give the CPU a chance to write the
+    * timestamp. This could maybe be optimized.
+    */
+   if (pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP) {
+      perf_debug(dev, "Flushing for timestamp copy");
+      hk_cmd_buffer_end_graphics(cmd);
+      hk_cmd_buffer_end_compute(cmd);
+   }
+}
+
 VKAPI_ATTR VkResult VKAPI_CALL
 hk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator,
@@ -64,16 +86,24 @@ hk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo,
    struct hk_query_pool *pool;
 
    bool occlusion = pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION;
+   bool timestamp = pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP;
    unsigned occlusion_queries = occlusion ? pCreateInfo->queryCount : 0;
 
+   /* Workaround for DXVK on old kernels */
+   if (!agx_supports_timestamps(&dev->dev))
+      timestamp = false;
+
    pool =
       vk_query_pool_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*pool));
    if (!pool)
       return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
 
    /* We place the availability first and then data */
-   pool->query_start = align(pool->vk.query_count * sizeof(uint32_t),
-                             sizeof(struct hk_query_report));
+   pool->query_start = 0;
+   if (hk_has_available(pool)) {
+      pool->query_start = align(pool->vk.query_count * sizeof(uint32_t),
+                                sizeof(struct hk_query_report));
+   }
 
    uint32_t reports_per_query = hk_reports_per_query(pool);
    pool->query_stride = reports_per_query * sizeof(struct hk_query_report);
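The pool BO layout is [32-bit availability words][aligned report array]; timestamp pools omit the availability prefix, so their reports begin at offset 0 and the in-band sentinel takes over. A sketch of the offset math under those assumptions (stand-in report type; align_pot mirrors the driver's align() helper):

    #include <stdbool.h>
    #include <stdint.h>

    struct report { uint64_t value; }; /* stand-in for struct hk_query_report */

    static uint64_t
    align_pot(uint64_t x, uint64_t a)
    {
       return (x + a - 1) & ~(a - 1);
    }

    /* Offset of the report array within the pool BO: after the availability
     * words for most pools, at 0 for timestamp pools (which have none). */
    static uint64_t
    query_data_start(uint32_t query_count, bool has_available)
    {
       if (!has_available)
          return 0;
       return align_pot(query_count * sizeof(uint32_t), sizeof(struct report));
    }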
@@ -87,12 +117,33 @@ hk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo,
    else
       bo_size += pool->query_stride * pool->vk.query_count;
 
-   pool->bo =
-      agx_bo_create(&dev->dev, bo_size, 0, AGX_BO_WRITEBACK, "Query pool");
+   /* The kernel requires that timestamp buffers are SHARED */
+   enum agx_bo_flags flags = AGX_BO_WRITEBACK;
+   if (timestamp)
+      flags |= AGX_BO_SHARED;
+
+   pool->bo = agx_bo_create(&dev->dev, bo_size, 0, flags, "Query pool");
    if (!pool->bo) {
       hk_DestroyQueryPool(device, hk_query_pool_to_handle(pool), pAllocator);
       return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
    }
 
+   /* Timestamp buffers must be explicitly bound as such before we can use
+    * them.
+    */
+   if (timestamp) {
+      int ret = dev->dev.ops.bo_bind_object(
+         &dev->dev, pool->bo, &pool->handle, pool->bo->size, 0,
+         ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS);
+
+      if (ret) {
+         hk_DestroyQueryPool(device, hk_query_pool_to_handle(pool),
+                             pAllocator);
+         return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+      }
+
+      assert(pool->handle && "handles are nonzero");
+   }
+
    uint16_t *oq_index = hk_pool_oq_index_ptr(pool);
@@ -135,6 +186,9 @@ hk_DestroyQueryPool(VkDevice device, VkQueryPool queryPool,
       hk_descriptor_table_remove(dev, &dev->occlusion_queries, oq_index[i]);
    }
 
+   if (pool->handle)
+      dev->dev.ops.bo_unbind_object(&dev->dev, pool->handle, 0);
+
    agx_bo_unreference(&dev->dev, pool->bo);
    vk_query_pool_destroy(&dev->vk, pAllocator, &pool->vk);
 }
@@ -142,6 +196,7 @@ hk_DestroyQueryPool(VkDevice device, VkQueryPool queryPool,
 static uint64_t
 hk_query_available_addr(struct hk_query_pool *pool, uint32_t query)
 {
+   assert(hk_has_available(pool));
    assert(query < pool->vk.query_count);
    return pool->bo->va->addr + query * sizeof(uint32_t);
 }
@@ -149,6 +204,7 @@ hk_query_available_addr(struct hk_query_pool *pool, uint32_t query)
 static uint32_t *
 hk_query_available_map(struct hk_query_pool *pool, uint32_t query)
 {
+   assert(hk_has_available(pool));
    assert(query < pool->vk.query_count);
    return (uint32_t *)agx_bo_map(pool->bo) + query;
 }
@@ -264,16 +320,45 @@ emit_zero_queries(struct hk_cmd_buffer *cmd, struct hk_query_pool *pool,
    struct hk_device *dev = hk_cmd_buffer_device(cmd);
 
    for (uint32_t i = 0; i < num_queries; i++) {
-      uint64_t available = hk_query_available_addr(pool, first_index + i);
       uint64_t report = hk_query_report_addr(dev, pool, first_index + i);
-      hk_queue_write(cmd, available, set_available, false);
+
+      uint64_t value = 0;
+      if (hk_has_available(pool)) {
+         uint64_t available = hk_query_available_addr(pool, first_index + i);
+         hk_queue_write(cmd, available, set_available, false);
+      } else {
+         value = set_available ? 0 : LIBAGX_QUERY_UNAVAILABLE;
+      }
 
       /* XXX: is this supposed to happen on the begin? */
       for (unsigned j = 0; j < hk_reports_per_query(pool); ++j) {
-         hk_queue_write(cmd, report + (j * sizeof(struct hk_query_report)), 0,
-                        false);
+         hk_queue_write(cmd, report + (j * sizeof(struct hk_query_report)),
+                        value, false);
          hk_queue_write(cmd, report + (j * sizeof(struct hk_query_report)) + 4,
-                        0, false);
+                        value >> 32, false);
       }
    }
 }
+
+static void
+host_zero_queries(struct hk_device *dev, struct hk_query_pool *pool,
+                  uint32_t first_index, uint32_t num_queries,
+                  bool set_available)
+{
+   for (uint32_t i = 0; i < num_queries; i++) {
+      struct hk_query_report *reports =
+         hk_query_report_map(dev, pool, first_index + i);
+
+      uint64_t value = 0;
+      if (hk_has_available(pool)) {
+         uint32_t *available = hk_query_available_map(pool, first_index + i);
+         *available = set_available;
+      } else {
+         value = set_available ? 0 : LIBAGX_QUERY_UNAVAILABLE;
+      }
+
+      for (unsigned j = 0; j < hk_reports_per_query(pool); ++j) {
+         reports[j].value = value;
+      }
+   }
+}
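hk_queue_write queues one 32-bit write per call, so seeding a 64-bit report takes two writes: the low word at the report address and the high word 4 bytes above it. For availability-less pools the seeded value is the all-ones sentinel rather than 0, which is what makes the reset visible to the in-band check. A sketch of the split (write32 stands in for hk_queue_write):

    #include <stdint.h>

    /* Seed a 64-bit report slot using a 32-bit write primitive, as the queued
     * path above does. */
    static void
    write64_as_two_words(void (*write32)(uint64_t addr, uint32_t value),
                         uint64_t addr, uint64_t value)
    {
       write32(addr, (uint32_t)value);             /* low word  */
       write32(addr + 4, (uint32_t)(value >> 32)); /* high word */
    }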
@@ -285,11 +370,7 @@ hk_ResetQueryPool(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery,
    VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
    VK_FROM_HANDLE(hk_device, dev, device);
 
-   uint32_t *available = hk_query_available_map(pool, firstQuery);
-   struct hk_query_report *reports = hk_query_report_map(dev, pool, firstQuery);
-
-   memset(available, 0, queryCount * sizeof(*available));
-   memset(reports, 0, queryCount * pool->query_stride);
+   host_zero_queries(dev, pool, firstQuery, queryCount, false);
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -300,6 +381,8 @@ hk_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
    VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
    struct hk_device *dev = hk_cmd_buffer_device(cmd);
 
+   hk_flush_if_timestamp(cmd, pool);
+
    perf_debug(dev, "Reset query pool");
    emit_zero_queries(cmd, pool, firstQuery, queryCount, false);
 }
@@ -309,35 +392,56 @@ hk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
                       VkPipelineStageFlags2 stage, VkQueryPool queryPool,
                       uint32_t query)
 {
-   unreachable("todo");
-#if 0
    VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
    VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
    struct hk_device *dev = hk_cmd_buffer_device(cmd);
 
-   struct nv_push *p = hk_cmd_buffer_push(cmd, 10);
+   /* Workaround for DXVK on old kernels */
+   if (!agx_supports_timestamps(&dev->dev))
+      return;
 
-   uint64_t report_addr = hk_query_report_addr(pool, query);
-   P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
-   P_NV9097_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
-   P_NV9097_SET_REPORT_SEMAPHORE_B(p, report_addr);
-   P_NV9097_SET_REPORT_SEMAPHORE_C(p, 0);
-   P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
-      .operation = OPERATION_REPORT_ONLY,
-      .pipeline_location = vk_stage_flags_to_nv9097_pipeline_location(stage),
-      .structure_size = STRUCTURE_SIZE_FOUR_WORDS,
-   });
+   uint64_t report_addr = hk_query_report_addr(dev, pool, query);
 
-   uint64_t available_addr = hk_query_available_addr(pool, query);
-   P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
-   P_NV9097_SET_REPORT_SEMAPHORE_A(p, available_addr >> 32);
-   P_NV9097_SET_REPORT_SEMAPHORE_B(p, available_addr);
-   P_NV9097_SET_REPORT_SEMAPHORE_C(p, 1);
-   P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
-      .operation = OPERATION_RELEASE,
-      .release = RELEASE_AFTER_ALL_PRECEEDING_WRITES_COMPLETE,
-      .pipeline_location = PIPELINE_LOCATION_ALL,
-      .structure_size = STRUCTURE_SIZE_ONE_WORD,
-   });
+   bool after_gfx = cmd->current_cs.gfx != NULL;
+
+   /* When writing timestamps for compute, we split the control stream at each
+    * write. This ensures we never need to copy compute timestamps, which would
+    * require an extra control stream anyway. Unlike graphics, splitting compute
+    * control streams is inexpensive so there's not a strong performance reason
+    * to do otherwise. Finally, batching multiple timestamp writes (like we do
+    * for graphics) would destroy the ability to profile individual compute
+    * dispatches. While that's allowed by the Vulkan spec, it's pretty mean to
+    * apps. So.. don't do that.
+    */
+   if (!after_gfx && cmd->current_cs.cs &&
+       cmd->current_cs.cs->timestamp.end.addr) {
+
+      perf_debug(dev, "Splitting for compute timestamp");
+      hk_cmd_buffer_end_compute(cmd);
+   }
+
+   struct hk_cs *cs = hk_cmd_buffer_get_cs_general(
+      cmd, after_gfx ? &cmd->current_cs.gfx : &cmd->current_cs.cs, true);
+   if (!cs)
+      return;
+
+   if (cs->timestamp.end.addr) {
+      assert(after_gfx && "compute is handled above");
+
+      struct hk_cs *after =
+         hk_cmd_buffer_get_cs_general(cmd, &cmd->current_cs.post_gfx, true);
+      if (!after)
+         return;
+
+      libagx_copy_timestamp(after, agx_1d(1), report_addr,
+                            cs->timestamp.end.addr);
+   } else {
+      cs->timestamp.end = (struct agx_timestamp_req){
+         .addr = report_addr,
+         .handle = pool->handle,
+         .offset_B = hk_query_offset(pool, query),
+      };
+   }
 
    /* From the Vulkan spec:
    *
@@ -361,7 +465,6 @@ hk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
    if (num_queries > 1)
       emit_zero_queries(cmd, pool, query + 1, num_queries - 1, true);
 }
-#endif
-}
 
 static void
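Each control stream carries at most one end-of-stream timestamp request. The first vkCmdWriteTimestamp2 against a stream claims that slot (the firmware fills it when the stream retires); a later write against the same graphics stream is satisfied by a libagx_copy_timestamp dispatch on the post-gfx stream, and for compute the stream is simply split first so the case never arises. A sketch of that rule on stand-in types (not a drop-in implementation):

    #include <stdint.h>

    struct ts_slot { uint64_t addr; };     /* stand-in for cs->timestamp.end */
    struct stream { struct ts_slot end; }; /* stand-in for struct hk_cs */

    /* First write claims the stream's end slot; later writes are redirected
     * to a copy that runs after the stream, modeled here as a callback. */
    static void
    write_timestamp(struct stream *cs, uint64_t report_addr,
                    void (*copy_after_stream)(uint64_t dst, uint64_t src))
    {
       if (cs->end.addr)
          copy_after_stream(report_addr, cs->end.addr);
       else
          cs->end.addr = report_addr;
    }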
@@ -467,10 +570,18 @@ hk_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
 }
 
 static bool
-hk_query_is_available(struct hk_query_pool *pool, uint32_t query)
+hk_query_is_available(struct hk_device *dev, struct hk_query_pool *pool,
+                      uint32_t query)
 {
-   uint32_t *available = hk_query_available_map(pool, query);
-   return p_atomic_read(available) != 0;
+   if (hk_has_available(pool)) {
+      uint32_t *available = hk_query_available_map(pool, query);
+      return p_atomic_read(available) != 0;
+   } else {
+      const struct hk_query_report *report =
+         hk_query_report_map(dev, pool, query);
+
+      return report->value != LIBAGX_QUERY_UNAVAILABLE;
+   }
 }
 
 #define HK_QUERY_TIMEOUT 2000000000ull
@@ -482,7 +593,7 @@ hk_query_wait_for_available(struct hk_device *dev, struct hk_query_pool *pool,
    uint64_t abs_timeout_ns = os_time_get_absolute_timeout(HK_QUERY_TIMEOUT);
 
    while (os_time_get_nano() < abs_timeout_ns) {
-      if (hk_query_is_available(pool, query))
+      if (hk_query_is_available(dev, pool, query))
          return VK_SUCCESS;
 
       VkResult status = vk_device_check_status(&dev->vk);
@@ -522,7 +633,7 @@ hk_GetQueryPoolResults(VkDevice device, VkQueryPool queryPool,
    for (uint32_t i = 0; i < queryCount; i++) {
       const uint32_t query = firstQuery + i;
 
-      bool available = hk_query_is_available(pool, query);
+      bool available = hk_query_is_available(dev, pool, query);
 
       if (!available && (flags & VK_QUERY_RESULT_WAIT_BIT)) {
          status = hk_query_wait_for_available(dev, pool, query);
@@ -566,6 +677,8 @@ hk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
    VK_FROM_HANDLE(hk_buffer, dst_buffer, dstBuffer);
 
    struct hk_device *dev = hk_cmd_buffer_device(cmd);
+   hk_flush_if_timestamp(cmd, pool);
+
    struct hk_cs *cs = hk_cmd_buffer_get_cs(cmd, true);
    if (!cs)
       return;
@@ -574,7 +687,7 @@ hk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
    hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */);
 
    struct libagx_copy_query_args info = {
-      .availability = pool->bo->va->addr,
+      .availability = hk_has_available(pool) ? pool->bo->va->addr : 0,
       .results = pool->oq_queries ? dev->occlusion_queries.bo->va->addr
                                   : pool->bo->va->addr + pool->query_start,
      .oq_index = pool->oq_queries ? pool->bo->va->addr + pool->query_start : 0,
@@ -21,6 +21,11 @@ struct hk_query_pool {
    struct agx_bo *bo;
    void *bo_map;
 
+   /* For timestamp queries, the kernel-assigned timestamp buffer handle.
+    * Unused for all other query types.
+    */
+   uint32_t handle;
+
    unsigned oq_queries;
 };
@@ -68,7 +68,8 @@ queue_submit_empty(struct hk_device *dev, struct hk_queue *queue,
 
 static void
 asahi_fill_cdm_command(struct hk_device *dev, struct hk_cs *cs,
-                       struct drm_asahi_cmd_compute *cmd)
+                       struct drm_asahi_cmd_compute *cmd,
+                       struct drm_asahi_cmd_compute_user_timestamps *timestamps)
 {
    size_t len = cs->stream_linked ? 65536 /* XXX */ : (cs->current - cs->start);
@@ -87,6 +88,18 @@ asahi_fill_cdm_command(struct hk_device *dev, struct hk_cs *cs,
       .unk_mask = 0xffffffff,
    };
 
+   if (cs->timestamp.end.handle) {
+      assert(agx_supports_timestamps(&dev->dev));
+
+      *timestamps = (struct drm_asahi_cmd_compute_user_timestamps){
+         .type = ASAHI_COMPUTE_EXT_TIMESTAMPS,
+         .end_handle = cs->timestamp.end.handle,
+         .end_offset = cs->timestamp.end.offset_B,
+      };
+
+      cmd->extensions = (uint64_t)(uintptr_t)timestamps;
+   }
+
    if (cs->scratch.cs.main || cs->scratch.cs.preamble) {
       cmd->helper_arg = dev->scratch.cs.buf->va->addr;
       cmd->helper_cfg = cs->scratch.cs.preamble ? (1 << 16) : 0;
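The extension struct is linked into the command by address, so it must stay alive until the submit ioctl; that is why queue_submit (later hunks) allocates a ts_inner array parallel to cmds_inner instead of using temporaries local to the fill helpers. A sketch of the chaining under stand-in uapi types (field names follow the diff; the type value is a placeholder):

    #include <stdint.h>

    struct user_ts { uint32_t type, end_handle, end_offset; };
    struct cmd_compute { uint64_t extensions; };

    /* Chain a caller-owned extension struct into the command. 'storage' must
     * outlive the submit ioctl, since only its address is recorded. */
    static void
    attach_timestamps(struct cmd_compute *cmd, struct user_ts *storage,
                      uint32_t handle, uint32_t offset_B)
    {
       *storage = (struct user_ts){
          .type = 0 /* placeholder for ASAHI_COMPUTE_EXT_TIMESTAMPS */,
          .end_handle = handle,
          .end_offset = offset_B,
       };
       cmd->extensions = (uint64_t)(uintptr_t)storage;
    }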
@@ -96,7 +109,8 @@ asahi_fill_cdm_command(struct hk_device *dev, struct hk_cs *cs,
 
 static void
 asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs,
-                       struct drm_asahi_cmd_render *c)
+                       struct drm_asahi_cmd_render *c,
+                       struct drm_asahi_cmd_render_user_timestamps *timestamps)
 {
    unsigned cmd_ta_id = agx_get_global_id(&dev->dev);
    unsigned cmd_3d_id = agx_get_global_id(&dev->dev);
@@ -251,6 +265,18 @@ asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs,
       c->fragment_helper_cfg = cs->scratch.fs.preamble ? (1 << 16) : 0;
       c->fragment_helper_program = agx_helper_program(&dev->bg_eot);
    }
+
+   if (cs->timestamp.end.handle) {
+      assert(agx_supports_timestamps(&dev->dev));
+
+      c->extensions = (uint64_t)(uintptr_t)timestamps;
+
+      *timestamps = (struct drm_asahi_cmd_render_user_timestamps){
+         .type = ASAHI_RENDER_EXT_TIMESTAMPS,
+         .frg_end_handle = cs->timestamp.end.handle,
+         .frg_end_offset = cs->timestamp.end.offset_B,
+      };
+   }
 }
 
 static void
@@ -278,6 +304,11 @@ union drm_asahi_cmd {
    struct drm_asahi_cmd_render render;
 };
 
+union drm_asahi_user_timestamps {
+   struct drm_asahi_cmd_compute_user_timestamps compute;
+   struct drm_asahi_cmd_render_user_timestamps render;
+};
+
 /* XXX: Batching multiple commands per submission is causing rare (7ppm) flakes
  * on the CTS once lossless compression is enabled. This needs to be
  * investigated before we can reenable this mechanism. We are likely missing a
@@ -466,6 +497,8 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
    struct drm_asahi_command *cmds = alloca(sizeof(*cmds) * command_count);
    union drm_asahi_cmd *cmds_inner =
       alloca(sizeof(*cmds_inner) * command_count);
+   union drm_asahi_user_timestamps *ts_inner =
+      alloca(sizeof(*ts_inner) * command_count);
 
    unsigned cmd_it = 0;
    unsigned nr_vdm = 0, nr_cdm = 0;
@@ -491,29 +524,33 @@
             "%u: Submitting CDM with %u API calls, %u dispatches, %u flushes",
             i, cs->stats.calls, cs->stats.cmds, cs->stats.flushes);
 
-         assert(cs->stats.cmds > 0 || cs->stats.flushes > 0);
+         assert(cs->stats.cmds > 0 || cs->stats.flushes > 0 ||
+                cs->timestamp.end.handle);
 
          cmd.cmd_type = DRM_ASAHI_CMD_COMPUTE;
          cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_compute);
         nr_cdm++;
 
+         asahi_fill_cdm_command(dev, cs, &cmds_inner[cmd_it].compute,
+                                &ts_inner[cmd_it].compute);
+
         /* Work around for shipping 6.11.8 kernels, remove when we bump uapi
          */
-         if (!cmd.extensions)
+         if (!agx_supports_timestamps(&dev->dev))
            cmd.cmd_buffer_size -= 8;
-
-         asahi_fill_cdm_command(dev, cs, &cmds_inner[cmd_it].compute);
       } else {
          assert(cs->type == HK_CS_VDM);
          perf_debug(dev, "%u: Submitting VDM with %u API draws, %u draws", i,
                     cs->stats.calls, cs->stats.cmds);
-         assert(cs->stats.cmds > 0 || cs->cr.process_empty_tiles);
+         assert(cs->stats.cmds > 0 || cs->cr.process_empty_tiles ||
+                cs->timestamp.end.handle);
 
          cmd.cmd_type = DRM_ASAHI_CMD_RENDER;
          cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_render);
         nr_vdm++;
 
-         asahi_fill_vdm_command(dev, cs, &cmds_inner[cmd_it].render);
+         asahi_fill_vdm_command(dev, cs, &cmds_inner[cmd_it].render,
+                                &ts_inner[cmd_it].render);
       }
 
       cmds[cmd_it++] = cmd;