mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
tu/query: improve CP_EVENT_WRITE7::ZPASS_DONE usage
The WRITE_SAMPLE_COUNT_DIFF attribute of CP_EVENT_WRITE7 computes the difference between the begin and end sample-count values, but rather than overwriting the destination it accumulates that difference into it, so the attribute is renamed to WRITE_ACCUM_SAMPLE_COUNT_DIFF to reflect that.

When writing out the ending sample count for the occlusion query through CP_EVENT_WRITE7::ZPASS_DONE, enable the SAMPLE_COUNT_END_OFFSET and WRITE_ACCUM_SAMPLE_COUNT_DIFF attributes on the event struct and pass the address of the beginning-count value instead of the ending-count value. The address change is necessary because the difference is written 8 bytes into the buffer and the ending count is written 16 bytes into the buffer, both relative to the address given.

With the occlusion query result now being accumulated directly in the query buffer whenever the hardware supports it, the occlusion_query_slot struct is adjusted to reflect that layout. On pre-a740 hardware the difference is still calculated and stored in the result location, whereas on newer hardware CP_EVENT_WRITE7::ZPASS_DONE can do it for us.

Signed-off-by: Zan Dobersek <zdobersek@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28610>
This commit is contained in:
parent
1df5ab2562
commit
85a8cc14c0
2 changed files with 40 additions and 25 deletions
|
|
@ -1659,8 +1659,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
|
|||
<bitfield name="WRITE_SAMPLE_COUNT" pos="12" type="boolean"/>
|
||||
<!-- Write sample count at (iova + 16) -->
|
||||
<bitfield name="SAMPLE_COUNT_END_OFFSET" pos="13" type="boolean"/>
|
||||
<!-- *(iova + 8) = *(iova + 16) - *iova -->
|
||||
<bitfield name="WRITE_SAMPLE_COUNT_DIFF" pos="14" type="boolean"/>
|
||||
<!-- *(iova + 8) += *(iova + 16) - *iova -->
|
||||
<bitfield name="WRITE_ACCUM_SAMPLE_COUNT_DIFF" pos="14" type="boolean"/>
|
||||
|
||||
<!-- Next 4 flags are valid to set only when concurrent binning is enabled -->
|
||||
<!-- Increment 16b BV counter. Valid only in BV pipe -->
|
||||
|
|
|
|||
|
|
@ -30,19 +30,14 @@ struct PACKED query_slot {
|
|||
uint64_t available;
|
||||
};
|
||||
|
||||
struct PACKED occlusion_slot_value {
|
||||
/* Seems sample counters are placed to be 16-byte aligned
|
||||
* even though this query needs an 8-byte slot. */
|
||||
uint64_t value;
|
||||
uint64_t _padding;
|
||||
};
|
||||
|
||||
struct PACKED occlusion_query_slot {
|
||||
struct query_slot common;
|
||||
uint64_t result;
|
||||
uint64_t _padding0;
|
||||
|
||||
struct occlusion_slot_value begin;
|
||||
struct occlusion_slot_value end;
|
||||
uint64_t begin;
|
||||
uint64_t result;
|
||||
uint64_t end;
|
||||
uint64_t _padding1;
|
||||
};
|
||||
|
||||
struct PACKED timestamp_query_slot {
|
||||
|
|
@ -96,13 +91,18 @@ struct PACKED primitives_generated_query_slot {
|
|||
uint64_t end;
|
||||
};
|
||||
|
||||
/* Returns the IOVA of a given uint64_t field in a given slot of a query
|
||||
* pool. */
|
||||
/* Returns the IOVA or mapped address of a given uint64_t field
|
||||
* in a given slot of a query pool. */
|
||||
#define query_iova(type, pool, query, field) \
|
||||
pool->bo->iova + pool->stride * (query) + offsetof(type, field)
|
||||
#define query_addr(type, pool, query, field) \
|
||||
(uint64_t *) ((char *) pool->bo->map + pool->stride * (query) + \
|
||||
offsetof(type, field))
|
||||
|
||||
#define occlusion_query_iova(pool, query, field) \
|
||||
query_iova(struct occlusion_query_slot, pool, query, field)
|
||||
#define occlusion_query_addr(pool, query, field) \
|
||||
query_addr(struct occlusion_query_slot, pool, query, field)
|
||||
|
||||
#define pipeline_stat_query_iova(pool, query, field, idx) \
|
||||
pool->bo->iova + pool->stride * (query) + \
|
||||
|
|
@ -527,6 +527,9 @@ get_query_pool_results(struct tu_device *device,
|
|||
result = query_result_addr(pool, query, uint64_t, stat_idx);
|
||||
} else if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
|
||||
result = query_result_addr(pool, query, struct perfcntr_query_slot, k);
|
||||
} else if (pool->type == VK_QUERY_TYPE_OCCLUSION) {
|
||||
assert(k == 0);
|
||||
result = occlusion_query_addr(pool, query, result);
|
||||
} else {
|
||||
result = query_result_addr(pool, query, uint64_t, k);
|
||||
}
|
||||
|
|
@ -665,6 +668,9 @@ emit_copy_query_pool_results(struct tu_cmd_buffer *cmdbuf,
|
|||
} else if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
|
||||
result_iova = query_result_iova(pool, query,
|
||||
struct perfcntr_query_slot, k);
|
||||
} else if (pool->type == VK_QUERY_TYPE_OCCLUSION) {
|
||||
assert(k == 0);
|
||||
result_iova = occlusion_query_iova(pool, query, result);
|
||||
} else {
|
||||
result_iova = query_result_iova(pool, query, uint64_t, k);
|
||||
}
|
||||
|
|
@ -765,6 +771,9 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
|
|||
} else if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
|
||||
result_iova = query_result_iova(pool, query,
|
||||
struct perfcntr_query_slot, k);
|
||||
} else if (pool->type == VK_QUERY_TYPE_OCCLUSION) {
|
||||
assert(k == 0);
|
||||
result_iova = occlusion_query_iova(pool, query, result);
|
||||
} else {
|
||||
result_iova = query_result_iova(pool, query, uint64_t, k);
|
||||
}
|
||||
|
|
@ -818,6 +827,9 @@ tu_ResetQueryPool(VkDevice device,
|
|||
if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
|
||||
res = query_result_addr(pool, i + firstQuery,
|
||||
struct perfcntr_query_slot, k);
|
||||
} else if (pool->type == VK_QUERY_TYPE_OCCLUSION) {
|
||||
assert(k == 0);
|
||||
res = occlusion_query_addr(pool, i + firstQuery, result);
|
||||
} else {
|
||||
res = query_result_addr(pool, i + firstQuery, uint64_t, k);
|
||||
}
|
||||
|
|
@ -1154,8 +1166,8 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf,
|
|||
|
||||
uint64_t available_iova = query_available_iova(pool, query);
|
||||
uint64_t begin_iova = occlusion_query_iova(pool, query, begin);
|
||||
uint64_t result_iova = occlusion_query_iova(pool, query, result);
|
||||
uint64_t end_iova = occlusion_query_iova(pool, query, end);
|
||||
uint64_t result_iova = query_result_iova(pool, query, uint64_t, 0);
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
|
||||
tu_cs_emit_qw(cs, end_iova);
|
||||
tu_cs_emit_qw(cs, 0xffffffffffffffffull);
|
||||
|
|
@ -1176,11 +1188,12 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf,
|
|||
tu_cs_emit(cs, CCU_CLEAN_DEPTH);
|
||||
}
|
||||
} else {
|
||||
/* A7XX TODO: Calculate (end - begin) via ZPASS_DONE. */
|
||||
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE7, 3);
|
||||
tu_cs_emit(cs, CP_EVENT_WRITE7_0(.event = ZPASS_DONE,
|
||||
.write_sample_count = true).value);
|
||||
tu_cs_emit_qw(cs, end_iova);
|
||||
.write_sample_count = true,
|
||||
.sample_count_end_offset = true,
|
||||
.write_accum_sample_count_diff = true).value);
|
||||
tu_cs_emit_qw(cs, begin_iova);
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
|
||||
|
|
@ -1191,13 +1204,15 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf,
|
|||
tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0));
|
||||
tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
|
||||
|
||||
/* result (dst) = result (srcA) + end (srcB) - begin (srcC) */
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 9);
|
||||
tu_cs_emit(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
|
||||
tu_cs_emit_qw(cs, result_iova);
|
||||
tu_cs_emit_qw(cs, result_iova);
|
||||
tu_cs_emit_qw(cs, end_iova);
|
||||
tu_cs_emit_qw(cs, begin_iova);
|
||||
if (!cmdbuf->device->physical_device->info->a7xx.has_event_write_sample_count) {
|
||||
/* result (dst) = result (srcA) + end (srcB) - begin (srcC) */
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 9);
|
||||
tu_cs_emit(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
|
||||
tu_cs_emit_qw(cs, result_iova);
|
||||
tu_cs_emit_qw(cs, result_iova);
|
||||
tu_cs_emit_qw(cs, end_iova);
|
||||
tu_cs_emit_qw(cs, begin_iova);
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue