tu/autotune: use SAMPLE_COUNT_END_OFFSET when writing the ending sample count

Use the SAMPLE_COUNT_END_OFFSET attribute when writing out the sample count
at the end of the ZPASS_DONE event, if the GPU version supports it. With
this attribute the write is done 16 bytes past the specified iova, which is
exactly where the tu_renderpass_samples struct placed at that address
expects the end value (samples_end).

Signed-off-by: Zan Dobersek <zdobersek@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28610>
This commit is contained in:
Zan Dobersek 2024-04-06 14:44:00 +02:00 committed by Marge Bot
parent 85dd83aa46
commit b2613ba0fb
2 changed files with 11 additions and 7 deletions

View file

@ -667,8 +667,7 @@ tu_autotune_begin_renderpass(struct tu_cmd_buffer *cmd,
&autotune_result->bo);
tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.copy = true));
if (CHIP >= A7XX) {
if (cmd->device->physical_device->info->a7xx.has_event_write_sample_count) {
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE7, 3);
tu_cs_emit(cs, CP_EVENT_WRITE7_0(.event = ZPASS_DONE,
.write_sample_count = true).value);
@ -693,17 +692,19 @@ void tu_autotune_end_renderpass(struct tu_cmd_buffer *cmd,
if (!autotune_result->bo.iova)
return;
uint64_t result_iova = autotune_result->bo.iova +
offsetof(struct tu_renderpass_samples, samples_end);
uint64_t result_iova = autotune_result->bo.iova;
tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.copy = true));
if (CHIP >= A7XX) {
if (cmd->device->physical_device->info->a7xx.has_event_write_sample_count) {
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE7, 3);
tu_cs_emit(cs, CP_EVENT_WRITE7_0(.event = ZPASS_DONE,
.write_sample_count = true).value);
.write_sample_count = true,
.sample_count_end_offset = true).value);
tu_cs_emit_qw(cs, result_iova);
} else {
result_iova += offsetof(struct tu_renderpass_samples, samples_end);
tu_cs_emit_regs(cs,
A6XX_RB_SAMPLE_COUNT_ADDR(.qword = result_iova));
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);

View file

@ -90,7 +90,7 @@ struct tu_autotune {
* Note that we do the math on the CPU to avoid a WFI. But pre-emption
* may force us to revisit that.
*/
struct tu_renderpass_samples {
struct PACKED tu_renderpass_samples {
uint64_t samples_start;
/* hw requires the sample start/stop locations to be 128b aligned. */
uint64_t __pad0;
@ -98,6 +98,9 @@ struct tu_renderpass_samples {
uint64_t __pad1;
};
/* Necessary when writing sample counts using CP_EVENT_WRITE7::ZPASS_DONE. */
static_assert(offsetof(struct tu_renderpass_samples, samples_end) == 16);
/**
* Tracks the results from an individual renderpass. Initially created
* per renderpass, and appended to the tail of at->pending_results. At a later