mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 01:18:06 +02:00
tu: Add a725 workaround dispatch at the start of each cmdbuf
Blob executes a special compute dispatch at the start of each command buffer. We copy this dispatch as is. At this point we don't know what this workaround is for. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25888>
This commit is contained in:
parent
37f11ff1d4
commit
3cd6bb3e5d
6 changed files with 145 additions and 1 deletions
|
|
@ -201,6 +201,11 @@ struct fd_dev_info {
|
|||
|
||||
/* Whether there is CP_EVENT_WRITE7::WRITE_SAMPLE_COUNT */
|
||||
bool has_event_write_sample_count;
|
||||
|
||||
/* Blob executes a special compute dispatch at the start of each
|
||||
* command buffer. We copy this dispatch as is.
|
||||
*/
|
||||
bool cmdbuf_start_a725_quirk;
|
||||
} a7xx;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -704,7 +704,9 @@ add_gpus([
|
|||
)
|
||||
))
|
||||
|
||||
a7xx_725 = A7XXProps()
|
||||
a7xx_725 = A7XXProps(
|
||||
cmdbuf_start_a725_quirk = True,
|
||||
)
|
||||
|
||||
a7xx_730 = A7XXProps()
|
||||
|
||||
|
|
|
|||
|
|
@ -1942,6 +1942,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
|
|||
|
||||
<bitfield name="BV" pos="25" variants="THREAD_MODE" type="boolean"/>
|
||||
<bitfield name="BR" pos="26" variants="THREAD_MODE" type="boolean"/>
|
||||
<bitfield name="LPAC" pos="27" variants="THREAD_MODE" type="boolean"/>
|
||||
|
||||
<bitfield name="MODE" low="28" high="31" type="compare_mode" addvariant="yes"/>
|
||||
</reg32>
|
||||
|
|
|
|||
|
|
@ -1274,6 +1274,15 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4(0x3f0243f0), );
|
||||
}
|
||||
|
||||
if (phys_dev->info->a7xx.cmdbuf_start_a725_quirk) {
|
||||
tu_cs_reserve(cs, 3 + 4);
|
||||
tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
|
||||
tu_cs_emit(cs, CP_COND_REG_EXEC_0_MODE(THREAD_MODE) |
|
||||
CP_COND_REG_EXEC_0_BR | CP_COND_REG_EXEC_0_LPAC);
|
||||
tu_cs_emit(cs, RENDER_MODE_CP_COND_REG_EXEC_1_DWORDS(4));
|
||||
tu_cs_emit_ib(cs, dev->cmdbuf_start_a725_quirk_entry);
|
||||
}
|
||||
|
||||
tu_cs_sanity_check(cs);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2016,6 +2016,112 @@ tu_init_dbg_reg_stomper(struct tu_device *device)
|
|||
device->dbg_renderpass_stomp_cs = rp_cs;
|
||||
}
|
||||
|
||||
/* It is unknown what this workaround is for and what it fixes. */
|
||||
static VkResult
|
||||
tu_init_cmdbuf_start_a725_quirk(struct tu_device *device)
|
||||
{
|
||||
struct tu_cs *cs;
|
||||
|
||||
if (!(device->cmdbuf_start_a725_quirk_cs =
|
||||
(struct tu_cs *) calloc(1, sizeof(struct tu_cs)))) {
|
||||
return vk_startup_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
|
||||
"OOM");
|
||||
}
|
||||
|
||||
if (!(device->cmdbuf_start_a725_quirk_entry =
|
||||
(struct tu_cs_entry *) calloc(1, sizeof(struct tu_cs_entry)))) {
|
||||
free(device->cmdbuf_start_a725_quirk_cs);
|
||||
return vk_startup_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
|
||||
"OOM");
|
||||
}
|
||||
|
||||
cs = device->cmdbuf_start_a725_quirk_cs;
|
||||
tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 57, "a725 workaround cs");
|
||||
|
||||
struct tu_cs shader_cs;
|
||||
tu_cs_begin_sub_stream(cs, 10, &shader_cs);
|
||||
|
||||
uint32_t raw_shader[] = {
|
||||
0x00040000, 0x40600000, // mul.f hr0.x, hr0.x, hr1.x
|
||||
0x00050001, 0x40600001, // mul.f hr0.y, hr0.y, hr1.y
|
||||
0x00060002, 0x40600002, // mul.f hr0.z, hr0.z, hr1.z
|
||||
0x00070003, 0x40600003, // mul.f hr0.w, hr0.w, hr1.w
|
||||
0x00000000, 0x03000000, // end
|
||||
};
|
||||
|
||||
tu_cs_emit_array(&shader_cs, raw_shader, ARRAY_SIZE(raw_shader));
|
||||
struct tu_cs_entry shader_entry = tu_cs_end_sub_stream(cs, &shader_cs);
|
||||
uint64_t shader_iova = shader_entry.bo->iova + shader_entry.offset;
|
||||
|
||||
struct tu_cs sub_cs;
|
||||
tu_cs_begin_sub_stream(cs, 47, &sub_cs);
|
||||
|
||||
tu_cs_emit_regs(&sub_cs, HLSQ_INVALIDATE_CMD(A7XX,
|
||||
.vs_state = true, .hs_state = true, .ds_state = true,
|
||||
.gs_state = true, .fs_state = true, .gfx_ibo = true,
|
||||
.cs_bindless = 0xff, .gfx_bindless = 0xff));
|
||||
tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL(A7XX,
|
||||
.constlen = 4,
|
||||
.enabled = true));
|
||||
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CONFIG(.enabled = true));
|
||||
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CTRL_REG0(
|
||||
.threadmode = MULTI,
|
||||
.threadsize = THREAD128,
|
||||
.mergedregs = true));
|
||||
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_UNKNOWN_A9B1(.shared_size = 1));
|
||||
tu_cs_emit_regs(&sub_cs, HLSQ_CS_KERNEL_GROUP_X(A7XX, 1),
|
||||
HLSQ_CS_KERNEL_GROUP_Y(A7XX, 1),
|
||||
HLSQ_CS_KERNEL_GROUP_Z(A7XX, 1));
|
||||
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_INSTRLEN(.sp_cs_instrlen = 1));
|
||||
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_TEX_COUNT(0));
|
||||
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_IBO_COUNT(0));
|
||||
tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_CNTL_1(
|
||||
.linearlocalidregid = regid(63, 0),
|
||||
.threadsize = THREAD128,
|
||||
.unk11 = true,
|
||||
.unk22 = true,
|
||||
.yalign = CS_YALIGN_1));
|
||||
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CNTL_0(
|
||||
.wgidconstid = regid(51, 3),
|
||||
.wgsizeconstid = regid(48, 0),
|
||||
.wgoffsetconstid = regid(63, 0),
|
||||
.localidregid = regid(63, 0)));
|
||||
tu_cs_emit_regs(&sub_cs, SP_CS_CNTL_1(A7XX,
|
||||
.linearlocalidregid = regid(63, 0),
|
||||
.threadsize = THREAD128,
|
||||
.unk15 = true));
|
||||
tu_cs_emit_regs(&sub_cs, A7XX_SP_CS_UNKNOWN_A9BE(0));
|
||||
|
||||
tu_cs_emit_regs(&sub_cs,
|
||||
HLSQ_CS_NDRANGE_0(A7XX, .kerneldim = 3,
|
||||
.localsizex = 255,
|
||||
.localsizey = 1,
|
||||
.localsizez = 1),
|
||||
HLSQ_CS_NDRANGE_1(A7XX, .globalsize_x = 3072),
|
||||
HLSQ_CS_NDRANGE_2(A7XX, .globaloff_x = 0),
|
||||
HLSQ_CS_NDRANGE_3(A7XX, .globalsize_y = 1),
|
||||
HLSQ_CS_NDRANGE_4(A7XX, .globaloff_y = 0),
|
||||
HLSQ_CS_NDRANGE_5(A7XX, .globalsize_z = 1),
|
||||
HLSQ_CS_NDRANGE_6(A7XX, .globaloff_z = 0));
|
||||
tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_LOCAL_SIZE(
|
||||
.localsizex = 255,
|
||||
.localsizey = 0,
|
||||
.localsizez = 0));
|
||||
tu_cs_emit_pkt4(&sub_cs, REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET, 3);
|
||||
tu_cs_emit(&sub_cs, 0);
|
||||
tu_cs_emit_qw(&sub_cs, shader_iova);
|
||||
|
||||
tu_cs_emit_pkt7(&sub_cs, CP_EXEC_CS, 4);
|
||||
tu_cs_emit(&sub_cs, 0x00000000);
|
||||
tu_cs_emit(&sub_cs, CP_EXEC_CS_1_NGROUPS_X(12));
|
||||
tu_cs_emit(&sub_cs, CP_EXEC_CS_2_NGROUPS_Y(1));
|
||||
tu_cs_emit(&sub_cs, CP_EXEC_CS_3_NGROUPS_Z(1));
|
||||
|
||||
*device->cmdbuf_start_a725_quirk_entry = tu_cs_end_sub_stream(cs, &sub_cs);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
||||
const VkDeviceCreateInfo *pCreateInfo,
|
||||
|
|
@ -2315,6 +2421,12 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
}
|
||||
}
|
||||
|
||||
if (physical_device->info->a7xx.cmdbuf_start_a725_quirk) {
|
||||
result = tu_init_cmdbuf_start_a725_quirk(device);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_a725_workaround;
|
||||
}
|
||||
|
||||
tu_init_dbg_reg_stomper(device);
|
||||
|
||||
/* Initialize a condition variable for timeline semaphore */
|
||||
|
|
@ -2376,6 +2488,12 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
return VK_SUCCESS;
|
||||
|
||||
fail_timeline_cond:
|
||||
if (device->cmdbuf_start_a725_quirk_entry) {
|
||||
free(device->cmdbuf_start_a725_quirk_entry);
|
||||
tu_cs_finish(device->cmdbuf_start_a725_quirk_cs);
|
||||
free(device->cmdbuf_start_a725_quirk_cs);
|
||||
}
|
||||
fail_a725_workaround:
|
||||
fail_prepare_perfcntrs_pass_cs:
|
||||
free(device->perfcntrs_pass_cs_entries);
|
||||
tu_cs_finish(device->perfcntrs_pass_cs);
|
||||
|
|
@ -2462,6 +2580,12 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
|
|||
free(device->dbg_renderpass_stomp_cs);
|
||||
}
|
||||
|
||||
if (device->cmdbuf_start_a725_quirk_entry) {
|
||||
free(device->cmdbuf_start_a725_quirk_entry);
|
||||
tu_cs_finish(device->cmdbuf_start_a725_quirk_cs);
|
||||
free(device->cmdbuf_start_a725_quirk_cs);
|
||||
}
|
||||
|
||||
tu_autotune_fini(&device->autotune, device);
|
||||
|
||||
tu_bo_suballocator_finish(&device->pipeline_suballoc);
|
||||
|
|
|
|||
|
|
@ -355,6 +355,9 @@ struct tu_device
|
|||
struct tu_cs *perfcntrs_pass_cs;
|
||||
struct tu_cs_entry *perfcntrs_pass_cs_entries;
|
||||
|
||||
struct tu_cs *cmdbuf_start_a725_quirk_cs;
|
||||
struct tu_cs_entry *cmdbuf_start_a725_quirk_entry;
|
||||
|
||||
struct util_dynarray dynamic_rendering_pending;
|
||||
VkCommandPool dynamic_rendering_pool;
|
||||
uint32_t dynamic_rendering_fence;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue