mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-19 00:38:06 +02:00
ac/llvm: implement WA in nir to llvm
LLVM implements multiple workarounds for gfx11. The problem is that they're not applied to shaders built in parts. LLVM will be modified to be more conservative and apply the workarounds in more places, but in the meantime, add a simpler implementation in the NIR-to-LLVM backend: insert a wait at the end of each shader part.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10785
Cc: mesa-stable
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29304>
This commit is contained in:
parent
71afacff39
commit
14974fd097
5 changed files with 29 additions and 0 deletions
|
|
@ -1344,6 +1344,12 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
|
||||
info->has_export_conflict_bug = info->gfx_level == GFX11;
|
||||
|
||||
/* When LLVM is fixed to handle multi-part shaders, this value will depend
|
||||
* on the known good versions of LLVM. Until then, enable the equivalent WA
|
||||
* in the nir -> llvm backend.
|
||||
*/
|
||||
info->needs_llvm_wait_wa = info->gfx_level == GFX11;
|
||||
|
||||
/* Convert the SDMA version in the current GPU to an enum. */
|
||||
info->sdma_ip_version =
|
||||
(enum sdma_version)SDMA_VERSION_VALUE(info->ip[AMD_IP_SDMA].ver_major,
|
||||
|
|
|
|||
|
|
@ -125,6 +125,9 @@ struct radeon_info {
|
|||
bool has_set_sh_pairs;
|
||||
bool has_set_sh_pairs_packed;
|
||||
bool has_set_uconfig_pairs;
|
||||
bool needs_llvm_wait_wa; /* True if the chip needs workarounds based on s_waitcnt_depctr but
|
||||
* the LLVM version doesn't work with multi-part shaders.
|
||||
*/
|
||||
|
||||
/* conformant_trunc_coord is equal to TA_CNTL2.TRUNCATE_COORD_MODE, which exists since gfx11.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -320,6 +320,7 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
|
|||
report_if(!part->sections);
|
||||
|
||||
Elf_Scn *section = NULL;
|
||||
bool first_section = true;
|
||||
while ((section = elf_nextscn(part->elf, section))) {
|
||||
Elf64_Shdr *shdr = elf64_getshdr(section);
|
||||
struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
|
||||
|
|
@ -348,6 +349,13 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
|
|||
}
|
||||
|
||||
if (s->is_pasted_text) {
|
||||
if (part_idx > 0 && first_section && binary->options.waitcnt_wa) {
|
||||
/* Reserve a dword at the beginning of this part. */
|
||||
exec_size += 4;
|
||||
pasted_text_size += 4;
|
||||
first_section = false;
|
||||
}
|
||||
|
||||
s->offset = pasted_text_size;
|
||||
pasted_text_size += shdr->sh_size;
|
||||
} else {
|
||||
|
|
@ -715,6 +723,7 @@ int ac_rtld_upload(struct ac_rtld_upload_info *u)
|
|||
for (unsigned i = 0; i < u->binary->num_parts; ++i) {
|
||||
struct ac_rtld_part *part = &u->binary->parts[i];
|
||||
|
||||
bool first_section = true;
|
||||
Elf_Scn *section = NULL;
|
||||
while ((section = elf_nextscn(part->elf, section))) {
|
||||
Elf64_Shdr *shdr = elf64_getshdr(section);
|
||||
|
|
@ -727,6 +736,13 @@ int ac_rtld_upload(struct ac_rtld_upload_info *u)
|
|||
|
||||
Elf_Data *data = elf_getdata(section, NULL);
|
||||
report_elf_if(!data || data->d_size != shdr->sh_size);
|
||||
|
||||
if (i > 0 && first_section && u->binary->options.waitcnt_wa) {
|
||||
assert(s->offset >= 4);
|
||||
*(uint32_t *)(u->rx_ptr + s->offset - 4) = util_cpu_to_le32(0xbf880fff);
|
||||
first_section = false;
|
||||
}
|
||||
|
||||
memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
|
||||
|
||||
size = MAX2(size, s->offset + shdr->sh_size);
|
||||
|
|
|
|||
|
|
@ -35,6 +35,8 @@ struct ac_rtld_options {
|
|||
/* Loader will insert an s_sethalt 1 instruction as the
|
||||
* first instruction. */
|
||||
bool halt_at_entry : 1;
|
||||
|
||||
bool waitcnt_wa : 1;
|
||||
};
|
||||
|
||||
/* Lightweight wrapper around underlying ELF objects. */
|
||||
|
|
|
|||
|
|
@ -803,6 +803,8 @@ bool si_shader_binary_open(struct si_screen *screen, struct si_shader *shader,
|
|||
.options =
|
||||
{
|
||||
.halt_at_entry = screen->options.halt_shaders,
|
||||
.waitcnt_wa = num_parts > 1 &&
|
||||
screen->info.needs_llvm_wait_wa,
|
||||
},
|
||||
.shader_type = sel->stage,
|
||||
.wave_size = shader->wave_size,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue