mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 15:50:17 +01:00
ac/llvm: implement WA in nir to llvm
LLVM implements multiple workarounds for gfx11.
The problem is that they're not applied for shaders built in
parts.
LLVM will be modified to be more conservative and apply the
workaround in more places but in the meantime, add a simpler
implementation in the NIR to LLVM backend: insert a wait at
the end of each shader part.
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10785
Cc: mesa-stable
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29304>
(cherry picked from commit 14974fd097)
This commit is contained in:
parent
bf86fa1b7d
commit
53d1306fe5
6 changed files with 30 additions and 1 deletions
|
|
@ -1004,7 +1004,7 @@
|
|||
"description": "ac/llvm: implement WA in nir to llvm",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -1320,6 +1320,12 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
|
||||
info->has_export_conflict_bug = info->gfx_level == GFX11;
|
||||
|
||||
/* When LLVM is fixed to handle multi-part shaders, this value will depend
|
||||
* on the known good versions of LLVM. Until then, enable the equivalent WA
|
||||
* in the nir -> llvm backend.
|
||||
*/
|
||||
info->needs_llvm_wait_wa = info->gfx_level == GFX11;
|
||||
|
||||
/* Convert the SDMA version in the current GPU to an enum. */
|
||||
info->sdma_ip_version =
|
||||
(enum sdma_version)SDMA_VERSION_VALUE(info->ip[AMD_IP_SDMA].ver_major,
|
||||
|
|
|
|||
|
|
@ -120,6 +120,9 @@ struct radeon_info {
|
|||
bool sdma_supports_compression; /* Whether SDMA supports DCC and HTILE. */
|
||||
bool has_set_context_pairs_packed;
|
||||
bool has_set_sh_pairs_packed;
|
||||
bool needs_llvm_wait_wa; /* True if the chip needs workarounds based on s_waitcnt_depctr but
|
||||
* the LLVM version doesn't work with multi-part shaders.
|
||||
*/
|
||||
|
||||
/* conformant_trunc_coord is equal to TA_CNTL2.TRUNCATE_COORD_MODE, which exists since gfx11.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -320,6 +320,7 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
|
|||
report_if(!part->sections);
|
||||
|
||||
Elf_Scn *section = NULL;
|
||||
bool first_section = true;
|
||||
while ((section = elf_nextscn(part->elf, section))) {
|
||||
Elf64_Shdr *shdr = elf64_getshdr(section);
|
||||
struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
|
||||
|
|
@ -348,6 +349,13 @@ bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
|
|||
}
|
||||
|
||||
if (s->is_pasted_text) {
|
||||
if (part_idx > 0 && first_section && binary->options.waitcnt_wa) {
|
||||
/* Reserve a dword at the beginning of this part. */
|
||||
exec_size += 4;
|
||||
pasted_text_size += 4;
|
||||
first_section = false;
|
||||
}
|
||||
|
||||
s->offset = pasted_text_size;
|
||||
pasted_text_size += shdr->sh_size;
|
||||
} else {
|
||||
|
|
@ -715,6 +723,7 @@ int ac_rtld_upload(struct ac_rtld_upload_info *u)
|
|||
for (unsigned i = 0; i < u->binary->num_parts; ++i) {
|
||||
struct ac_rtld_part *part = &u->binary->parts[i];
|
||||
|
||||
bool first_section = true;
|
||||
Elf_Scn *section = NULL;
|
||||
while ((section = elf_nextscn(part->elf, section))) {
|
||||
Elf64_Shdr *shdr = elf64_getshdr(section);
|
||||
|
|
@ -727,6 +736,13 @@ int ac_rtld_upload(struct ac_rtld_upload_info *u)
|
|||
|
||||
Elf_Data *data = elf_getdata(section, NULL);
|
||||
report_elf_if(!data || data->d_size != shdr->sh_size);
|
||||
|
||||
if (i > 0 && first_section && u->binary->options.waitcnt_wa) {
|
||||
assert(s->offset >= 4);
|
||||
*(uint32_t *)(u->rx_ptr + s->offset - 4) = util_cpu_to_le32(0xbf880fff);
|
||||
first_section = false;
|
||||
}
|
||||
|
||||
memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
|
||||
|
||||
size = MAX2(size, s->offset + shdr->sh_size);
|
||||
|
|
|
|||
|
|
@ -35,6 +35,8 @@ struct ac_rtld_options {
|
|||
/* Loader will insert an s_sethalt 1 instruction as the
|
||||
* first instruction. */
|
||||
bool halt_at_entry : 1;
|
||||
|
||||
bool waitcnt_wa : 1;
|
||||
};
|
||||
|
||||
/* Lightweight wrapper around underlying ELF objects. */
|
||||
|
|
|
|||
|
|
@ -788,6 +788,8 @@ bool si_shader_binary_open(struct si_screen *screen, struct si_shader *shader,
|
|||
.options =
|
||||
{
|
||||
.halt_at_entry = screen->options.halt_shaders,
|
||||
.waitcnt_wa = num_parts > 1 &&
|
||||
screen->info.needs_llvm_wait_wa,
|
||||
},
|
||||
.shader_type = sel->stage,
|
||||
.wave_size = shader->wave_size,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue