mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
panfrost: use bifrost instruction table for bi_lower_swizzle
Fixes two known issues: - We did not lower invalid swizzles for IADD.v4s8, triggered in the CTS by enabling uniformAndStorageBuffer8BitAccess and storageBuffer8BitAccess in panvk. - We did not lower invalid swizzles for IMUL.v4i8, triggered by dEQP-VK.spirv_assembly.instruction.compute.mul_extended.(un)signed_8bit on bifrost. The old logic was missing several other instructions, so there may be additional bugs that we don't know about. There are no cases where the new behavior will keep swizzles that would have been lowered previously, so this change should not introduce any new bugs with valhall. Signed-off-by: Benjamin Lee <benjamin.lee@collabora.com> Reviewed-by: Eric R. Smith <eric.smith@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33416>
This commit is contained in:
parent
f513ddfc1c
commit
168c96816a
5 changed files with 87 additions and 92 deletions
|
|
@ -360,9 +360,6 @@ dEQP-VK.memory.pipeline_barrier.host_write_index_buffer.1048576,Fail
|
|||
dEQP-VK.memory.pipeline_barrier.host_write_index_buffer.65536,Fail
|
||||
dEQP-VK.memory.pipeline_barrier.host_write_index_buffer.8192,Fail
|
||||
|
||||
# src/panfrost/compiler/bi_packer.c:1116: bi_pack_fma_imul_v4i8: Assertion `replicate0 < 8' failed
|
||||
dEQP-VK.spirv_assembly.instruction.compute.mul_extended.unsigned_8bit,Crash
|
||||
dEQP-VK.spirv_assembly.instruction.compute.mul_extended.signed_8bit,Crash
|
||||
|
||||
# New with vk_clear
|
||||
dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.whole_copy_before_resolving_no_cab.4_bit,Fail
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
*/
|
||||
|
||||
#include "bi_builder.h"
|
||||
#include "bi_swizzles.h"
|
||||
#include "compiler.h"
|
||||
#include "valhall.h"
|
||||
|
||||
|
|
@ -48,94 +49,9 @@ bi_swizzle_replicates_8(enum bi_swizzle swz)
|
|||
static void
|
||||
lower_swizzle(bi_context *ctx, bi_instr *ins, unsigned src)
|
||||
{
|
||||
/* TODO: Use the opcode table and be a lot more methodical about this... */
|
||||
switch (ins->op) {
|
||||
/* Some instructions used with 16-bit data never have swizzles */
|
||||
case BI_OPCODE_CSEL_V2F16:
|
||||
case BI_OPCODE_CSEL_V2I16:
|
||||
case BI_OPCODE_CSEL_V2S16:
|
||||
case BI_OPCODE_CSEL_V2U16:
|
||||
break;
|
||||
|
||||
/* Despite ostensibly being 32-bit instructions, CLPER does not
|
||||
* inherently interpret the data, so it can be used for v2f16
|
||||
* derivatives, which might require swizzle lowering */
|
||||
case BI_OPCODE_CLPER_I32:
|
||||
case BI_OPCODE_CLPER_OLD_I32:
|
||||
if (src == 0)
|
||||
break;
|
||||
else
|
||||
return;
|
||||
|
||||
/* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the
|
||||
* boolean is implemented as a 16-bit integer, the swizzle is needed
|
||||
* for correct operation if the instruction producing the 16-bit
|
||||
* boolean does not replicate to both halves of the containing 32-bit
|
||||
* register. As such, we may need to lower a swizzle.
|
||||
*
|
||||
* This is a silly hack. Ideally, code gen would be smart enough to
|
||||
* avoid this case (by replicating). In practice, silly hardware design
|
||||
* decisions force our hand here.
|
||||
*/
|
||||
case BI_OPCODE_MUX_I32:
|
||||
case BI_OPCODE_CSEL_I32:
|
||||
break;
|
||||
|
||||
case BI_OPCODE_IADD_V2S16:
|
||||
case BI_OPCODE_IADD_V2U16:
|
||||
case BI_OPCODE_ISUB_V2S16:
|
||||
case BI_OPCODE_ISUB_V2U16:
|
||||
if (src == 0 && ins->src[src].swizzle != BI_SWIZZLE_H10)
|
||||
break;
|
||||
else
|
||||
return;
|
||||
case BI_OPCODE_LSHIFT_AND_V2I16:
|
||||
case BI_OPCODE_LSHIFT_OR_V2I16:
|
||||
case BI_OPCODE_LSHIFT_XOR_V2I16:
|
||||
case BI_OPCODE_RSHIFT_AND_V2I16:
|
||||
case BI_OPCODE_RSHIFT_OR_V2I16:
|
||||
case BI_OPCODE_RSHIFT_XOR_V2I16:
|
||||
if (src == 2)
|
||||
return;
|
||||
else
|
||||
break;
|
||||
|
||||
/* For some reason MUX.v2i16 allows swaps but not replication */
|
||||
case BI_OPCODE_MUX_V2I16:
|
||||
if (ins->src[src].swizzle == BI_SWIZZLE_H10)
|
||||
return;
|
||||
else
|
||||
break;
|
||||
|
||||
/* No swizzles supported */
|
||||
case BI_OPCODE_LDEXP_V2F16:
|
||||
case BI_OPCODE_HADD_V4U8:
|
||||
case BI_OPCODE_HADD_V4S8:
|
||||
case BI_OPCODE_CLZ_V4U8:
|
||||
case BI_OPCODE_IDP_V4I8:
|
||||
case BI_OPCODE_IABS_V4S8:
|
||||
case BI_OPCODE_ICMP_V4I8:
|
||||
case BI_OPCODE_ICMP_V4U8:
|
||||
case BI_OPCODE_MUX_V4I8:
|
||||
case BI_OPCODE_IADD_IMM_V4I8:
|
||||
break;
|
||||
|
||||
case BI_OPCODE_LSHIFT_AND_V4I8:
|
||||
case BI_OPCODE_LSHIFT_OR_V4I8:
|
||||
case BI_OPCODE_LSHIFT_XOR_V4I8:
|
||||
case BI_OPCODE_RSHIFT_AND_V4I8:
|
||||
case BI_OPCODE_RSHIFT_OR_V4I8:
|
||||
case BI_OPCODE_RSHIFT_XOR_V4I8:
|
||||
/* Last source allows identity or replication */
|
||||
if (src == 2 && bi_swizzle_replicates_8(ins->src[src].swizzle))
|
||||
return;
|
||||
|
||||
/* Others do not allow swizzles */
|
||||
break;
|
||||
|
||||
/* We don't want to deal with reswizzling logic in modifier prop. Move
|
||||
* the swizzle outside, it's easier for clamp propagation. */
|
||||
case BI_OPCODE_FCLAMP_V2F16: {
|
||||
if (ins->op == BI_OPCODE_FCLAMP_V2F16) {
|
||||
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
|
||||
bi_index dest = ins->dest[0];
|
||||
bi_index tmp = bi_temp(ctx);
|
||||
|
|
@ -147,9 +63,9 @@ lower_swizzle(bi_context *ctx, bi_instr *ins, unsigned src)
|
|||
return;
|
||||
}
|
||||
|
||||
default:
|
||||
uint32_t supported_swizzles = bi_op_swizzles[ins->op][src];
|
||||
if (supported_swizzles & (1 << ins->src[src].swizzle))
|
||||
return;
|
||||
}
|
||||
|
||||
/* First, try to apply a given swizzle to a constant to clear the
|
||||
* runtime swizzle. This is less heavy-handed than ignoring the
|
||||
|
|
|
|||
54
src/panfrost/compiler/bi_swizzles.c.py
Normal file
54
src/panfrost/compiler/bi_swizzles.c.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
# Copyright © 2025 Collabora Ltd.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
TEMPLATE = """
|
||||
#include "bi_swizzles.h"
|
||||
|
||||
uint32_t bi_op_swizzles[BI_NUM_OPCODES][BI_MAX_SRCS] = {
|
||||
% for opcode, src_swizzles in op_swizzles.items():
|
||||
[BI_OPCODE_${opcode.replace('.', '_').upper()}] = {
|
||||
% for src in src_swizzles:
|
||||
% for swizzle in src:
|
||||
(1 << BI_SWIZZLE_${swizzle.upper()}) |
|
||||
% endfor
|
||||
0,
|
||||
% endfor
|
||||
},
|
||||
% endfor
|
||||
};
|
||||
"""
|
||||
|
||||
import sys
|
||||
from bifrost_isa import *
|
||||
from mako.template import Template
|
||||
|
||||
instructions = {}
|
||||
for arg in sys.argv[1:]:
|
||||
new_instructions = parse_instructions(arg, include_pseudo = True)
|
||||
instructions.update(new_instructions)
|
||||
|
||||
ir_instructions = partition_mnemonics(instructions)
|
||||
|
||||
op_swizzles = {}
|
||||
|
||||
for name, op in ir_instructions.items():
|
||||
src_swizzles = [None] * op['srcs']
|
||||
SWIZZLE_MODS = ["lane", "lanes", "replicate", "swz", "widen", "swap"]
|
||||
|
||||
for mod, opts in op['modifiers'].items():
|
||||
raw, arg = (mod[0:-1], int(mod[-1])) if mod[-1] in "0123" else (mod, 0)
|
||||
if raw in SWIZZLE_MODS:
|
||||
assert(src_swizzles[arg] is None)
|
||||
src_swizzles[arg] = set(opts)
|
||||
|
||||
src_swizzles[arg].discard('reserved')
|
||||
if 'none' in src_swizzles[arg]:
|
||||
src_swizzles[arg].remove('none')
|
||||
src_swizzles[arg].add('h01')
|
||||
|
||||
# Anything that doesn't have swizzle mods only supports identity
|
||||
src_swizzles = [swizzle or ['h01'] for swizzle in src_swizzles]
|
||||
|
||||
op_swizzles[name] = src_swizzles
|
||||
|
||||
print(Template(COPYRIGHT + TEMPLATE).render(op_swizzles = op_swizzles))
|
||||
19
src/panfrost/compiler/bi_swizzles.h
Normal file
19
src/panfrost/compiler/bi_swizzles.h
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* Copyright © 2025 Collabora Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#ifndef _BI_SWIZZLES_H_
|
||||
#define _BI_SWIZZLES_H_
|
||||
|
||||
#include "bi_opcodes.h"
|
||||
#include "compiler.h"
|
||||
|
||||
/* Bitset of supported bi_swizzle for each src of each instruction.
|
||||
*
|
||||
* These are bifrost-only. Supported swizzles on valhall are determined by the
|
||||
* flags in va_src_info.
|
||||
*
|
||||
* Generated in bi_swizzles.c.py */
|
||||
extern uint32_t bi_op_swizzles[BI_NUM_OPCODES][BI_MAX_SRCS];
|
||||
|
||||
#endif
|
||||
|
|
@ -59,6 +59,15 @@ bi_opcodes_c = custom_target(
|
|||
depend_files : files('bifrost_isa.py'),
|
||||
)
|
||||
|
||||
bi_swizzles_c = custom_target(
|
||||
'bi_swizzles.c',
|
||||
input : ['bi_swizzles.c.py', 'IR_pseudo.xml', 'bifrost/ISA.xml'],
|
||||
output : 'bi_swizzles.c',
|
||||
command : [prog_python, '@INPUT@'],
|
||||
capture : true,
|
||||
depend_files : files('bifrost_isa.py'),
|
||||
)
|
||||
|
||||
bi_printer_c = custom_target(
|
||||
'bi_printer.c',
|
||||
input : ['bi_printer.c.py', 'IR_pseudo.xml', 'bifrost/ISA.xml', 'valhall/ISA.xml'],
|
||||
|
|
@ -129,7 +138,7 @@ libpanfrost_bifrost_disasm = static_library(
|
|||
|
||||
libpanfrost_bifrost = static_library(
|
||||
'panfrost_bifrost',
|
||||
[libpanfrost_bifrost_files, bi_opcodes_c, bi_printer_c, bi_packer_c, bifrost_nir_algebraic_c, valhall_c],
|
||||
[libpanfrost_bifrost_files, bi_opcodes_c, bi_swizzles_c, bi_printer_c, bi_packer_c, bifrost_nir_algebraic_c, valhall_c ],
|
||||
include_directories : [inc_include, inc_src, inc_valhall],
|
||||
dependencies: [idep_nir, idep_bi_opcodes_h, idep_bi_builder_h, idep_valhall_enums_h],
|
||||
link_with: [libpanfrost_util, libpanfrost_bifrost_disasm, libpanfrost_valhall_disasm],
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue