panfrost: use bifrost instruction table for bi_lower_swizzle

Fixes two known issues:

 - We did not lower invalid swizzles for IADD.v4s8, triggered in the CTS by
   enabling uniformAndStorageBuffer8BitAccess and storageBuffer8BitAccess in
   panvk.
 - We did not lower invalid swizzles for IMUL.v4i8, triggered by
   dEQP-VK.spirv_assembly.instruction.compute.mul_extended.(un)signed_8bit
   on bifrost.

The old logic was missing several other instructions, so there may be
additional bugs that we don't know about.

There are no cases where the new behavior will keep swizzles that would
have been lowered previously, so this change should not introduce any
new bugs with valhall.

Signed-off-by: Benjamin Lee <benjamin.lee@collabora.com>
Reviewed-by: Eric R. Smith <eric.smith@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33416>
This commit is contained in:
Benjamin Lee 2025-02-13 02:20:21 -08:00
parent f513ddfc1c
commit 168c96816a
5 changed files with 87 additions and 92 deletions

View file

@ -360,9 +360,6 @@ dEQP-VK.memory.pipeline_barrier.host_write_index_buffer.1048576,Fail
dEQP-VK.memory.pipeline_barrier.host_write_index_buffer.65536,Fail
dEQP-VK.memory.pipeline_barrier.host_write_index_buffer.8192,Fail
# src/panfrost/compiler/bi_packer.c:1116: bi_pack_fma_imul_v4i8: Assertion `replicate0 < 8' failed
dEQP-VK.spirv_assembly.instruction.compute.mul_extended.unsigned_8bit,Crash
dEQP-VK.spirv_assembly.instruction.compute.mul_extended.signed_8bit,Crash
# New with vk_clear
dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.whole_copy_before_resolving_no_cab.4_bit,Fail

View file

@ -22,6 +22,7 @@
*/
#include "bi_builder.h"
#include "bi_swizzles.h"
#include "compiler.h"
#include "valhall.h"
@ -48,94 +49,9 @@ bi_swizzle_replicates_8(enum bi_swizzle swz)
static void
lower_swizzle(bi_context *ctx, bi_instr *ins, unsigned src)
{
/* TODO: Use the opcode table and be a lot more methodical about this... */
switch (ins->op) {
/* Some instructions used with 16-bit data never have swizzles */
case BI_OPCODE_CSEL_V2F16:
case BI_OPCODE_CSEL_V2I16:
case BI_OPCODE_CSEL_V2S16:
case BI_OPCODE_CSEL_V2U16:
break;
/* Despite ostensibly being 32-bit instructions, CLPER does not
* inherently interpret the data, so it can be used for v2f16
* derivatives, which might require swizzle lowering */
case BI_OPCODE_CLPER_I32:
case BI_OPCODE_CLPER_OLD_I32:
if (src == 0)
break;
else
return;
/* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the
* boolean is implemented as a 16-bit integer, the swizzle is needed
* for correct operation if the instruction producing the 16-bit
* boolean does not replicate to both halves of the containing 32-bit
* register. As such, we may need to lower a swizzle.
*
* This is a silly hack. Ideally, code gen would be smart enough to
* avoid this case (by replicating). In practice, silly hardware design
* decisions force our hand here.
*/
case BI_OPCODE_MUX_I32:
case BI_OPCODE_CSEL_I32:
break;
case BI_OPCODE_IADD_V2S16:
case BI_OPCODE_IADD_V2U16:
case BI_OPCODE_ISUB_V2S16:
case BI_OPCODE_ISUB_V2U16:
if (src == 0 && ins->src[src].swizzle != BI_SWIZZLE_H10)
break;
else
return;
case BI_OPCODE_LSHIFT_AND_V2I16:
case BI_OPCODE_LSHIFT_OR_V2I16:
case BI_OPCODE_LSHIFT_XOR_V2I16:
case BI_OPCODE_RSHIFT_AND_V2I16:
case BI_OPCODE_RSHIFT_OR_V2I16:
case BI_OPCODE_RSHIFT_XOR_V2I16:
if (src == 2)
return;
else
break;
/* For some reason MUX.v2i16 allows swaps but not replication */
case BI_OPCODE_MUX_V2I16:
if (ins->src[src].swizzle == BI_SWIZZLE_H10)
return;
else
break;
/* No swizzles supported */
case BI_OPCODE_LDEXP_V2F16:
case BI_OPCODE_HADD_V4U8:
case BI_OPCODE_HADD_V4S8:
case BI_OPCODE_CLZ_V4U8:
case BI_OPCODE_IDP_V4I8:
case BI_OPCODE_IABS_V4S8:
case BI_OPCODE_ICMP_V4I8:
case BI_OPCODE_ICMP_V4U8:
case BI_OPCODE_MUX_V4I8:
case BI_OPCODE_IADD_IMM_V4I8:
break;
case BI_OPCODE_LSHIFT_AND_V4I8:
case BI_OPCODE_LSHIFT_OR_V4I8:
case BI_OPCODE_LSHIFT_XOR_V4I8:
case BI_OPCODE_RSHIFT_AND_V4I8:
case BI_OPCODE_RSHIFT_OR_V4I8:
case BI_OPCODE_RSHIFT_XOR_V4I8:
/* Last source allows identity or replication */
if (src == 2 && bi_swizzle_replicates_8(ins->src[src].swizzle))
return;
/* Others do not allow swizzles */
break;
/* We don't want to deal with reswizzling logic in modifier prop. Move
* the swizzle outside, it's easier for clamp propagation. */
case BI_OPCODE_FCLAMP_V2F16: {
if (ins->op == BI_OPCODE_FCLAMP_V2F16) {
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
bi_index dest = ins->dest[0];
bi_index tmp = bi_temp(ctx);
@ -147,9 +63,9 @@ lower_swizzle(bi_context *ctx, bi_instr *ins, unsigned src)
return;
}
default:
uint32_t supported_swizzles = bi_op_swizzles[ins->op][src];
if (supported_swizzles & (1 << ins->src[src].swizzle))
return;
}
/* First, try to apply a given swizzle to a constant to clear the
* runtime swizzle. This is less heavy-handed than ignoring the

View file

@ -0,0 +1,54 @@
# Copyright © 2025 Collabora Ltd.
# SPDX-License-Identifier: MIT
TEMPLATE = """
#include "bi_swizzles.h"
uint32_t bi_op_swizzles[BI_NUM_OPCODES][BI_MAX_SRCS] = {
% for opcode, src_swizzles in op_swizzles.items():
[BI_OPCODE_${opcode.replace('.', '_').upper()}] = {
% for src in src_swizzles:
% for swizzle in src:
(1 << BI_SWIZZLE_${swizzle.upper()}) |
% endfor
0,
% endfor
},
% endfor
};
"""
import sys
from bifrost_isa import *
from mako.template import Template
instructions = {}
for arg in sys.argv[1:]:
new_instructions = parse_instructions(arg, include_pseudo = True)
instructions.update(new_instructions)
ir_instructions = partition_mnemonics(instructions)
op_swizzles = {}
for name, op in ir_instructions.items():
src_swizzles = [None] * op['srcs']
SWIZZLE_MODS = ["lane", "lanes", "replicate", "swz", "widen", "swap"]
for mod, opts in op['modifiers'].items():
raw, arg = (mod[0:-1], int(mod[-1])) if mod[-1] in "0123" else (mod, 0)
if raw in SWIZZLE_MODS:
assert(src_swizzles[arg] is None)
src_swizzles[arg] = set(opts)
src_swizzles[arg].discard('reserved')
if 'none' in src_swizzles[arg]:
src_swizzles[arg].remove('none')
src_swizzles[arg].add('h01')
# Anything that doesn't have swizzle mods only supports identity
src_swizzles = [swizzle or ['h01'] for swizzle in src_swizzles]
op_swizzles[name] = src_swizzles
print(Template(COPYRIGHT + TEMPLATE).render(op_swizzles = op_swizzles))

View file

@ -0,0 +1,19 @@
/*
* Copyright © 2025 Collabora Ltd.
* SPDX-License-Identifier: MIT
*/
#ifndef _BI_SWIZZLES_H_
#define _BI_SWIZZLES_H_
#include "bi_opcodes.h"
#include "compiler.h"
/* Bitset of supported bi_swizzle for each src of each instruction.
*
* These are bifrost-only. Supported swizzles on valhall are determined by the
* flags in va_src_info.
*
* Generated in bi_swizzles.c.py */
extern uint32_t bi_op_swizzles[BI_NUM_OPCODES][BI_MAX_SRCS];
#endif

View file

@ -59,6 +59,15 @@ bi_opcodes_c = custom_target(
depend_files : files('bifrost_isa.py'),
)
bi_swizzles_c = custom_target(
'bi_swizzles.c',
input : ['bi_swizzles.c.py', 'IR_pseudo.xml', 'bifrost/ISA.xml'],
output : 'bi_swizzles.c',
command : [prog_python, '@INPUT@'],
capture : true,
depend_files : files('bifrost_isa.py'),
)
bi_printer_c = custom_target(
'bi_printer.c',
input : ['bi_printer.c.py', 'IR_pseudo.xml', 'bifrost/ISA.xml', 'valhall/ISA.xml'],
@ -129,7 +138,7 @@ libpanfrost_bifrost_disasm = static_library(
libpanfrost_bifrost = static_library(
'panfrost_bifrost',
[libpanfrost_bifrost_files, bi_opcodes_c, bi_printer_c, bi_packer_c, bifrost_nir_algebraic_c, valhall_c],
[libpanfrost_bifrost_files, bi_opcodes_c, bi_swizzles_c, bi_printer_c, bi_packer_c, bifrost_nir_algebraic_c, valhall_c ],
include_directories : [inc_include, inc_src, inc_valhall],
dependencies: [idep_nir, idep_bi_opcodes_h, idep_bi_builder_h, idep_valhall_enums_h],
link_with: [libpanfrost_util, libpanfrost_bifrost_disasm, libpanfrost_valhall_disasm],