From 4870d7d829e57a993976d6da497e1202b1df2fa6 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Mon, 31 May 2021 18:18:24 +0100 Subject: [PATCH] aco: use v1b/v2b for ds_read_u8/ds_read_u16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The p_extract_vector isn't necessary. For ds_read_u8 and ds_read_u16, we used a 32-bit regclass, but didn't load 32 bits, and used dst_hint for vector loads when we shouldn't have. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4863 Cc: mesa-stable Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 5 +---- src/amd/compiler/aco_opcodes.py | 5 +++++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index bc9c11f96f1..ed87d757e03 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3708,7 +3708,7 @@ Temp lds_load_callback(Builder& bld, const LoadEmitInfo &info, const_offset /= const_offset_unit; - RegClass rc = RegClass(RegType::vgpr, DIV_ROUND_UP(size, 4)); + RegClass rc = RegClass::get(RegType::vgpr, size); Temp val = rc == info.dst.regClass() && dst_hint.id() ? 
dst_hint : bld.tmp(rc); Instruction *instr; if (read2) @@ -3717,9 +3717,6 @@ Temp lds_load_callback(Builder& bld, const LoadEmitInfo &info, instr = bld.ds(op, Definition(val), offset, m, const_offset); instr->ds().sync = info.sync; - if (size < 4) - val = bld.pseudo(aco_opcode::p_extract_vector, bld.def(RegClass::get(RegType::vgpr, size)), val, Operand(0u)); - return val; } diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index c399e1d86bb..a4f2688fe89 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -1696,3 +1696,8 @@ for ver in ['gfx9', 'gfx10']: sys.exit(1) else: op_to_name[key] = op.name + +# These instructions write the entire 32-bit VGPR, but it's not clear in Opcode's constructor that +# it should be 32, since it works accidentally. +assert(opcodes['ds_read_u8'].definition_size == 32) +assert(opcodes['ds_read_u16'].definition_size == 32)