From 9e77cb7493cb4a776e38dbce5a80439fc0c201f9 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sun, 24 Aug 2025 09:01:28 +0200 Subject: [PATCH] aco: fix ra validation for flat/global/scratch/ds load sbyte_d16 Fixes: 18a53230eb5 ("aco: don't check dst_bitsize in apply_load_extract") Reviewed-by: Rhys Perry Reviewed-by: Dave Airlie Part-of: (cherry picked from commit 791a57805c53b2120c1d8e3637f2fa3036212361) --- .pick_status.json | 2 +- src/amd/compiler/aco_validate.cpp | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.pick_status.json b/.pick_status.json index 0d64198f81f..f37cf92e1d0 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -5164,7 +5164,7 @@ "description": "aco: fix ra validation for flat/global/scratch/ds load sbyte_d16", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "18a53230eb553435f892ac4f33e557ac254984c8", "notes": null diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index 6122b86d7b1..b3c378c2359 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -1297,12 +1297,16 @@ validate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction> case aco_opcode::buffer_load_short_d16_hi: case aco_opcode::buffer_load_format_d16_hi_x: case aco_opcode::flat_load_ubyte_d16_hi: + case aco_opcode::flat_load_sbyte_d16_hi: case aco_opcode::flat_load_short_d16_hi: case aco_opcode::scratch_load_ubyte_d16_hi: + case aco_opcode::scratch_load_sbyte_d16_hi: case aco_opcode::scratch_load_short_d16_hi: case aco_opcode::global_load_ubyte_d16_hi: + case aco_opcode::global_load_sbyte_d16_hi: case aco_opcode::global_load_short_d16_hi: case aco_opcode::ds_read_u8_d16_hi: + case aco_opcode::ds_read_i8_d16_hi: case aco_opcode::ds_read_u16_d16_hi: return byte == 2; default: break; } @@ -1341,24 +1345,32 @@ get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, case
aco_opcode::buffer_load_format_d16_x: case aco_opcode::tbuffer_load_format_d16_x: case aco_opcode::flat_load_ubyte_d16: + case aco_opcode::flat_load_sbyte_d16: case aco_opcode::flat_load_short_d16: case aco_opcode::scratch_load_ubyte_d16: + case aco_opcode::scratch_load_sbyte_d16: case aco_opcode::scratch_load_short_d16: case aco_opcode::global_load_ubyte_d16: + case aco_opcode::global_load_sbyte_d16: case aco_opcode::global_load_short_d16: case aco_opcode::ds_read_u8_d16: + case aco_opcode::ds_read_i8_d16: case aco_opcode::ds_read_u16_d16: case aco_opcode::buffer_load_ubyte_d16_hi: case aco_opcode::buffer_load_sbyte_d16_hi: case aco_opcode::buffer_load_short_d16_hi: case aco_opcode::buffer_load_format_d16_hi_x: case aco_opcode::flat_load_ubyte_d16_hi: + case aco_opcode::flat_load_sbyte_d16_hi: case aco_opcode::flat_load_short_d16_hi: case aco_opcode::scratch_load_ubyte_d16_hi: + case aco_opcode::scratch_load_sbyte_d16_hi: case aco_opcode::scratch_load_short_d16_hi: case aco_opcode::global_load_ubyte_d16_hi: + case aco_opcode::global_load_sbyte_d16_hi: case aco_opcode::global_load_short_d16_hi: case aco_opcode::ds_read_u8_d16_hi: + case aco_opcode::ds_read_i8_d16_hi: case aco_opcode::ds_read_u16_d16_hi: return program->dev.sram_ecc_enabled ? 4 : 2; case aco_opcode::buffer_load_format_d16_xyz: case aco_opcode::tbuffer_load_format_d16_xyz: return program->dev.sram_ecc_enabled ? 8 : 6;