From 9479e3a19b9e08b8525ba8b91a891b8cff03ace3 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 3 Feb 2022 10:45:58 -0800 Subject: [PATCH] intel/fs: Allow constant copy prop from DW to W This enables copy propagation of mov(8) g5<1>UD 0x00000180UD mul(8) g10<1>D g2.3<0,1,0>D g5<16,8,2>W into mul(8) g10<1>D g2.3<0,1,0>D 180W This is necessary for any optimization passes that generate imul_32x16 instructions. No fossil-db or shader-db changes on any Intel platform. v2: Fix type size check to (src size != 2) || (dest size != 4). It was previously &&. :( This allowed copying constants into UB sources, and that is invalid. v3: Fix incorrect extraction of upper 16-bits of immediate value when subnr=2. Noticed by Caio. Reviewed-by: Caio Oliveira Part-of: --- .../compiler/brw_fs_copy_propagation.cpp | 34 ++++++++++++++++--- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 9819b5dcbca..3f223c92d5c 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -766,15 +766,39 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) entry->dst, entry->size_written)) continue; - /* If the type sizes don't match each channel of the instruction is - * either extracting a portion of the constant (which could be handled - * with some effort but the code below doesn't) or reading multiple - * channels of the source at once. + /* If the size of the use type is larger than the size of the entry + * type, the entry doesn't contain all of the data that the user is + * trying to use. */ - if (type_sz(inst->src[i].type) != type_sz(entry->dst.type)) + if (type_sz(inst->src[i].type) > type_sz(entry->dst.type)) continue; fs_reg val = entry->src; + + /* If the size of the use type is smaller than the size of the entry, + * clamp the value to the range of the use type. This enables constant + * copy propagation in cases like + * + * + * mov(8) g12<1>UD 0x0000000cUD + * ... + * mul(8) g47<1>D g86<8,8,1>D g12<16,8,2>W + */ + if (type_sz(inst->src[i].type) < type_sz(entry->dst.type)) { + if (type_sz(inst->src[i].type) != 2 || type_sz(entry->dst.type) != 4) + continue; + + assert(inst->src[i].subnr == 0 || inst->src[i].subnr == 2); + + /* When subnr is 0, we want the lower 16-bits, and when it's 2, we + * want the upper 16-bits. No other values of subnr are valid for a + * UD source. + */ + const uint16_t v = inst->src[i].subnr == 2 ? val.ud >> 16 : val.ud; + + val.ud = v | (uint32_t(v) << 16); + } + val.type = inst->src[i].type; if (inst->src[i].abs) {