diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index afb0fd0895b..f0cba7467af 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -2861,9 +2861,14 @@ late_optimizations.extend([
     (('iadd', a, ('ineg', 'b')), ('isub', 'a', 'b'), 'options->has_isub || options->lower_ineg'),
     (('ineg', a), ('isub', 0, a), 'options->lower_ineg'),
     (('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
-
+    # On Intel GPUs, the constant field for an ADD3 instruction must be either
+    # int16_t or uint16_t.
     (('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'),
+    (('iadd', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'),
+    (('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_16_bits)'), ('iadd3', a, b, c), 'options->has_iadd3'),
     (('iadd', ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
+    (('iadd', ('ineg', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
+    (('iadd', ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), '#c(is_16_bits)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
 
     # fneg_lo / fneg_hi
     (('vec2(is_only_used_as_float)', ('fneg@16', a), b), ('fmul', ('vec2', a, b), ('vec2', -1.0, 1.0)), 'options->vectorize_vec2_16bit'),
diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h
index 0ba0965e8fb..06abe924391 100644
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -284,6 +284,52 @@ is_first_5_bits_uge_2(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
    return true;
 }
 
+/**
+ * Is this a constant that can be encoded as a 16-bit immediate, i.e. do all
+ * of its components fit in int16_t, or all of them fit in uint16_t?
+ *
+ * Non-constant sources never match.
+ */
+static inline bool
+is_16_bits(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
+           unsigned src, unsigned num_components,
+           const uint8_t *swizzle)
+{
+   /* only constant srcs: */
+   if (!nir_src_is_const(instr->src[src].src))
+      return false;
+
+   /* A single immediate has a single type, so a negative component (needs
+    * int16_t) cannot be mixed with one above 0x7fff (needs uint16_t).
+    */
+   bool must_be_signed = false;
+   bool must_be_unsigned = false;
+
+   for (unsigned i = 0; i < num_components; i++) {
+      const int64_t val =
+         nir_src_comp_as_int(instr->src[src].src, swizzle[i]);
+
+      if (val > 0xffff || val < -0x8000)
+         return false;
+
+      if (val < 0) {
+         if (must_be_unsigned)
+            return false;
+
+         must_be_signed = true;
+      }
+
+      if (val > 0x7fff) {
+         if (must_be_signed)
+            return false;
+
+         must_be_unsigned = true;
+      }
+   }
+
+   return true;
+}
+
 static inline bool
 is_not_const(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
              unsigned src, UNUSED unsigned num_components,
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index 1cd1a2fab4f..56e852a41fc 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -983,6 +983,30 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
          }
          break;
 
+      case BRW_OPCODE_ADD3:
+         /* add3 can have a single imm16 source. Proceed if the source type is
+          * already W or UW or the value can be coerced to one of those types.
+          */
+         if (val.type == BRW_REGISTER_TYPE_W || val.type == BRW_REGISTER_TYPE_UW)
+            ; /* Nothing to do. */
+         else if (val.ud <= 0xffff)
+            val = brw_imm_uw(val.ud);
+         else if (val.d >= -0x8000 && val.d <= 0x7fff)
+            val = brw_imm_w(val.d);
+         else
+            break;
+
+         if (i == 2) {
+            inst->src[i] = val;
+            progress = true;
+         } else if (inst->src[2].file != IMM) {
+            inst->src[i] = inst->src[2];
+            inst->src[2] = val;
+            progress = true;
+         }
+
+         break;
+
       case BRW_OPCODE_CMP:
       case BRW_OPCODE_IF:
          if (i == 1) {
@@ -1088,6 +1112,15 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
       }
    }
 
+   /* ADD3 can only have the immediate as src0. */
+   if (progress && inst->opcode == BRW_OPCODE_ADD3) {
+      if (inst->src[2].file == IMM) {
+         const auto src0 = inst->src[0];
+         inst->src[0] = inst->src[2];
+         inst->src[2] = src0;
+      }
+   }
+
    return progress;
 }
 