diff --git a/src/freedreno/ci/deqp-freedreno-a630-fails.txt b/src/freedreno/ci/deqp-freedreno-a630-fails.txt index 7cc29115a07..d50dba44810 100644 --- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt +++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt @@ -162,36 +162,11 @@ dEQP-VK.spirv_assembly.instruction.compute.opcopymemory.array,Fail dEQP-VK.spirv_assembly.instruction.compute.opquantize.infinities,Fail # https://gitlab.freedesktop.org/mesa/mesa/-/issues/3208 -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_bit_geom,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_bit_tessc,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_to_exponent_frag,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_to_exponent_tesse,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_to_exponent_vert,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_round_up_or_round_down_frag,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_round_up_or_round_down_tesse,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_round_up_or_round_down_vert,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_too_small_frag,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_too_small_tesse,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_too_small_vert,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.positive_round_up_or_round_down_geom,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.positive_round_up_or_round_down_tessc,Fail dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_inf_frag,Fail dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_inf_tesse,Fail dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_inf_vert,Fail dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_negative_inf_geom,Fail 
dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_negative_inf_tessc,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_bit_geom,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_bit_tessc,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_to_exponent_frag,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_to_exponent_tesse,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_to_exponent_vert,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_negative_round_up_or_round_down_frag,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_negative_round_up_or_round_down_tesse,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_negative_round_up_or_round_down_vert,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_positive_round_up_or_round_down_geom,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_positive_round_up_or_round_down_tessc,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.too_small_geom,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.too_small_tessc,Fail dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_single_buffer_geom,Fail dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_two_buffers_geom,Fail diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index fb7130394d8..92b9bd19c6a 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1159,6 +1159,137 @@ static inline unsigned ir3_cat3_absneg(opc_t opc) } } +/* Return the type (float, int, or uint) the op uses when converting from the + * internal result of the op (which is assumed to be the same size as the + * sources) to the destination when they are not the same size. 
If F32 it does + * a floating-point conversion, if U32 it does a truncation/zero-extension, if + * S32 it does a truncation/sign-extension. "can_fold" will be false if it + * doesn't do anything sensible or is unknown. + */ +static inline type_t +ir3_output_conv_type(struct ir3_instruction *instr, bool *can_fold) +{ + *can_fold = true; + switch (instr->opc) { + case OPC_ADD_F: + case OPC_MUL_F: + case OPC_BARY_F: + case OPC_MAD_F32: + case OPC_MAD_F16: + return TYPE_F32; + + case OPC_ADD_U: + case OPC_SUB_U: + case OPC_MIN_U: + case OPC_MAX_U: + case OPC_AND_B: + case OPC_OR_B: + case OPC_NOT_B: + case OPC_XOR_B: + case OPC_MUL_U24: + case OPC_MULL_U: + case OPC_SHL_B: + case OPC_SHR_B: + case OPC_ASHR_B: + case OPC_MAD_U24: + /* Comparison ops zero-extend/truncate their results, so consider them as + * unsigned here. + */ + case OPC_CMPS_F: + case OPC_CMPV_F: + case OPC_CMPS_U: + case OPC_CMPS_S: + return TYPE_U32; + + case OPC_ADD_S: + case OPC_SUB_S: + case OPC_MIN_S: + case OPC_MAX_S: + case OPC_ABSNEG_S: + case OPC_MUL_S24: + case OPC_MAD_S24: + return TYPE_S32; + + /* We assume that any move->move folding that could be done was done by + * NIR. + */ + case OPC_MOV: + default: + *can_fold = false; + return TYPE_U32; + } +} + +/* Return the src and dst types for the conversion which is already folded + * into the op. We can assume that instr has folded in a conversion from + * ir3_output_conv_src_type() to ir3_output_conv_dst_type(). Only makes sense + * to call if ir3_output_conv_type() returns can_fold = true. + */ +static inline type_t +ir3_output_conv_src_type(struct ir3_instruction *instr, type_t base_type) +{ + switch (instr->opc) { + case OPC_CMPS_F: + case OPC_CMPV_F: + case OPC_CMPS_U: + case OPC_CMPS_S: + /* Comparisons only return 0/1 and the size of the comparison sources + * is irrelevant, never consider them as having an output conversion + * by returning a type with the dest size here: + */ + return (instr->regs[0]->flags & IR3_REG_HALF) ? 
half_type(base_type) : + full_type(base_type); + + case OPC_BARY_F: + /* bary.f doesn't have an explicit source, but we can assume here that + * the varying data it reads is in fp32. + * + * This may be fp16 on older generations depending on some register + * settings, but it's probably not worth plumbing that through for a + * small improvement that NIR would hopefully handle for us anyway. + */ + return TYPE_F32; + + default: + return (instr->regs[1]->flags & IR3_REG_HALF) ? half_type(base_type) : + full_type(base_type); + } +} + +static inline type_t +ir3_output_conv_dst_type(struct ir3_instruction *instr, type_t base_type) +{ + return (instr->regs[0]->flags & IR3_REG_HALF) ? half_type(base_type) : + full_type(base_type); +} + +/* Some instructions have signed/unsigned variants which are identical except + * for whether the folded conversion sign-extends or zero-extends, and we can + * fold in a mismatching move by rewriting the opcode. Return the opcode to + * switch signedness. On failure *can_swap is cleared; it is never set to true. + */ +static inline opc_t +ir3_try_swap_signedness(opc_t opc, bool *can_swap) +{ + switch (opc) { +#define PAIR(u, s) \ + case OPC_##u: \ + return OPC_##s; \ + case OPC_##s: \ + return OPC_##u; + PAIR(ADD_U, ADD_S) + PAIR(SUB_U, SUB_S) + /* Note: these are only identical when the sources are half, but that's + * the only case we call this function for anyway. 
+ */ + PAIR(MUL_U24, MUL_S24) + + default: + *can_swap = false; + return opc; + } +} + #define MASK(n) ((1 << (n)) - 1) /* iterator for an instructions's sources (reg), also returns src #: */ diff --git a/src/freedreno/ir3/ir3_cf.c b/src/freedreno/ir3/ir3_cf.c index d479bc10759..1f11810cda7 100644 --- a/src/freedreno/ir3/ir3_cf.c +++ b/src/freedreno/ir3/ir3_cf.c @@ -26,11 +26,19 @@ #include "ir3.h" static bool -is_fp16_conv(struct ir3_instruction *instr) +is_safe_conv(struct ir3_instruction *instr, type_t src_type, + opc_t *src_opc) { if (instr->opc != OPC_MOV) return false; + /* Only allow half->full or full->half without any type conversion (like + * int to float). + */ + if (type_size(instr->cat1.src_type) == type_size(instr->cat1.dst_type) || + full_type(instr->cat1.src_type) != full_type(instr->cat1.dst_type)) + return false; + struct ir3_register *dst = instr->regs[0]; struct ir3_register *src = instr->regs[1]; @@ -45,23 +53,47 @@ is_fp16_conv(struct ir3_instruction *instr) if (src->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY)) return false; - if (instr->cat1.src_type == TYPE_F32 && - instr->cat1.dst_type == TYPE_F16) + /* Check that the source of the conv matches the type of the src + * instruction. + */ + if (src_type == instr->cat1.src_type) return true; - if (instr->cat1.src_type == TYPE_F16 && - instr->cat1.dst_type == TYPE_F32) + /* We can handle mismatches with integer types by converting the opcode + * but not when an integer is reinterpreted as a float or vice-versa. + */ + if (type_float(src_type) != type_float(instr->cat1.src_type)) + return false; + + /* We have types with mismatched signedness. 
Mismatches on the signedness + * don't matter when narrowing: + */ + if (type_size(instr->cat1.dst_type) < type_size(instr->cat1.src_type)) return true; - return false; + /* Try swapping the opcode: */ + bool can_swap = true; + *src_opc = ir3_try_swap_signedness(*src_opc, &can_swap); + return can_swap; } static bool -all_uses_fp16_conv(struct ir3_instruction *conv_src) +all_uses_safe_conv(struct ir3_instruction *conv_src, type_t src_type) { - foreach_ssa_use (use, conv_src) - if (!is_fp16_conv(use)) + opc_t opc = conv_src->opc; + bool first = true; + foreach_ssa_use (use, conv_src) { + opc_t new_opc = opc; + if (!is_safe_conv(use, src_type, &new_opc)) return false; + /* Check if multiple uses have conflicting requirements on the opcode. + */ + if (!first && opc != new_opc) + return false; + first = false; + opc = new_opc; + } + conv_src->opc = opc; return true; } @@ -74,7 +106,7 @@ static void rewrite_src_uses(struct ir3_instruction *src) { foreach_ssa_use (use, src) { - assert(is_fp16_conv(use)); + assert(use->opc == OPC_MOV); if (is_half(src)) { use->regs[1]->flags |= IR3_REG_HALF; @@ -91,7 +123,7 @@ try_conversion_folding(struct ir3_instruction *conv) { struct ir3_instruction *src; - if (!is_fp16_conv(conv)) + if (conv->opc != OPC_MOV) return false; /* NOTE: we can have non-ssa srcs after copy propagation: */ @@ -102,51 +134,23 @@ try_conversion_folding(struct ir3_instruction *conv) if (!is_alu(src)) return false; - /* avoid folding f2f32(f2f16) together, in cases where this is legal to - * do (glsl) nir should have handled that for us already: + bool can_fold; + type_t base_type = ir3_output_conv_type(src, &can_fold); + if (!can_fold) + return false; + + type_t src_type = ir3_output_conv_src_type(src, base_type); + type_t dst_type = ir3_output_conv_dst_type(src, base_type); + + /* Avoid cases where we've already folded in a conversion. We assume that + * if there is a chain of conversions that's foldable then it's been + * folded in NIR already. 
*/ - if (is_fp16_conv(src)) + if (src_type != dst_type) return false; - switch (src->opc) { - case OPC_SEL_B32: - case OPC_SEL_B16: - case OPC_MAX_F: - case OPC_MIN_F: - case OPC_SIGN_F: - case OPC_ABSNEG_F: + if (!all_uses_safe_conv(src, src_type)) return false; - case OPC_MOV: - /* if src is a "cov" and type doesn't match, then it can't be folded - * for example cov.u32u16+cov.f16f32 can't be folded to cov.u32f32 - */ - if (src->cat1.dst_type != src->cat1.src_type && - conv->cat1.src_type != src->cat1.dst_type) - return false; - break; - default: - break; - } - - if (!all_uses_fp16_conv(src)) - return false; - - if (src->opc == OPC_MOV) { - if (src->cat1.dst_type == src->cat1.src_type) { - /* If we're folding a conversion into a bitwise move, we need to - * change the dst type to F32 to get the right behavior, since we - * could be moving a float with a u32.u32 move. - */ - src->cat1.dst_type = conv->cat1.dst_type; - src->cat1.src_type = conv->cat1.src_type; - } else { - /* Otherwise, for typechanging movs, we can just change the dst - * type to F16 to collaps the two conversions. For example - * cov.s32f32 follwed by cov.f32f16 becomes cov.s32f16. - */ - src->cat1.dst_type = conv->cat1.dst_type; - } - } ir3_set_dst_type(src, is_half(conv)); rewrite_src_uses(src);