diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index 9a6434d94dd..286616d629b 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -370,6 +370,12 @@ v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack) return ".hh"; case V3D_QPU_UNPACK_SWAP_16: return ".swp"; + case V3D71_QPU_UNPACK_SAT: + return ".sat"; + case V3D71_QPU_UNPACK_NSAT: + return ".nsat"; + case V3D71_QPU_UNPACK_MAX0: + return ".max0"; default: unreachable("bad unpack value"); } diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index fe9b5d3a00f..9a05fc8b7dc 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -293,6 +293,13 @@ enum v3d_qpu_input_unpack { /** Convert high 16 bits from 16-bit float to 32-bit float. */ V3D_QPU_UNPACK_H, + /* Saturate 32-bit floating point to [0.0, 1.0] */ + V3D71_QPU_UNPACK_SAT, + /* Saturate 32-bit floating point to [-1.0, 1.0] */ + V3D71_QPU_UNPACK_NSAT, + /* Saturate 32-bit floating point to [0.0, +inf] */ + V3D71_QPU_UNPACK_MAX0, + /** Convert to 16f and replicate it to the high bits. 
*/ V3D_QPU_UNPACK_REPLICATE_32F_16, diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c index f535edcea1b..6d44b01aadc 100644 --- a/src/broadcom/qpu/qpu_pack.c +++ b/src/broadcom/qpu/qpu_pack.c @@ -831,7 +831,8 @@ lookup_opcode_from_packed(const struct v3d_device_info *devinfo, } static bool -v3d_qpu_float32_unpack_unpack(uint32_t packed, +v3d_qpu_float32_unpack_unpack(const struct v3d_device_info *devinfo, + uint32_t packed, enum v3d_qpu_input_unpack *unpacked) { switch (packed) { @@ -847,13 +848,23 @@ v3d_qpu_float32_unpack_unpack(uint32_t packed, case 3: *unpacked = V3D_QPU_UNPACK_H; return true; + case 4: + *unpacked = V3D71_QPU_UNPACK_SAT; + return devinfo->ver >= 71; + case 5: + *unpacked = V3D71_QPU_UNPACK_NSAT; + return devinfo->ver >= 71; + case 6: + *unpacked = V3D71_QPU_UNPACK_MAX0; + return devinfo->ver >= 71; default: return false; } } static bool -v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked, +v3d_qpu_float32_unpack_pack(const struct v3d_device_info *devinfo, + enum v3d_qpu_input_unpack unpacked, uint32_t *packed) { switch (unpacked) { @@ -869,6 +880,15 @@ v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked, case V3D_QPU_UNPACK_H: *packed = 3; return true; + case V3D71_QPU_UNPACK_SAT: + *packed = 4; + return devinfo->ver >= 71; + case V3D71_QPU_UNPACK_NSAT: + *packed = 5; + return devinfo->ver >= 71; + case V3D71_QPU_UNPACK_MAX0: + *packed = 6; + return devinfo->ver >= 71; default: return false; } @@ -1069,12 +1089,12 @@ v3d42_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst else instr->alu.add.output_pack = V3D_QPU_PACK_NONE; - if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3, &instr->alu.add.a.unpack)) { return false; } - if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, + if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3, &instr->alu.add.b.unpack)) { return false; } @@ -1088,7 +1108,7 @@ 
v3d42_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst case V3D_QPU_A_FDY: instr->alu.add.output_pack = mux_b & 0x3; - if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3, &instr->alu.add.a.unpack)) { return false; } @@ -1100,7 +1120,7 @@ v3d42_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst case V3D_QPU_A_FTOC: instr->alu.add.output_pack = V3D_QPU_PACK_NONE; - if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3, &instr->alu.add.a.unpack)) { return false; } @@ -1221,12 +1241,12 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst instr->alu.add.output_pack = V3D_QPU_PACK_NONE; } - if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3, &instr->alu.add.a.unpack)) { return false; } - if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, + if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3, &instr->alu.add.b.unpack)) { return false; } @@ -1240,7 +1260,7 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst case V3D_QPU_A_FDY: instr->alu.add.output_pack = raddr_b & 0x3; - if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3, &instr->alu.add.a.unpack)) { return false; } @@ -1252,7 +1272,7 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst case V3D_QPU_A_FTOC: instr->alu.add.output_pack = V3D_QPU_PACK_NONE; - if (!v3d_qpu_float32_unpack_unpack((raddr_b >> 2) & 0x3, + if (!v3d_qpu_float32_unpack_unpack(devinfo, (raddr_b >> 2) & 0x3, &instr->alu.add.a.unpack)) { return false; } @@ -1287,7 +1307,7 @@ v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst if (unpack == 7) return false; - if (!v3d_qpu_float32_unpack_unpack(unpack, + if 
(!v3d_qpu_float32_unpack_unpack(devinfo, unpack, &instr->alu.add.a.unpack)) { return false; } @@ -1359,12 +1379,12 @@ v3d42_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst case V3D_QPU_M_FMUL: instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1; - if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3, &instr->alu.mul.a.unpack)) { return false; } - if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, + if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3, &instr->alu.mul.b.unpack)) { return false; } @@ -1375,7 +1395,7 @@ v3d42_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst instr->alu.mul.output_pack = (((op & 1) << 1) + ((mux_b >> 2) & 1)); - if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3, + if (!v3d_qpu_float32_unpack_unpack(devinfo, mux_b & 0x3, &instr->alu.mul.a.unpack)) { return false; } @@ -1434,12 +1454,12 @@ v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst case V3D_QPU_M_FMUL: instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1; - if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3, &instr->alu.mul.a.unpack)) { return false; } - if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, + if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3, &instr->alu.mul.b.unpack)) { return false; } @@ -1449,7 +1469,7 @@ v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst case V3D_QPU_M_FMOV: instr->alu.mul.output_pack = raddr_d & 0x3; - if (!v3d_qpu_float32_unpack_unpack((raddr_d >> 2) & 0x7, + if (!v3d_qpu_float32_unpack_unpack(devinfo, (raddr_d >> 2) & 0x7, &instr->alu.mul.a.unpack)) { return false; } @@ -1601,12 +1621,14 @@ v3d42_qpu_add_pack(const struct v3d_device_info *devinfo, } opcode |= output_pack << 4; - if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + 
instr->alu.add.a.unpack, &a_unpack)) { return false; } - if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.add.b.unpack, &b_unpack)) { return false; } @@ -1645,12 +1667,14 @@ v3d42_qpu_add_pack(const struct v3d_device_info *devinfo, return false; } - if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.add.a.unpack, &a_unpack)) { return false; } - if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.add.b.unpack, &b_unpack)) { return false; } @@ -1675,7 +1699,8 @@ v3d42_qpu_add_pack(const struct v3d_device_info *devinfo, } mux_b |= packed; - if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.add.a.unpack, &packed)) { return false; } @@ -1693,7 +1718,8 @@ v3d42_qpu_add_pack(const struct v3d_device_info *devinfo, return false; uint32_t packed; - if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.add.a.unpack, &packed)) { return false; } @@ -1814,12 +1840,14 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo, opcode |= output_pack << 4; } - if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.add.a.unpack, &a_unpack)) { return false; } - if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.add.b.unpack, &b_unpack)) { return false; } @@ -1876,12 +1904,14 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo, return false; } - if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.add.a.unpack, &a_unpack)) { return false; } - if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.add.b.unpack, &b_unpack)) 
{ return false; } @@ -1906,7 +1936,8 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo, } raddr_b |= packed; - if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.add.a.unpack, &packed)) { return false; } @@ -1924,7 +1955,8 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo, return false; uint32_t packed; - if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.add.a.unpack, &packed)) { return false; } @@ -1973,7 +2005,8 @@ v3d71_qpu_add_pack(const struct v3d_device_info *devinfo, } raddr_b = packed; - if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.add.a.unpack, &packed)) { return false; } @@ -2041,13 +2074,15 @@ v3d42_qpu_mul_pack(const struct v3d_device_info *devinfo, */ opcode += packed << 4; - if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.mul.a.unpack, &packed)) { return false; } opcode |= packed << 2; - if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.mul.b.unpack, &packed)) { return false; } @@ -2065,7 +2100,8 @@ v3d42_qpu_mul_pack(const struct v3d_device_info *devinfo, opcode |= (packed >> 1) & 1; mux_b = (packed & 1) << 2; - if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.mul.a.unpack, &packed)) { return false; } @@ -2151,13 +2187,15 @@ v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo, */ opcode += packed << 4; - if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.mul.a.unpack, &packed)) { return false; } opcode |= packed << 2; - if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.mul.b.unpack, &packed)) { return false; } 
@@ -2174,7 +2212,8 @@ v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo, } raddr_d |= packed; - if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a.unpack, + if (!v3d_qpu_float32_unpack_pack(devinfo, + instr->alu.mul.a.unpack, &packed)) { return false; }