/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

#define COND_LOWER_OP(b, name, ...) \
   (b->shader->options->lower_int64_options & \
    nir_lower_int64_op_to_options_mask(nir_op_##name)) ? \
   lower_##name##64(b, __VA_ARGS__) : nir_##name(b, __VA_ARGS__)

#define COND_LOWER_CMP(b, name, ...) \
   (b->shader->options->lower_int64_options & \
    nir_lower_int64_op_to_options_mask(nir_op_##name)) ? \
   lower_int64_compare(b, nir_op_##name, __VA_ARGS__) : \
   nir_##name(b, __VA_ARGS__)

#define COND_LOWER_CAST(b, name, ...) \
   (b->shader->options->lower_int64_options & \
    nir_lower_int64_op_to_options_mask(nir_op_##name)) ? \
   lower_##name(b, __VA_ARGS__) : \
   nir_##name(b, __VA_ARGS__)
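
/* Rough illustration of how the COND_LOWER_* macros above are meant to be
 * used later in this file (a sketch, not an exhaustive description):
 * COND_LOWER_OP(b, iadd, x, y) expands to lower_iadd64(b, x, y) when the
 * corresponding bit is set in the shader's lower_int64_options, and to the
 * native nir_iadd(b, x, y) builder call otherwise, so helpers that build
 * 64-bit math emit whichever form the backend can consume.
 */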

static nir_ssa_def *
lower_b2i64(nir_builder *b, nir_ssa_def *x)
{
   return nir_pack_64_2x32_split(b, nir_b2i32(b, x), nir_imm_int(b, 0));
}

static nir_ssa_def *
lower_i2i8(nir_builder *b, nir_ssa_def *x)
{
   return nir_i2i8(b, nir_unpack_64_2x32_split_x(b, x));
}

static nir_ssa_def *
lower_i2i16(nir_builder *b, nir_ssa_def *x)
{
   return nir_i2i16(b, nir_unpack_64_2x32_split_x(b, x));
}

static nir_ssa_def *
lower_i2i32(nir_builder *b, nir_ssa_def *x)
{
   return nir_unpack_64_2x32_split_x(b, x);
}

static nir_ssa_def *
lower_i2i64(nir_builder *b, nir_ssa_def *x)
{
   nir_ssa_def *x32 = x->bit_size == 32 ? x : nir_i2i32(b, x);
   return nir_pack_64_2x32_split(b, x32, nir_ishr_imm(b, x32, 31));
}
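
/* Worked example for lower_i2i64() above: for x32 == -5 (0xFFFFFFFB), the
 * arithmetic shift x32 >> 31 yields 0xFFFFFFFF, so the packed result is
 * 0xFFFFFFFF_FFFFFFFB, i.e. -5 as a 64-bit integer.  For non-negative
 * inputs the shift yields 0 and the value is simply zero-extended.
 */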

static nir_ssa_def *
lower_u2u8(nir_builder *b, nir_ssa_def *x)
{
   return nir_u2u8(b, nir_unpack_64_2x32_split_x(b, x));
}

static nir_ssa_def *
lower_u2u16(nir_builder *b, nir_ssa_def *x)
{
   return nir_u2u16(b, nir_unpack_64_2x32_split_x(b, x));
}

static nir_ssa_def *
lower_u2u32(nir_builder *b, nir_ssa_def *x)
{
   return nir_unpack_64_2x32_split_x(b, x);
}

static nir_ssa_def *
lower_u2u64(nir_builder *b, nir_ssa_def *x)
{
   nir_ssa_def *x32 = x->bit_size == 32 ? x : nir_u2u32(b, x);
   return nir_pack_64_2x32_split(b, x32, nir_imm_int(b, 0));
}

static nir_ssa_def *
lower_bcsel64(nir_builder *b, nir_ssa_def *cond, nir_ssa_def *x, nir_ssa_def *y)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);

   return nir_pack_64_2x32_split(b, nir_bcsel(b, cond, x_lo, y_lo),
                                 nir_bcsel(b, cond, x_hi, y_hi));
}

static nir_ssa_def *
lower_inot64(nir_builder *b, nir_ssa_def *x)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);

   return nir_pack_64_2x32_split(b, nir_inot(b, x_lo), nir_inot(b, x_hi));
}

static nir_ssa_def *
lower_iand64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);

   return nir_pack_64_2x32_split(b, nir_iand(b, x_lo, y_lo),
                                 nir_iand(b, x_hi, y_hi));
}

static nir_ssa_def *
lower_ior64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);

   return nir_pack_64_2x32_split(b, nir_ior(b, x_lo, y_lo),
                                 nir_ior(b, x_hi, y_hi));
}

static nir_ssa_def *
lower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);

   return nir_pack_64_2x32_split(b, nir_ixor(b, x_lo, y_lo),
                                 nir_ixor(b, x_hi, y_hi));
}

static nir_ssa_def *
lower_ishl64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   /* Implemented as
    *
    * uint64_t lshift(uint64_t x, int c)
    * {
    *    c %= 64;
    *
    *    if (c == 0) return x;
    *
    *    uint32_t lo = LO(x), hi = HI(x);
    *
    *    if (c < 32) {
    *       uint32_t lo_shifted = lo << c;
    *       uint32_t hi_shifted = hi << c;
    *       uint32_t lo_shifted_hi = lo >> abs(32 - c);
    *       return pack_64(lo_shifted, hi_shifted | lo_shifted_hi);
    *    } else {
    *       uint32_t lo_shifted_hi = lo << abs(32 - c);
    *       return pack_64(0, lo_shifted_hi);
    *    }
    * }
    */
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   y = nir_iand_imm(b, y, 0x3f);

   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd_imm(b, y, -32));
   nir_ssa_def *lo_shifted = nir_ishl(b, x_lo, y);
   nir_ssa_def *hi_shifted = nir_ishl(b, x_hi, y);
   nir_ssa_def *lo_shifted_hi = nir_ushr(b, x_lo, reverse_count);

   nir_ssa_def *res_if_lt_32 =
      nir_pack_64_2x32_split(b, lo_shifted,
                             nir_ior(b, hi_shifted, lo_shifted_hi));
   nir_ssa_def *res_if_ge_32 =
      nir_pack_64_2x32_split(b, nir_imm_int(b, 0),
                             nir_ishl(b, x_lo, reverse_count));

   return nir_bcsel(b, nir_ieq_imm(b, y, 0), x,
                    nir_bcsel(b, nir_uge_imm(b, y, 32),
                              res_if_ge_32, res_if_lt_32));
}
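
/* Worked example for lower_ishl64() above: for x == 1 and y == 40, the
 * masked count is 40, reverse_count is |40 - 32| == 8, and the >= 32 path
 * produces pack_64(0, x_lo << 8), i.e. only bit 8 of the high dword is set,
 * which is 1ull << 40 as expected.  The y == 0 case is selected separately
 * up front so the reverse shift by 32 cannot leak low bits into the high
 * half.
 */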

static nir_ssa_def *
lower_ishr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   /* Implemented as
    *
    * uint64_t arshift(uint64_t x, int c)
    * {
    *    c %= 64;
    *
    *    if (c == 0) return x;
    *
    *    uint32_t lo = LO(x);
    *    int32_t  hi = HI(x);
    *
    *    if (c < 32) {
    *       uint32_t lo_shifted = lo >> c;
    *       uint32_t hi_shifted = hi >> c;
    *       uint32_t hi_shifted_lo = hi << abs(32 - c);
    *       return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
    *    } else {
    *       uint32_t hi_shifted = hi >> 31;
    *       uint32_t hi_shifted_lo = hi >> abs(32 - c);
    *       return pack_64(hi_shifted, hi_shifted_lo);
    *    }
    * }
    */
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   y = nir_iand_imm(b, y, 0x3f);

   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd_imm(b, y, -32));
   nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
   nir_ssa_def *hi_shifted = nir_ishr(b, x_hi, y);
   nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);

   nir_ssa_def *res_if_lt_32 =
      nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
                             hi_shifted);
   nir_ssa_def *res_if_ge_32 =
      nir_pack_64_2x32_split(b, nir_ishr(b, x_hi, reverse_count),
                             nir_ishr_imm(b, x_hi, 31));

   return nir_bcsel(b, nir_ieq_imm(b, y, 0), x,
                    nir_bcsel(b, nir_uge_imm(b, y, 32),
                              res_if_ge_32, res_if_lt_32));
}

static nir_ssa_def *
lower_ushr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   /* Implemented as
    *
    * uint64_t rshift(uint64_t x, int c)
    * {
    *    c %= 64;
    *
    *    if (c == 0) return x;
    *
    *    uint32_t lo = LO(x), hi = HI(x);
    *
    *    if (c < 32) {
    *       uint32_t lo_shifted = lo >> c;
    *       uint32_t hi_shifted = hi >> c;
    *       uint32_t hi_shifted_lo = hi << abs(32 - c);
    *       return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
    *    } else {
    *       uint32_t hi_shifted_lo = hi >> abs(32 - c);
    *       return pack_64(0, hi_shifted_lo);
    *    }
    * }
    */

   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   y = nir_iand_imm(b, y, 0x3f);

   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd_imm(b, y, -32));
   nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
   nir_ssa_def *hi_shifted = nir_ushr(b, x_hi, y);
   nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);

   nir_ssa_def *res_if_lt_32 =
      nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
                             hi_shifted);
   nir_ssa_def *res_if_ge_32 =
      nir_pack_64_2x32_split(b, nir_ushr(b, x_hi, reverse_count),
                             nir_imm_int(b, 0));

   return nir_bcsel(b, nir_ieq_imm(b, y, 0), x,
                    nir_bcsel(b, nir_uge_imm(b, y, 32),
                              res_if_ge_32, res_if_lt_32));
}

static nir_ssa_def *
lower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);

   nir_ssa_def *res_lo = nir_iadd(b, x_lo, y_lo);
   nir_ssa_def *carry = nir_b2i32(b, nir_ult(b, res_lo, x_lo));
   nir_ssa_def *res_hi = nir_iadd(b, carry, nir_iadd(b, x_hi, y_hi));

   return nir_pack_64_2x32_split(b, res_lo, res_hi);
}
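
/* Worked example for lower_iadd64() above: the carry out of the low half is
 * detected with an unsigned compare because 32-bit addition wraps.  For
 * x_lo == 0xFFFFFFFF and y_lo == 1, res_lo wraps to 0, so res_lo < x_lo is
 * true and 1 is added into the high half; when the low sum does not wrap,
 * res_lo >= x_lo and no carry is added.
 */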

static nir_ssa_def *
lower_isub64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);

   nir_ssa_def *res_lo = nir_isub(b, x_lo, y_lo);
   nir_ssa_def *borrow = nir_ineg(b, nir_b2i32(b, nir_ult(b, x_lo, y_lo)));
   nir_ssa_def *res_hi = nir_iadd(b, nir_isub(b, x_hi, y_hi), borrow);

   return nir_pack_64_2x32_split(b, res_lo, res_hi);
}

static nir_ssa_def *
lower_ineg64(nir_builder *b, nir_ssa_def *x)
{
   /* Since isub is the same number of instructions (with better dependencies)
    * as iadd, subtraction is actually more efficient for ineg than the usual
    * 2's complement "flip the bits and add one".
    */
   return lower_isub64(b, nir_imm_int64(b, 0), x);
}
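
/* Worked example for lower_ineg64() above: -x is computed as 0 - x, so
 * negating 1 goes through the split subtract-with-borrow and yields
 * 0xFFFFFFFF_FFFFFFFF, i.e. -1, without a separate carry chain.
 */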

static nir_ssa_def *
lower_iabs64(nir_builder *b, nir_ssa_def *x)
{
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   nir_ssa_def *x_is_neg = nir_ilt_imm(b, x_hi, 0);
   return nir_bcsel(b, x_is_neg, nir_ineg(b, x), x);
}

static nir_ssa_def *
lower_int64_compare(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *y)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);

   switch (op) {
   case nir_op_ieq:
      return nir_iand(b, nir_ieq(b, x_hi, y_hi), nir_ieq(b, x_lo, y_lo));
   case nir_op_ine:
      return nir_ior(b, nir_ine(b, x_hi, y_hi), nir_ine(b, x_lo, y_lo));
   case nir_op_ult:
      return nir_ior(b, nir_ult(b, x_hi, y_hi),
                     nir_iand(b, nir_ieq(b, x_hi, y_hi),
                              nir_ult(b, x_lo, y_lo)));
   case nir_op_ilt:
      return nir_ior(b, nir_ilt(b, x_hi, y_hi),
                     nir_iand(b, nir_ieq(b, x_hi, y_hi),
                              nir_ult(b, x_lo, y_lo)));
      break;
   case nir_op_uge:
      /* Lower as !(x < y) in the hopes of better CSE */
      return nir_inot(b, lower_int64_compare(b, nir_op_ult, x, y));
   case nir_op_ige:
      /* Lower as !(x < y) in the hopes of better CSE */
      return nir_inot(b, lower_int64_compare(b, nir_op_ilt, x, y));
   default:
      unreachable("Invalid comparison");
   }
}
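
/* Illustration of the ult case above: a 64-bit unsigned compare reduces to
 * "x_hi < y_hi, or the high halves are equal and x_lo < y_lo".  For example,
 * 0x00000001_00000000 < 0x00000002_00000000 is decided by the high dwords
 * alone, while 0x00000001_00000005 < 0x00000001_00000009 falls through to
 * the low-dword compare.  The signed ilt case only changes how the high
 * halves are compared; the low halves always compare unsigned.
 */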

static nir_ssa_def *
lower_umax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), y, x);
}

static nir_ssa_def *
lower_imax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), y, x);
}

static nir_ssa_def *
lower_umin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), x, y);
}

static nir_ssa_def *
lower_imin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), x, y);
}

static nir_ssa_def *
lower_mul_2x32_64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y,
                  bool sign_extend)
{
   nir_ssa_def *res_hi = sign_extend ? nir_imul_high(b, x, y)
                                     : nir_umul_high(b, x, y);

   return nir_pack_64_2x32_split(b, nir_imul(b, x, y), res_hi);
}

static nir_ssa_def *
lower_imul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);

   nir_ssa_def *mul_lo = nir_umul_2x32_64(b, x_lo, y_lo);
   nir_ssa_def *res_hi = nir_iadd(b, nir_unpack_64_2x32_split_y(b, mul_lo),
                                  nir_iadd(b, nir_imul(b, x_lo, y_hi),
                                           nir_imul(b, x_hi, y_lo)));

   return nir_pack_64_2x32_split(b, nir_unpack_64_2x32_split_x(b, mul_lo),
                                 res_hi);
}
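
/* The identity behind lower_imul64() above, sketched as the schoolbook
 * expansion reduced modulo 2^64:
 *
 *    (x_hi * 2^32 + x_lo) * (y_hi * 2^32 + y_lo)
 *       = x_lo * y_lo                              (kept as a full 64-bit product)
 *       + (x_lo * y_hi + x_hi * y_lo) * 2^32       (only the low 32 bits survive)
 *       + x_hi * y_hi * 2^64                       (drops out entirely)
 *
 * so the low dword is LO(x_lo * y_lo) and the high dword is
 * HI(x_lo * y_lo) + x_lo * y_hi + x_hi * y_lo, built from 32-bit multiplies
 * plus a single 32x32 -> 64 multiply.
 */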

static nir_ssa_def *
lower_mul_high64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y,
                 bool sign_extend)
{
   nir_ssa_def *x32[4], *y32[4];
   x32[0] = nir_unpack_64_2x32_split_x(b, x);
   x32[1] = nir_unpack_64_2x32_split_y(b, x);
   if (sign_extend) {
      x32[2] = x32[3] = nir_ishr_imm(b, x32[1], 31);
   } else {
      x32[2] = x32[3] = nir_imm_int(b, 0);
   }

   y32[0] = nir_unpack_64_2x32_split_x(b, y);
   y32[1] = nir_unpack_64_2x32_split_y(b, y);
   if (sign_extend) {
      y32[2] = y32[3] = nir_ishr_imm(b, y32[1], 31);
   } else {
      y32[2] = y32[3] = nir_imm_int(b, 0);
   }

   nir_ssa_def *res[8] = { NULL, };

   /* Yes, the following generates a pile of code.  However, we throw res[0]
    * and res[1] away in the end and, if we're in the umul case, four of our
    * eight dword operands will be constant zero and opt_algebraic will clean
    * this up nicely.
    */
   for (unsigned i = 0; i < 4; i++) {
      nir_ssa_def *carry = NULL;
      for (unsigned j = 0; j < 4; j++) {
         /* The maximum values of x32[i] and y32[j] are UINT32_MAX so the
          * maximum value of tmp is UINT32_MAX * UINT32_MAX.  The maximum
          * value that will fit in tmp is
          *
          *    UINT64_MAX = UINT32_MAX << 32 + UINT32_MAX
          *               = UINT32_MAX * (UINT32_MAX + 1) + UINT32_MAX
          *               = UINT32_MAX * UINT32_MAX + 2 * UINT32_MAX
          *
          * so we're guaranteed that we can add in two more 32-bit values
          * without overflowing tmp.
          */
         nir_ssa_def *tmp = nir_umul_2x32_64(b, x32[i], y32[j]);

         if (res[i + j])
            tmp = nir_iadd(b, tmp, nir_u2u64(b, res[i + j]));
         if (carry)
            tmp = nir_iadd(b, tmp, carry);
         res[i + j] = nir_u2u32(b, tmp);
         carry = nir_ushr_imm(b, tmp, 32);
      }
      res[i + 4] = nir_u2u32(b, carry);
   }

   return nir_pack_64_2x32_split(b, res[2], res[3]);
}

static nir_ssa_def *
lower_isign64(nir_builder *b, nir_ssa_def *x)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);

   nir_ssa_def *is_non_zero = nir_i2b(b, nir_ior(b, x_lo, x_hi));
   nir_ssa_def *res_hi = nir_ishr_imm(b, x_hi, 31);
   nir_ssa_def *res_lo = nir_ior(b, res_hi, nir_b2i32(b, is_non_zero));

   return nir_pack_64_2x32_split(b, res_lo, res_hi);
}
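
/* Case table for lower_isign64() above: res_hi is x_hi >> 31, i.e. 0 for
 * non-negative inputs and 0xFFFFFFFF for negative ones.
 *
 *    x > 0:   res_hi == 0,          res_lo == 0 | 1 == 1           ->  1
 *    x == 0:  res_hi == 0,          res_lo == 0 | 0 == 0           ->  0
 *    x < 0:   res_hi == 0xFFFFFFFF, res_lo == 0xFFFFFFFF | (0 or 1) -> -1
 */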

static void
lower_udiv64_mod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d,
                   nir_ssa_def **q, nir_ssa_def **r)
{
   /* TODO: We should specially handle the case where the denominator is a
    * constant.  In that case, we should be able to reduce it to a multiply by
    * a constant, some shifts, and an add.
    */
   nir_ssa_def *n_lo = nir_unpack_64_2x32_split_x(b, n);
   nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
   nir_ssa_def *d_lo = nir_unpack_64_2x32_split_x(b, d);
   nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);

   nir_ssa_def *q_lo = nir_imm_zero(b, n->num_components, 32);
   nir_ssa_def *q_hi = nir_imm_zero(b, n->num_components, 32);

   nir_ssa_def *n_hi_before_if = n_hi;
   nir_ssa_def *q_hi_before_if = q_hi;

   /* If the upper 32 bits of denom are non-zero, it is impossible for shifts
    * greater than 32 bits to occur.  If the upper 32 bits of the numerator
    * are zero, it is impossible for (denom << [63, 32]) <= numer unless
    * denom == 0.
    */
   nir_ssa_def *need_high_div =
      nir_iand(b, nir_ieq_imm(b, d_hi, 0), nir_uge(b, n_hi, d_lo));
   nir_push_if(b, nir_bany(b, need_high_div));
   {
      /* If we only have one component, then the bany above goes away and
       * this is always true within the if statement.
       */
      if (n->num_components == 1)
         need_high_div = nir_imm_true(b);

      nir_ssa_def *log2_d_lo = nir_ufind_msb(b, d_lo);

      for (int i = 31; i >= 0; i--) {
         /* if ((d.x << i) <= n.y) {
          *    n.y -= d.x << i;
          *    quot.y |= 1U << i;
          * }
          */
         nir_ssa_def *d_shift = nir_ishl_imm(b, d_lo, i);
         nir_ssa_def *new_n_hi = nir_isub(b, n_hi, d_shift);
         nir_ssa_def *new_q_hi = nir_ior_imm(b, q_hi, 1ull << i);
         nir_ssa_def *cond = nir_iand(b, need_high_div,
                                      nir_uge(b, n_hi, d_shift));
         if (i != 0) {
            /* log2_d_lo is always <= 31, so we don't need to bother with it
             * in the last iteration.
             */
            cond = nir_iand(b, cond,
                            nir_ile_imm(b, log2_d_lo, 31 - i));
         }
         n_hi = nir_bcsel(b, cond, new_n_hi, n_hi);
         q_hi = nir_bcsel(b, cond, new_q_hi, q_hi);
      }
   }
   nir_pop_if(b, NULL);
   n_hi = nir_if_phi(b, n_hi, n_hi_before_if);
   q_hi = nir_if_phi(b, q_hi, q_hi_before_if);

   nir_ssa_def *log2_denom = nir_ufind_msb(b, d_hi);

   n = nir_pack_64_2x32_split(b, n_lo, n_hi);
   d = nir_pack_64_2x32_split(b, d_lo, d_hi);
   for (int i = 31; i >= 0; i--) {
      /* if ((d64 << i) <= n64) {
       *    n64 -= d64 << i;
       *    quot.x |= 1U << i;
       * }
       */
      nir_ssa_def *d_shift = nir_ishl_imm(b, d, i);
      nir_ssa_def *new_n = nir_isub(b, n, d_shift);
      nir_ssa_def *new_q_lo = nir_ior_imm(b, q_lo, 1ull << i);
      nir_ssa_def *cond = nir_uge(b, n, d_shift);
      if (i != 0) {
         /* log2_denom is always <= 31, so we don't need to bother with it
          * in the last iteration.
          */
         cond = nir_iand(b, cond,
                         nir_ile_imm(b, log2_denom, 31 - i));
      }
      n = nir_bcsel(b, cond, new_n, n);
      q_lo = nir_bcsel(b, cond, new_q_lo, q_lo);
   }

   *q = nir_pack_64_2x32_split(b, q_lo, q_hi);
   *r = n;
}
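
/* The routine above is classic restoring (shift-and-subtract) long division:
 * each iteration conditionally subtracts d << i from the running numerator
 * and sets bit i of the quotient.  A small sketch with tiny values: for
 * n == 100 and d == 7, the i == 3 step subtracts 7 << 3 == 56 (quotient
 * bit 3, remainder 44), i == 2 subtracts 28 (bit 2, remainder 16), i == 1
 * subtracts 14 (bit 1, remainder 2), giving q == 0b1110 == 14 and r == 2.
 * The first loop, which produces the high quotient dword, only runs when the
 * quotient can exceed 32 bits, i.e. when d_hi == 0 and n_hi >= d_lo.
 */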

static nir_ssa_def *
lower_udiv64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
{
   nir_ssa_def *q, *r;
   lower_udiv64_mod64(b, n, d, &q, &r);
   return q;
}

static nir_ssa_def *
lower_idiv64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
{
   nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
   nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);

   nir_ssa_def *negate = nir_ine(b, nir_ilt_imm(b, n_hi, 0),
                                 nir_ilt_imm(b, d_hi, 0));
   nir_ssa_def *q, *r;
   lower_udiv64_mod64(b, nir_iabs(b, n), nir_iabs(b, d), &q, &r);
   return nir_bcsel(b, negate, nir_ineg(b, q), q);
}

static nir_ssa_def *
lower_umod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
{
   nir_ssa_def *q, *r;
   lower_udiv64_mod64(b, n, d, &q, &r);
   return r;
}

static nir_ssa_def *
lower_imod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
{
   nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
   nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);
   nir_ssa_def *n_is_neg = nir_ilt_imm(b, n_hi, 0);
   nir_ssa_def *d_is_neg = nir_ilt_imm(b, d_hi, 0);

   nir_ssa_def *q, *r;
   lower_udiv64_mod64(b, nir_iabs(b, n), nir_iabs(b, d), &q, &r);

   nir_ssa_def *rem = nir_bcsel(b, n_is_neg, nir_ineg(b, r), r);

   return nir_bcsel(b, nir_ieq_imm(b, r, 0), nir_imm_int64(b, 0),
                    nir_bcsel(b, nir_ieq(b, n_is_neg, d_is_neg), rem,
                              nir_iadd(b, rem, d)));
}

static nir_ssa_def *
lower_irem64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
{
   nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
   nir_ssa_def *n_is_neg = nir_ilt_imm(b, n_hi, 0);

   nir_ssa_def *q, *r;
   lower_udiv64_mod64(b, nir_iabs(b, n), nir_iabs(b, d), &q, &r);
   return nir_bcsel(b, n_is_neg, nir_ineg(b, r), r);
}

static nir_ssa_def *
lower_extract(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *c)
{
   assert(op == nir_op_extract_u8 || op == nir_op_extract_i8 ||
          op == nir_op_extract_u16 || op == nir_op_extract_i16);

   const int chunk = nir_src_as_uint(nir_src_for_ssa(c));
   const int chunk_bits =
      (op == nir_op_extract_u8 || op == nir_op_extract_i8) ? 8 : 16;
   const int num_chunks_in_32 = 32 / chunk_bits;

   nir_ssa_def *extract32;
   if (chunk < num_chunks_in_32) {
      extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_x(b, x),
                                nir_imm_int(b, chunk),
                                NULL, NULL);
   } else {
      extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_y(b, x),
                                nir_imm_int(b, chunk - num_chunks_in_32),
                                NULL, NULL);
   }

   if (op == nir_op_extract_i8 || op == nir_op_extract_i16)
      return lower_i2i64(b, extract32);
   else
      return lower_u2u64(b, extract32);
}

static nir_ssa_def *
lower_ufind_msb64(nir_builder *b, nir_ssa_def *x)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   nir_ssa_def *lo_count = nir_ufind_msb(b, x_lo);
   nir_ssa_def *hi_count = nir_ufind_msb(b, x_hi);

   if (b->shader->options->lower_uadd_sat) {
      nir_ssa_def *valid_hi_bits = nir_ine_imm(b, x_hi, 0);
      nir_ssa_def *hi_res = nir_iadd_imm(b, hi_count, 32);
      return nir_bcsel(b, valid_hi_bits, hi_res, lo_count);
   } else {
      /* If hi_count was -1, it will still be -1 after this uadd_sat.  As a
       * result, hi_count is either -1 or the correct return value for 64-bit
       * ufind_msb.
       */
      nir_ssa_def *hi_res = nir_uadd_sat(b, nir_imm_intN_t(b, 32, 32), hi_count);

      /* hi_res is either -1 or a value in the range [63, 32].  lo_count is
       * either -1 or a value in the range [31, 0].  The imax will pick
       * lo_count only when hi_res is -1.  In those cases, lo_count is
       * guaranteed to be the correct answer.
       */
      return nir_imax(b, hi_res, lo_count);
   }
}
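
/* Worked example for lower_ufind_msb64() above: for x == 0x00000004_00000001
 * the high dword is non-zero, ufind_msb(x_hi) == 2 and the result is
 * 2 + 32 == 34.  When the high dword is zero, the low-dword count is used
 * unchanged, and for x == 0 both counts are -1, so the result is -1.
 */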

static nir_ssa_def *
lower_find_lsb64(nir_builder *b, nir_ssa_def *x)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   nir_ssa_def *lo_lsb = nir_find_lsb(b, x_lo);
   nir_ssa_def *hi_lsb = nir_find_lsb(b, x_hi);

   /* Use umin so that -1 (no bits found) becomes larger (0xFFFFFFFF)
    * than any actual bit position, so we return a found bit instead.
    */
   return nir_umin(b, lo_lsb, nir_iadd_imm(b, hi_lsb, 32));
}

static nir_ssa_def *
lower_2f(nir_builder *b, nir_ssa_def *x, unsigned dest_bit_size,
         bool src_is_signed)
{
   nir_ssa_def *x_sign = NULL;

   if (src_is_signed) {
      x_sign = nir_bcsel(b, COND_LOWER_CMP(b, ilt, x, nir_imm_int64(b, 0)),
                         nir_imm_floatN_t(b, -1, dest_bit_size),
                         nir_imm_floatN_t(b, 1, dest_bit_size));
      x = COND_LOWER_OP(b, iabs, x);
   }

   nir_ssa_def *exp = COND_LOWER_OP(b, ufind_msb, x);
   unsigned significand_bits;

   switch (dest_bit_size) {
   case 64:
      significand_bits = 52;
      break;
   case 32:
      significand_bits = 23;
      break;
   case 16:
      significand_bits = 10;
      break;
   default:
      unreachable("Invalid dest_bit_size");
   }

   nir_ssa_def *discard =
      nir_imax(b, nir_iadd_imm(b, exp, -significand_bits),
               nir_imm_int(b, 0));
   nir_ssa_def *significand = COND_LOWER_OP(b, ushr, x, discard);
   if (significand_bits < 32)
      significand = COND_LOWER_CAST(b, u2u32, significand);

   /* Round-to-nearest-even implementation:
    * - if the non-representable part of the significand is higher than half
    *   the minimum representable significand, we round-up
    * - if the non-representable part of the significand is equal to half the
    *   minimum representable significand and the representable part of the
    *   significand is odd, we round-up
    * - in any other case, we round-down
    */
   nir_ssa_def *lsb_mask = COND_LOWER_OP(b, ishl, nir_imm_int64(b, 1), discard);
   nir_ssa_def *rem_mask = COND_LOWER_OP(b, isub, lsb_mask, nir_imm_int64(b, 1));
   nir_ssa_def *half = COND_LOWER_OP(b, ishr, lsb_mask, nir_imm_int(b, 1));
   nir_ssa_def *rem = COND_LOWER_OP(b, iand, x, rem_mask);
   nir_ssa_def *halfway = nir_iand(b, COND_LOWER_CMP(b, ieq, rem, half),
                                   nir_ine_imm(b, discard, 0));
   nir_ssa_def *is_odd = COND_LOWER_CMP(b, ine, nir_imm_int64(b, 0),
                                        COND_LOWER_OP(b, iand, x, lsb_mask));
   nir_ssa_def *round_up = nir_ior(b, COND_LOWER_CMP(b, ilt, half, rem),
                                   nir_iand(b, halfway, is_odd));
   if (significand_bits >= 32)
      significand = COND_LOWER_OP(b, iadd, significand,
                                  COND_LOWER_CAST(b, b2i64, round_up));
   else
      significand = nir_iadd(b, significand, nir_b2i32(b, round_up));

   nir_ssa_def *res;

   if (dest_bit_size == 64) {
      /* Compute the left shift required to normalize the original
       * unrounded input manually.
       */
      nir_ssa_def *shift =
         nir_imax(b, nir_isub_imm(b, significand_bits, exp),
                  nir_imm_int(b, 0));
      significand = COND_LOWER_OP(b, ishl, significand, shift);

      /* Check whether normalization led to overflow of the available
       * significand bits, which can only happen if round_up was true
       * above, in which case we need to add carry to the exponent and
       * discard an extra bit from the significand.  Note that we
       * don't need to repeat the round-up logic again, since the LSB
       * of the significand is guaranteed to be zero if there was
       * overflow.
       */
      nir_ssa_def *carry = nir_b2i32(
         b, nir_uge_imm(b, nir_unpack_64_2x32_split_y(b, significand),
                        (uint64_t)(1 << (significand_bits - 31))));
      significand = COND_LOWER_OP(b, ishr, significand, carry);
      exp = nir_iadd(b, exp, carry);

      /* Compute the biased exponent, taking care to handle a zero
       * input correctly, which would have caused exp to be negative.
       */
      nir_ssa_def *biased_exp = nir_bcsel(b, nir_ilt_imm(b, exp, 0),
                                          nir_imm_int(b, 0),
                                          nir_iadd_imm(b, exp, 1023));

      /* Pack the significand and exponent manually. */
      nir_ssa_def *lo = nir_unpack_64_2x32_split_x(b, significand);
      nir_ssa_def *hi = nir_bitfield_insert(
         b, nir_unpack_64_2x32_split_y(b, significand),
         biased_exp, nir_imm_int(b, 20), nir_imm_int(b, 11));

      res = nir_pack_64_2x32_split(b, lo, hi);

   } else if (dest_bit_size == 32) {
      res = nir_fmul(b, nir_u2f32(b, significand),
                     nir_fexp2(b, nir_u2f32(b, discard)));
   } else {
      res = nir_fmul(b, nir_u2f16(b, significand),
                     nir_fexp2(b, nir_u2f16(b, discard)));
   }

   if (src_is_signed)
      res = nir_fmul(b, res, x_sign);

   return res;
}
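
/* Worked round-to-nearest-even example for lower_2f() above, converting the
 * 64-bit integer 2^24 + 3 to float32: exp == 24, so one bit must be dropped
 * (discard == 1) and the kept significand is 0x800001.  The dropped bit is
 * exactly half the new LSB and the kept significand is odd, so round_up is
 * taken, the significand becomes 0x800002, and the result is
 * 0x800002 * 2^1 == 16777220.0f, matching a native int-to-float conversion.
 */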

static nir_ssa_def *
lower_f2(nir_builder *b, nir_ssa_def *x, bool dst_is_signed)
{
   assert(x->bit_size == 16 || x->bit_size == 32 || x->bit_size == 64);
   nir_ssa_def *x_sign = NULL;

   if (dst_is_signed)
      x_sign = nir_fsign(b, x);

   x = nir_ftrunc(b, x);

   if (dst_is_signed)
      x = nir_fabs(b, x);

   nir_ssa_def *res;
   if (x->bit_size < 32) {
      res = nir_pack_64_2x32_split(b, nir_f2u32(b, x), nir_imm_int(b, 0));
   } else {
      nir_ssa_def *div = nir_imm_floatN_t(b, 1ULL << 32, x->bit_size);
      nir_ssa_def *res_hi = nir_f2u32(b, nir_fdiv(b, x, div));
      nir_ssa_def *res_lo = nir_f2u32(b, nir_frem(b, x, div));
      res = nir_pack_64_2x32_split(b, res_lo, res_hi);
   }

   if (dst_is_signed)
      res = nir_bcsel(b, nir_flt_imm(b, x_sign, 0),
                      nir_ineg(b, res), res);

   return res;
}
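
/* Illustration of the float-to-int64 split above: the truncated magnitude is
 * divided by 2^32 to produce the high dword and the remainder supplies the
 * low dword.  For example, 2^40 converts as hi == f2u32(2^40 / 2^32) == 256
 * and lo == f2u32(frem(2^40, 2^32)) == 0, i.e. 0x00000100_00000000.  When
 * the destination is signed, the sign is reapplied with a final negate.
 */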

static nir_ssa_def *
lower_bit_count64(nir_builder *b, nir_ssa_def *x)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   nir_ssa_def *lo_count = nir_bit_count(b, x_lo);
   nir_ssa_def *hi_count = nir_bit_count(b, x_hi);
   return nir_iadd(b, lo_count, hi_count);
}

nir_lower_int64_options
nir_lower_int64_op_to_options_mask(nir_op opcode)
{
   switch (opcode) {
   case nir_op_imul:
   case nir_op_amul:
      return nir_lower_imul64;
   case nir_op_imul_2x32_64:
   case nir_op_umul_2x32_64:
      return nir_lower_imul_2x32_64;
   case nir_op_imul_high:
   case nir_op_umul_high:
      return nir_lower_imul_high64;
   case nir_op_isign:
      return nir_lower_isign64;
   case nir_op_udiv:
   case nir_op_idiv:
   case nir_op_umod:
   case nir_op_imod:
   case nir_op_irem:
      return nir_lower_divmod64;
   case nir_op_b2i64:
   case nir_op_i2i8:
   case nir_op_i2i16:
   case nir_op_i2i32:
   case nir_op_i2i64:
   case nir_op_u2u8:
   case nir_op_u2u16:
   case nir_op_u2u32:
   case nir_op_u2u64:
   case nir_op_i2f64:
   case nir_op_u2f64:
   case nir_op_i2f32:
   case nir_op_u2f32:
   case nir_op_i2f16:
   case nir_op_u2f16:
   case nir_op_f2i64:
   case nir_op_f2u64:
      return nir_lower_conv64;
   case nir_op_bcsel:
      return nir_lower_bcsel64;
   case nir_op_ieq:
   case nir_op_ine:
   case nir_op_ult:
   case nir_op_ilt:
   case nir_op_uge:
   case nir_op_ige:
      return nir_lower_icmp64;
   case nir_op_iadd:
   case nir_op_isub:
      return nir_lower_iadd64;
   case nir_op_imin:
   case nir_op_imax:
   case nir_op_umin:
   case nir_op_umax:
      return nir_lower_minmax64;
   case nir_op_iabs:
      return nir_lower_iabs64;
   case nir_op_ineg:
      return nir_lower_ineg64;
   case nir_op_iand:
   case nir_op_ior:
   case nir_op_ixor:
   case nir_op_inot:
      return nir_lower_logic64;
   case nir_op_ishl:
   case nir_op_ishr:
   case nir_op_ushr:
      return nir_lower_shift64;
   case nir_op_extract_u8:
   case nir_op_extract_i8:
   case nir_op_extract_u16:
   case nir_op_extract_i16:
      return nir_lower_extract64;
   case nir_op_ufind_msb:
      return nir_lower_ufind_msb64;
   case nir_op_find_lsb:
      return nir_lower_find_lsb64;
   case nir_op_bit_count:
      return nir_lower_bit_count64;
   default:
      return 0;
   }
}

static nir_ssa_def *
lower_int64_alu_instr(nir_builder *b, nir_alu_instr *alu)
{
   nir_ssa_def *src[4];
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
      src[i] = nir_ssa_for_alu_src(b, alu, i);

   switch (alu->op) {
   case nir_op_imul:
   case nir_op_amul:
      return lower_imul64(b, src[0], src[1]);
   case nir_op_imul_2x32_64:
      return lower_mul_2x32_64(b, src[0], src[1], true);
   case nir_op_umul_2x32_64:
      return lower_mul_2x32_64(b, src[0], src[1], false);
   case nir_op_imul_high:
      return lower_mul_high64(b, src[0], src[1], true);
   case nir_op_umul_high:
      return lower_mul_high64(b, src[0], src[1], false);
   case nir_op_isign:
      return lower_isign64(b, src[0]);
   case nir_op_udiv:
      return lower_udiv64(b, src[0], src[1]);
   case nir_op_idiv:
      return lower_idiv64(b, src[0], src[1]);
   case nir_op_umod:
      return lower_umod64(b, src[0], src[1]);
   case nir_op_imod:
      return lower_imod64(b, src[0], src[1]);
   case nir_op_irem:
      return lower_irem64(b, src[0], src[1]);
   case nir_op_b2i64:
      return lower_b2i64(b, src[0]);
   case nir_op_i2i8:
      return lower_i2i8(b, src[0]);
   case nir_op_i2i16:
      return lower_i2i16(b, src[0]);
   case nir_op_i2i32:
      return lower_i2i32(b, src[0]);
   case nir_op_i2i64:
      return lower_i2i64(b, src[0]);
   case nir_op_u2u8:
      return lower_u2u8(b, src[0]);
   case nir_op_u2u16:
      return lower_u2u16(b, src[0]);
   case nir_op_u2u32:
      return lower_u2u32(b, src[0]);
   case nir_op_u2u64:
      return lower_u2u64(b, src[0]);
   case nir_op_bcsel:
      return lower_bcsel64(b, src[0], src[1], src[2]);
   case nir_op_ieq:
   case nir_op_ine:
   case nir_op_ult:
   case nir_op_ilt:
   case nir_op_uge:
   case nir_op_ige:
      return lower_int64_compare(b, alu->op, src[0], src[1]);
   case nir_op_iadd:
      return lower_iadd64(b, src[0], src[1]);
   case nir_op_isub:
      return lower_isub64(b, src[0], src[1]);
   case nir_op_imin:
      return lower_imin64(b, src[0], src[1]);
   case nir_op_imax:
      return lower_imax64(b, src[0], src[1]);
   case nir_op_umin:
      return lower_umin64(b, src[0], src[1]);
   case nir_op_umax:
      return lower_umax64(b, src[0], src[1]);
   case nir_op_iabs:
      return lower_iabs64(b, src[0]);
   case nir_op_ineg:
      return lower_ineg64(b, src[0]);
   case nir_op_iand:
      return lower_iand64(b, src[0], src[1]);
   case nir_op_ior:
      return lower_ior64(b, src[0], src[1]);
   case nir_op_ixor:
      return lower_ixor64(b, src[0], src[1]);
   case nir_op_inot:
      return lower_inot64(b, src[0]);
   case nir_op_ishl:
      return lower_ishl64(b, src[0], src[1]);
   case nir_op_ishr:
      return lower_ishr64(b, src[0], src[1]);
   case nir_op_ushr:
      return lower_ushr64(b, src[0], src[1]);
   case nir_op_extract_u8:
   case nir_op_extract_i8:
   case nir_op_extract_u16:
   case nir_op_extract_i16:
      return lower_extract(b, alu->op, src[0], src[1]);
   case nir_op_ufind_msb:
      return lower_ufind_msb64(b, src[0]);
   case nir_op_find_lsb:
      return lower_find_lsb64(b, src[0]);
   case nir_op_bit_count:
      return lower_bit_count64(b, src[0]);
   case nir_op_i2f64:
   case nir_op_i2f32:
   case nir_op_i2f16:
      return lower_2f(b, src[0], nir_dest_bit_size(alu->dest.dest), true);
   case nir_op_u2f64:
   case nir_op_u2f32:
   case nir_op_u2f16:
      return lower_2f(b, src[0], nir_dest_bit_size(alu->dest.dest), false);
   case nir_op_f2i64:
   case nir_op_f2u64:
      return lower_f2(b, src[0], alu->op == nir_op_f2i64);
   default:
      unreachable("Invalid ALU opcode to lower");
   }
}

static bool
should_lower_int64_alu_instr(const nir_alu_instr *alu,
                             const nir_shader_compiler_options *options)
{
   switch (alu->op) {
   case nir_op_i2i8:
   case nir_op_i2i16:
   case nir_op_i2i32:
   case nir_op_u2u8:
   case nir_op_u2u16:
   case nir_op_u2u32:
      if (alu->src[0].src.ssa->bit_size != 64)
         return false;
      break;
   case nir_op_bcsel:
      assert(alu->src[1].src.ssa->bit_size ==
             alu->src[2].src.ssa->bit_size);
      if (alu->src[1].src.ssa->bit_size != 64)
         return false;
      break;
   case nir_op_ieq:
   case nir_op_ine:
   case nir_op_ult:
   case nir_op_ilt:
   case nir_op_uge:
   case nir_op_ige:
      assert(alu->src[0].src.ssa->bit_size ==
             alu->src[1].src.ssa->bit_size);
      if (alu->src[0].src.ssa->bit_size != 64)
         return false;
      break;
   case nir_op_ufind_msb:
   case nir_op_find_lsb:
   case nir_op_bit_count:
      if (alu->src[0].src.ssa->bit_size != 64)
         return false;
      break;
   case nir_op_amul:
      if (options->has_imul24)
         return false;
      if (alu->dest.dest.ssa.bit_size != 64)
         return false;
      break;
   case nir_op_i2f64:
   case nir_op_u2f64:
   case nir_op_i2f32:
   case nir_op_u2f32:
   case nir_op_i2f16:
   case nir_op_u2f16:
      if (alu->src[0].src.ssa->bit_size != 64)
         return false;
      break;
   case nir_op_f2u64:
   case nir_op_f2i64:
      FALLTHROUGH;
   default:
      if (alu->dest.dest.ssa.bit_size != 64)
         return false;
      break;
   }

   unsigned mask = nir_lower_int64_op_to_options_mask(alu->op);
   return (options->lower_int64_options & mask) != 0;
}

static nir_ssa_def *
split_64bit_subgroup_op(nir_builder *b, const nir_intrinsic_instr *intrin)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];

   /* This works on subgroup ops with a single 64-bit source which can be
    * trivially lowered by doing the exact same op on both halves.
    */
   assert(intrin->src[0].is_ssa && intrin->src[0].ssa->bit_size == 64);
   nir_ssa_def *split_src0[2] = {
      nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa),
      nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa),
   };

   assert(info->has_dest && intrin->dest.is_ssa &&
          intrin->dest.ssa.bit_size == 64);

   nir_ssa_def *res[2];
   for (unsigned i = 0; i < 2; i++) {
      nir_intrinsic_instr *split =
         nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
      split->num_components = intrin->num_components;
      split->src[0] = nir_src_for_ssa(split_src0[i]);

      /* Other sources must be less than 64 bits and get copied directly */
      for (unsigned j = 1; j < info->num_srcs; j++) {
         assert(intrin->src[j].is_ssa && intrin->src[j].ssa->bit_size < 64);
         split->src[j] = nir_src_for_ssa(intrin->src[j].ssa);
      }

      /* Copy const indices, if any */
      memcpy(split->const_index, intrin->const_index,
             sizeof(intrin->const_index));

      nir_ssa_dest_init(&split->instr, &split->dest,
                        intrin->dest.ssa.num_components, 32);
      nir_builder_instr_insert(b, &split->instr);

      res[i] = &split->dest.ssa;
   }

   return nir_pack_64_2x32_split(b, res[0], res[1]);
}

static nir_ssa_def *
build_vote_ieq(nir_builder *b, nir_ssa_def *x)
{
   nir_intrinsic_instr *vote =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_vote_ieq);
   vote->src[0] = nir_src_for_ssa(x);
   vote->num_components = x->num_components;
   nir_ssa_dest_init(&vote->instr, &vote->dest, 1, 1);
   nir_builder_instr_insert(b, &vote->instr);
   return &vote->dest.ssa;
}

static nir_ssa_def *
lower_vote_ieq(nir_builder *b, nir_ssa_def *x)
{
   return nir_iand(b, build_vote_ieq(b, nir_unpack_64_2x32_split_x(b, x)),
                   build_vote_ieq(b, nir_unpack_64_2x32_split_y(b, x)));
}

static nir_ssa_def *
build_scan_intrinsic(nir_builder *b, nir_intrinsic_op scan_op,
                     nir_op reduction_op, unsigned cluster_size,
                     nir_ssa_def *val)
{
   nir_intrinsic_instr *scan =
      nir_intrinsic_instr_create(b->shader, scan_op);
   scan->num_components = val->num_components;
   scan->src[0] = nir_src_for_ssa(val);
   nir_intrinsic_set_reduction_op(scan, reduction_op);
   if (scan_op == nir_intrinsic_reduce)
      nir_intrinsic_set_cluster_size(scan, cluster_size);
   nir_ssa_dest_init(&scan->instr, &scan->dest, val->num_components,
                     val->bit_size);
   nir_builder_instr_insert(b, &scan->instr);
   return &scan->dest.ssa;
}

static nir_ssa_def *
lower_scan_iadd64(nir_builder *b, const nir_intrinsic_instr *intrin)
{
   unsigned cluster_size =
      intrin->intrinsic == nir_intrinsic_reduce ?
      nir_intrinsic_cluster_size(intrin) : 0;

   /* Split it into three chunks of no more than 24 bits each.  With 8 bits
    * of headroom, we're guaranteed that there will never be overflow in the
    * individual subgroup operations.  (Assuming, of course, a subgroup size
    * no larger than 256 which seems reasonable.)  We can then scan on each of
    * the chunks and add them back together at the end.
    */
   nir_ssa_def *x = intrin->src[0].ssa;
   nir_ssa_def *x_low =
      nir_u2u32(b, nir_iand_imm(b, x, 0xffffff));
   nir_ssa_def *x_mid =
      nir_u2u32(b, nir_iand_imm(b, nir_ushr_imm(b, x, 24),
                                0xffffff));
   nir_ssa_def *x_hi =
      nir_u2u32(b, nir_ushr_imm(b, x, 48));

   nir_ssa_def *scan_low =
      build_scan_intrinsic(b, intrin->intrinsic, nir_op_iadd,
                           cluster_size, x_low);
   nir_ssa_def *scan_mid =
      build_scan_intrinsic(b, intrin->intrinsic, nir_op_iadd,
                           cluster_size, x_mid);
   nir_ssa_def *scan_hi =
      build_scan_intrinsic(b, intrin->intrinsic, nir_op_iadd,
                           cluster_size, x_hi);

   scan_low = nir_u2u64(b, scan_low);
   scan_mid = nir_ishl_imm(b, nir_u2u64(b, scan_mid), 24);
   scan_hi = nir_ishl_imm(b, nir_u2u64(b, scan_hi), 48);

   return nir_iadd(b, scan_hi, nir_iadd(b, scan_mid, scan_low));
}
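
/* Chunking example for lower_scan_iadd64() above: x == 0x00000123456789AB
 * splits into x_low == 0x6789AB (bits 0-23), x_mid == 0x012345 (bits 24-47)
 * and x_hi == 0x0000 (bits 48-63).  Each chunk is at most 24 bits, so even a
 * 256-wide sum stays below 2^32, and the final shift-and-add reassembles the
 * exact 64-bit result.
 */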

static bool
should_lower_int64_intrinsic(const nir_intrinsic_instr *intrin,
                             const nir_shader_compiler_options *options)
{
   switch (intrin->intrinsic) {
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
      return intrin->dest.ssa.bit_size == 64 &&
             (options->lower_int64_options & nir_lower_subgroup_shuffle64);

   case nir_intrinsic_vote_ieq:
      return intrin->src[0].ssa->bit_size == 64 &&
             (options->lower_int64_options & nir_lower_vote_ieq64);

   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
      if (intrin->dest.ssa.bit_size != 64)
         return false;

      switch (nir_intrinsic_reduction_op(intrin)) {
      case nir_op_iadd:
         return options->lower_int64_options & nir_lower_scan_reduce_iadd64;
      case nir_op_iand:
      case nir_op_ior:
      case nir_op_ixor:
         return options->lower_int64_options & nir_lower_scan_reduce_bitwise64;
      default:
         return false;
      }
      break;

   default:
      return false;
   }
}

static nir_ssa_def *
lower_int64_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
{
   switch (intrin->intrinsic) {
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
      return split_64bit_subgroup_op(b, intrin);

   case nir_intrinsic_vote_ieq:
      return lower_vote_ieq(b, intrin->src[0].ssa);

   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
      switch (nir_intrinsic_reduction_op(intrin)) {
      case nir_op_iadd:
         return lower_scan_iadd64(b, intrin);
      case nir_op_iand:
      case nir_op_ior:
      case nir_op_ixor:
         return split_64bit_subgroup_op(b, intrin);
      default:
         unreachable("Unsupported subgroup scan/reduce op");
      }
      break;

   default:
      unreachable("Unsupported intrinsic");
   }
}

static bool
should_lower_int64_instr(const nir_instr *instr, const void *_options)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return should_lower_int64_alu_instr(nir_instr_as_alu(instr), _options);
   case nir_instr_type_intrinsic:
      return should_lower_int64_intrinsic(nir_instr_as_intrinsic(instr),
                                          _options);
   default:
      return false;
   }
}

static nir_ssa_def *
lower_int64_instr(nir_builder *b, nir_instr *instr, void *_options)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return lower_int64_alu_instr(b, nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return lower_int64_intrinsic(b, nir_instr_as_intrinsic(instr));
   default:
      return NULL;
   }
}

bool
nir_lower_int64(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader, should_lower_int64_instr,
                                        lower_int64_instr,
                                        (void *)shader->options);
}

static bool
should_lower_int64_float_conv(const nir_instr *instr, const void *_options)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   switch (alu->op) {
   case nir_op_i2f64:
   case nir_op_i2f32:
   case nir_op_i2f16:
   case nir_op_u2f64:
   case nir_op_u2f32:
   case nir_op_u2f16:
   case nir_op_f2i64:
   case nir_op_f2u64:
      return should_lower_int64_alu_instr(alu, _options);
   default:
      return false;
   }
}

/**
 * Like nir_lower_int64(), but only lowers conversions to/from float.
 *
 * These operations in particular may affect double-precision lowering,
 * so it can be useful to run them in tandem with nir_lower_doubles().
 */
bool
nir_lower_int64_float_conversions(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader, should_lower_int64_float_conv,
                                        lower_int64_instr,
                                        (void *)shader->options);
}