nir: unify lower_bitfield_insert with has_{bfm,bfi,bitfield_select}

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24662>
This commit is contained in:
Georg Lehmann 2023-08-14 19:11:51 +02:00 committed by Marge Bot
parent bfb55d0266
commit 34c3f81614
16 changed files with 35 additions and 29 deletions

View file

@ -75,7 +75,7 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s
.lower_fdiv = true,
.lower_fmod = true,
.lower_ineg = true,
.lower_bitfield_insert_to_bitfield_select = true,
.lower_bitfield_insert = true,
.lower_bitfield_extract = true,
.lower_pack_snorm_4x8 = true,
.lower_pack_unorm_4x8 = true,
@ -98,6 +98,8 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s
.lower_hadd = true,
.lower_mul_32x16 = true,
.lower_uclz = true,
.has_bfm = true,
.has_bitfield_select = true,
.has_fsub = true,
.has_isub = true,
.has_sdot_4x8 = device->rad_info.has_accelerated_dot_product,

View file

@ -227,7 +227,7 @@ static const nir_shader_compiler_options agx_nir_options = {
.lower_flrp32 = true,
.lower_fpow = true,
.lower_fmod = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_insert = true,
.lower_ifind_msb = true,
.lower_find_lsb = true,
.lower_uadd_carry = true,

View file

@ -193,7 +193,7 @@ const nir_shader_compiler_options v3dv_nir_options = {
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_insert = true,
.lower_bitfield_extract_to_shifts = true,
.lower_bitfield_reverse = true,
.lower_bit_count = true,

View file

@ -3415,12 +3415,8 @@ typedef struct nir_shader_compiler_options {
bool lower_bitfield_extract;
/** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */
bool lower_bitfield_extract_to_shifts;
/** Lowers bitfield_insert to bfi/bfm */
/** Lowers bitfield_insert; the lowering chosen depends on has_bfm, has_bfi and has_bitfield_select. */
bool lower_bitfield_insert;
/** Lowers bitfield_insert to compares, and shifts. */
bool lower_bitfield_insert_to_shifts;
/** Lowers bitfield_insert to bfm/bitfield_select. */
bool lower_bitfield_insert_to_bitfield_select;
/** Lowers bitfield_reverse to shifts. */
bool lower_bitfield_reverse;
/** Lowers bit_count to shifts. */
@ -3748,6 +3744,15 @@ typedef struct nir_shader_compiler_options {
/** Backend supports bitz/bitnz. */
bool has_bit_test;
/** Backend supports bfm. */
bool has_bfm;
/** Backend supports bfi. */
bool has_bfi;
/** Backend supports bitfield_select. */
bool has_bitfield_select;
/**
* Is this the Intel vec4 backend?
*

View file

@ -1184,9 +1184,6 @@ ${pass_name}(nir_shader *shader)
*/
assert((int)options->lower_bitfield_extract +
(int)options->lower_bitfield_extract_to_shifts <= 1);
assert((int)options->lower_bitfield_insert +
(int)options->lower_bitfield_insert_to_shifts +
(int)options->lower_bitfield_insert_to_bitfield_select <= 1);
STATIC_ASSERT(${str(cache["next_index"])} == ARRAY_SIZE(${pass_name}_values));

View file

@ -1895,7 +1895,7 @@ optimizations.extend([
(('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
('bcsel', ('ult', 31, 'bits'), 'insert',
('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')),
'options->lower_bitfield_insert'),
'options->lower_bitfield_insert && options->has_bfm && options->has_bfi'),
(('ihadd', a, b), ('iadd', ('iand', a, b), ('ishr', ('ixor', a, b), 1)), 'options->lower_hadd'),
(('uhadd', a, b), ('iadd', ('iand', a, b), ('ushr', ('ixor', a, b), 1)), 'options->lower_hadd'),
(('irhadd', a, b), ('isub', ('ior', a, b), ('ishr', ('ixor', a, b), 1)), 'options->lower_hadd'),
@ -1988,13 +1988,13 @@ optimizations.extend([
(('ior',
('iand', 'base', ('inot', ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'))),
('iand', ('ishl', 'insert', 'offset'), ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'))))),
'options->lower_bitfield_insert_to_shifts'),
'options->lower_bitfield_insert && (!options->has_bfm || (!options->has_bfi && !options->has_bitfield_select))'),
# Alternative lowering that uses bitfield_select.
(('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
('bcsel', ('ult', 31, 'bits'), 'insert',
('bitfield_select', ('bfm', 'bits', 'offset'), ('ishl', 'insert', 'offset'), 'base')),
'options->lower_bitfield_insert_to_bitfield_select'),
'options->lower_bitfield_insert && options->has_bfm && options->has_bitfield_select'),
(('ibitfield_extract', 'value', 'offset', 'bits'),
('bcsel', ('ult', 31, 'bits'), 'value',
@ -2029,7 +2029,7 @@ optimizations.extend([
('bfm', 'width', 'offset')),
# open-coded BFM
(('iadd@32', ('ishl', 1, a), -1), ('bfm', a, 0), 'options->lower_bitfield_insert_to_bitfield_select || options->lower_bitfield_insert'),
(('iadd@32', ('ishl', 1, a), -1), ('bfm', a, 0), 'options->has_bfm'),
(('ishl', ('bfm', a, 0), b), ('bfm', a, b)),
# Section 8.8 (Integer Functions) of the GLSL 4.60 spec says:

View file

@ -95,7 +95,7 @@ static const nir_shader_compiler_options ir3_base_options = {
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_helper_invocation = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_insert = true,
.lower_bitfield_extract_to_shifts = true,
.lower_pack_half_2x16 = true,
.lower_pack_snorm_4x8 = true,

View file

@ -134,7 +134,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_fsat = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_insert = true,
.lower_bitfield_extract_to_shifts = true,
.lower_fdph = true,
.lower_ffma16 = true,

View file

@ -588,7 +588,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_fsat = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_insert = true,
.lower_bitfield_extract_to_shifts = true,
.lower_fdot = true,
.lower_fdph = true,

View file

@ -1339,6 +1339,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
.lower_fmod = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.lower_bitfield_insert = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
@ -1381,14 +1382,14 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
if (rscreen->info.gfx_level >= EVERGREEN) {
rscreen->nir_options.lower_bitfield_extract = true;
rscreen->nir_options.lower_bitfield_insert_to_bitfield_select = true;
rscreen->nir_options.has_bfm = true;
rscreen->nir_options.has_bitfield_select = true;
}
if (rscreen->info.gfx_level < EVERGREEN) {
/* Pre-EG doesn't have these ALU ops */
rscreen->nir_options.lower_bit_count = true;
rscreen->nir_options.lower_bitfield_reverse = true;
rscreen->nir_options.lower_bitfield_insert_to_shifts = true;
rscreen->nir_options.lower_bitfield_extract_to_shifts = true;
}

View file

@ -1268,7 +1268,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_fdiv = true,
.lower_bitfield_insert_to_bitfield_select = true,
.lower_bitfield_insert = true,
.lower_bitfield_extract = true,
/* |---------------------------------- Performance & Availability --------------------------------|
* |MAD/MAC/MADAK/MADMK|MAD_LEGACY|MAC_LEGACY| FMA |FMAC/FMAAK/FMAMK|FMA_LEGACY|PK_FMA_F16,|Best choice
@ -1323,6 +1323,8 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
.has_sudot_4x8 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level >= GFX11,
.has_udot_4x8 = sscreen->info.has_accelerated_dot_product,
.has_dot_2x16 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level < GFX11,
.has_bfm = true,
.has_bitfield_select = true,
.optimize_sample_mask_in = true,
.max_unroll_iterations = 128,
.max_unroll_iterations_aggressive = 128,

View file

@ -699,7 +699,7 @@ static const nir_shader_compiler_options v3d_nir_options = {
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_insert = true,
.lower_bitfield_extract_to_shifts = true,
.lower_bitfield_reverse = true,
.lower_bit_count = true,

View file

@ -40,6 +40,7 @@
.lower_fisnormal = true, \
.lower_isign = true, \
.lower_ldexp = true, \
.lower_bitfield_insert = true, \
.lower_device_index_to_zero = true, \
.vectorize_io = true, \
.vectorize_tess_levels = true, \
@ -184,8 +185,8 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
nir_options->lower_bitfield_extract = devinfo->ver >= 7;
nir_options->lower_bitfield_extract_to_shifts = devinfo->ver < 7;
nir_options->lower_bitfield_insert = devinfo->ver >= 7;
nir_options->lower_bitfield_insert_to_shifts = devinfo->ver < 7;
nir_options->has_bfm = devinfo->ver >= 7;
nir_options->has_bfi = devinfo->ver >= 7;
nir_options->lower_rotate = devinfo->ver < 11;
nir_options->lower_bitfield_reverse = devinfo->ver < 7;

View file

@ -3387,9 +3387,7 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
op.lower_fmod = true;
op.lower_bitfield_extract = false;
op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
op.lower_bitfield_insert = false;
op.lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
op.lower_bitfield_insert_to_bitfield_select = false;
op.lower_bitfield_insert = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
op.lower_bitfield_reverse = (chipset < NVISA_GF100_CHIPSET);
op.lower_bit_count = (chipset < NVISA_GF100_CHIPSET);
op.lower_ifind_msb = (chipset < NVISA_GF100_CHIPSET);

View file

@ -52,7 +52,7 @@ void bifrost_compile_shader_nir(nir_shader *nir,
\
.lower_fsign = true, \
\
.lower_bitfield_insert_to_shifts = true, \
.lower_bitfield_insert = true, \
.lower_bitfield_extract_to_shifts = true, \
.lower_insert_byte = true, \
.lower_rotate = true, \

View file

@ -66,7 +66,7 @@ static const nir_shader_compiler_options midgard_nir_options = {
.lower_bit_count = true,
.lower_bitfield_reverse = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_insert = true,
.lower_bitfield_extract_to_shifts = true,
.lower_extract_byte = true,
.lower_extract_word = true,