mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
nir: unify lower_bitfield_insert with has_{bfm,bfi,bitfield_select}
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24662>
This commit is contained in:
parent
bfb55d0266
commit
34c3f81614
16 changed files with 35 additions and 29 deletions
|
|
@ -75,7 +75,7 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s
|
|||
.lower_fdiv = true,
|
||||
.lower_fmod = true,
|
||||
.lower_ineg = true,
|
||||
.lower_bitfield_insert_to_bitfield_select = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract = true,
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
.lower_pack_unorm_4x8 = true,
|
||||
|
|
@ -98,6 +98,8 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s
|
|||
.lower_hadd = true,
|
||||
.lower_mul_32x16 = true,
|
||||
.lower_uclz = true,
|
||||
.has_bfm = true,
|
||||
.has_bitfield_select = true,
|
||||
.has_fsub = true,
|
||||
.has_isub = true,
|
||||
.has_sdot_4x8 = device->rad_info.has_accelerated_dot_product,
|
||||
|
|
|
|||
|
|
@ -227,7 +227,7 @@ static const nir_shader_compiler_options agx_nir_options = {
|
|||
.lower_flrp32 = true,
|
||||
.lower_fpow = true,
|
||||
.lower_fmod = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_ifind_msb = true,
|
||||
.lower_find_lsb = true,
|
||||
.lower_uadd_carry = true,
|
||||
|
|
|
|||
|
|
@ -193,7 +193,7 @@ const nir_shader_compiler_options v3dv_nir_options = {
|
|||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_bitfield_reverse = true,
|
||||
.lower_bit_count = true,
|
||||
|
|
|
|||
|
|
@ -3415,12 +3415,8 @@ typedef struct nir_shader_compiler_options {
|
|||
bool lower_bitfield_extract;
|
||||
/** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */
|
||||
bool lower_bitfield_extract_to_shifts;
|
||||
/** Lowers bitfield_insert to bfi/bfm */
|
||||
/** Lowers bitfield_insert. */
|
||||
bool lower_bitfield_insert;
|
||||
/** Lowers bitfield_insert to compares, and shifts. */
|
||||
bool lower_bitfield_insert_to_shifts;
|
||||
/** Lowers bitfield_insert to bfm/bitfield_select. */
|
||||
bool lower_bitfield_insert_to_bitfield_select;
|
||||
/** Lowers bitfield_reverse to shifts. */
|
||||
bool lower_bitfield_reverse;
|
||||
/** Lowers bit_count to shifts. */
|
||||
|
|
@ -3748,6 +3744,15 @@ typedef struct nir_shader_compiler_options {
|
|||
/** Backend supports bitz/bitnz. */
|
||||
bool has_bit_test;
|
||||
|
||||
/** Backend supports bfm. */
|
||||
bool has_bfm;
|
||||
|
||||
/** Backend supports bfi. */
|
||||
bool has_bfi;
|
||||
|
||||
/** Backend supports bitfield_select. */
|
||||
bool has_bitfield_select;
|
||||
|
||||
/**
|
||||
* Is this the Intel vec4 backend?
|
||||
*
|
||||
|
|
|
|||
|
|
@ -1184,9 +1184,6 @@ ${pass_name}(nir_shader *shader)
|
|||
*/
|
||||
assert((int)options->lower_bitfield_extract +
|
||||
(int)options->lower_bitfield_extract_to_shifts <= 1);
|
||||
assert((int)options->lower_bitfield_insert +
|
||||
(int)options->lower_bitfield_insert_to_shifts +
|
||||
(int)options->lower_bitfield_insert_to_bitfield_select <= 1);
|
||||
|
||||
|
||||
STATIC_ASSERT(${str(cache["next_index"])} == ARRAY_SIZE(${pass_name}_values));
|
||||
|
|
|
|||
|
|
@ -1895,7 +1895,7 @@ optimizations.extend([
|
|||
(('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
|
||||
('bcsel', ('ult', 31, 'bits'), 'insert',
|
||||
('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')),
|
||||
'options->lower_bitfield_insert'),
|
||||
'options->lower_bitfield_insert && options->has_bfm && options->has_bfi'),
|
||||
(('ihadd', a, b), ('iadd', ('iand', a, b), ('ishr', ('ixor', a, b), 1)), 'options->lower_hadd'),
|
||||
(('uhadd', a, b), ('iadd', ('iand', a, b), ('ushr', ('ixor', a, b), 1)), 'options->lower_hadd'),
|
||||
(('irhadd', a, b), ('isub', ('ior', a, b), ('ishr', ('ixor', a, b), 1)), 'options->lower_hadd'),
|
||||
|
|
@ -1988,13 +1988,13 @@ optimizations.extend([
|
|||
(('ior',
|
||||
('iand', 'base', ('inot', ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'))),
|
||||
('iand', ('ishl', 'insert', 'offset'), ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'))))),
|
||||
'options->lower_bitfield_insert_to_shifts'),
|
||||
'options->lower_bitfield_insert && (!options->has_bfm || (!options->has_bfi && !options->has_bitfield_select))'),
|
||||
|
||||
# Alternative lowering that uses bitfield_select.
|
||||
(('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
|
||||
('bcsel', ('ult', 31, 'bits'), 'insert',
|
||||
('bitfield_select', ('bfm', 'bits', 'offset'), ('ishl', 'insert', 'offset'), 'base')),
|
||||
'options->lower_bitfield_insert_to_bitfield_select'),
|
||||
'options->lower_bitfield_insert && options->has_bfm && options->has_bitfield_select'),
|
||||
|
||||
(('ibitfield_extract', 'value', 'offset', 'bits'),
|
||||
('bcsel', ('ult', 31, 'bits'), 'value',
|
||||
|
|
@ -2029,7 +2029,7 @@ optimizations.extend([
|
|||
('bfm', 'width', 'offset')),
|
||||
|
||||
# open-coded BFM
|
||||
(('iadd@32', ('ishl', 1, a), -1), ('bfm', a, 0), 'options->lower_bitfield_insert_to_bitfield_select || options->lower_bitfield_insert'),
|
||||
(('iadd@32', ('ishl', 1, a), -1), ('bfm', a, 0), 'options->has_bfm'),
|
||||
(('ishl', ('bfm', a, 0), b), ('bfm', a, b)),
|
||||
|
||||
# Section 8.8 (Integer Functions) of the GLSL 4.60 spec says:
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ static const nir_shader_compiler_options ir3_base_options = {
|
|||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_helper_invocation = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_pack_half_2x16 = true,
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
|
|||
.lower_flrp32 = true,
|
||||
.lower_flrp64 = true,
|
||||
.lower_fsat = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_fdph = true,
|
||||
.lower_ffma16 = true,
|
||||
|
|
|
|||
|
|
@ -588,7 +588,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
|
|||
.lower_flrp32 = true,
|
||||
.lower_flrp64 = true,
|
||||
.lower_fsat = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_fdot = true,
|
||||
.lower_fdph = true,
|
||||
|
|
|
|||
|
|
@ -1339,6 +1339,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
|||
.lower_fmod = true,
|
||||
.lower_uadd_carry = true,
|
||||
.lower_usub_borrow = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
|
|
@ -1381,14 +1382,14 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
|||
|
||||
if (rscreen->info.gfx_level >= EVERGREEN) {
|
||||
rscreen->nir_options.lower_bitfield_extract = true;
|
||||
rscreen->nir_options.lower_bitfield_insert_to_bitfield_select = true;
|
||||
rscreen->nir_options.has_bfm = true;
|
||||
rscreen->nir_options.has_bitfield_select = true;
|
||||
}
|
||||
|
||||
if (rscreen->info.gfx_level < EVERGREEN) {
|
||||
/* Pre-EG doesn't have these ALU ops */
|
||||
rscreen->nir_options.lower_bit_count = true;
|
||||
rscreen->nir_options.lower_bitfield_reverse = true;
|
||||
rscreen->nir_options.lower_bitfield_insert_to_shifts = true;
|
||||
rscreen->nir_options.lower_bitfield_extract_to_shifts = true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1268,7 +1268,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
|||
.lower_flrp32 = true,
|
||||
.lower_flrp64 = true,
|
||||
.lower_fdiv = true,
|
||||
.lower_bitfield_insert_to_bitfield_select = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract = true,
|
||||
/* |---------------------------------- Performance & Availability --------------------------------|
|
||||
* |MAD/MAC/MADAK/MADMK|MAD_LEGACY|MAC_LEGACY| FMA |FMAC/FMAAK/FMAMK|FMA_LEGACY|PK_FMA_F16,|Best choice
|
||||
|
|
@ -1323,6 +1323,8 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
|||
.has_sudot_4x8 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level >= GFX11,
|
||||
.has_udot_4x8 = sscreen->info.has_accelerated_dot_product,
|
||||
.has_dot_2x16 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level < GFX11,
|
||||
.has_bfm = true,
|
||||
.has_bitfield_select = true,
|
||||
.optimize_sample_mask_in = true,
|
||||
.max_unroll_iterations = 128,
|
||||
.max_unroll_iterations_aggressive = 128,
|
||||
|
|
|
|||
|
|
@ -699,7 +699,7 @@ static const nir_shader_compiler_options v3d_nir_options = {
|
|||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_bitfield_reverse = true,
|
||||
.lower_bit_count = true,
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@
|
|||
.lower_fisnormal = true, \
|
||||
.lower_isign = true, \
|
||||
.lower_ldexp = true, \
|
||||
.lower_bitfield_insert = true, \
|
||||
.lower_device_index_to_zero = true, \
|
||||
.vectorize_io = true, \
|
||||
.vectorize_tess_levels = true, \
|
||||
|
|
@ -184,8 +185,8 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
|
|||
|
||||
nir_options->lower_bitfield_extract = devinfo->ver >= 7;
|
||||
nir_options->lower_bitfield_extract_to_shifts = devinfo->ver < 7;
|
||||
nir_options->lower_bitfield_insert = devinfo->ver >= 7;
|
||||
nir_options->lower_bitfield_insert_to_shifts = devinfo->ver < 7;
|
||||
nir_options->has_bfm = devinfo->ver >= 7;
|
||||
nir_options->has_bfi = devinfo->ver >= 7;
|
||||
|
||||
nir_options->lower_rotate = devinfo->ver < 11;
|
||||
nir_options->lower_bitfield_reverse = devinfo->ver < 7;
|
||||
|
|
|
|||
|
|
@ -3387,9 +3387,7 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
|
|||
op.lower_fmod = true;
|
||||
op.lower_bitfield_extract = false;
|
||||
op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
|
||||
op.lower_bitfield_insert = false;
|
||||
op.lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
|
||||
op.lower_bitfield_insert_to_bitfield_select = false;
|
||||
op.lower_bitfield_insert = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
|
||||
op.lower_bitfield_reverse = (chipset < NVISA_GF100_CHIPSET);
|
||||
op.lower_bit_count = (chipset < NVISA_GF100_CHIPSET);
|
||||
op.lower_ifind_msb = (chipset < NVISA_GF100_CHIPSET);
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ void bifrost_compile_shader_nir(nir_shader *nir,
|
|||
\
|
||||
.lower_fsign = true, \
|
||||
\
|
||||
.lower_bitfield_insert_to_shifts = true, \
|
||||
.lower_bitfield_insert = true, \
|
||||
.lower_bitfield_extract_to_shifts = true, \
|
||||
.lower_insert_byte = true, \
|
||||
.lower_rotate = true, \
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ static const nir_shader_compiler_options midgard_nir_options = {
|
|||
|
||||
.lower_bit_count = true,
|
||||
.lower_bitfield_reverse = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue