mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 09:18:04 +02:00
nir/opt_large_constants: optimize small vector constant arrays
Foz-DB Navi48: Totals from 2956 (2.58% of 114655) affected shaders: MaxWaves: 85080 -> 85110 (+0.04%) Instrs: 5167735 -> 5170572 (+0.05%); split: -0.12%, +0.17% CodeSize: 28882716 -> 28867340 (-0.05%); split: -0.14%, +0.08% VGPRs: 164484 -> 164616 (+0.08%); split: -0.09%, +0.18% SpillSGPRs: 612 -> 611 (-0.16%) Latency: 35017837 -> 34391146 (-1.79%); split: -1.80%, +0.01% InvThroughput: 6336245 -> 6323807 (-0.20%); split: -0.49%, +0.29% VClause: 112504 -> 111117 (-1.23%); split: -1.32%, +0.09% SClause: 121125 -> 117618 (-2.90%); split: -3.04%, +0.15% Copies: 392203 -> 384977 (-1.84%); split: -1.88%, +0.04% Branches: 155578 -> 155376 (-0.13%); split: -0.13%, +0.01% PreSGPRs: 127654 -> 127205 (-0.35%); split: -0.39%, +0.04% PreVGPRs: 112486 -> 112449 (-0.03%); split: -0.04%, +0.00% VALU: 2577362 -> 2586379 (+0.35%); split: -0.00%, +0.35% SALU: 889569 -> 888472 (-0.12%); split: -1.01%, +0.89% VMEM: 167203 -> 165750 (-0.87%) SMEM: 190438 -> 187313 (-1.64%) VOPD: 194411 -> 194344 (-0.03%); split: +0.01%, -0.04% Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33002>
This commit is contained in:
parent
f782524c36
commit
a9f3efcae0
2 changed files with 97 additions and 64 deletions
|
|
@ -134,7 +134,8 @@ struct var_info {
|
|||
uint32_t constant_data_size;
|
||||
void *constant_data;
|
||||
|
||||
struct small_constant small_constant;
|
||||
uint32_t num_components;
|
||||
struct small_constant small_constant[NIR_MAX_VEC_COMPONENTS];
|
||||
};
|
||||
|
||||
static int
|
||||
|
|
@ -221,34 +222,10 @@ handle_constant_store(void *mem_ctx, struct var_info *info,
|
|||
bit_size);
|
||||
}
|
||||
|
||||
static void
|
||||
get_small_constant(struct var_info *info, glsl_type_size_align_func size_align)
|
||||
static bool
|
||||
get_small_constant_component(struct small_constant *info, uint32_t array_len,
|
||||
uint32_t bit_size, nir_const_value *values)
|
||||
{
|
||||
if (!glsl_type_is_array(info->var->type))
|
||||
return;
|
||||
|
||||
const struct glsl_type *elem_type = glsl_get_array_element(info->var->type);
|
||||
if (!glsl_type_is_scalar(elem_type))
|
||||
return;
|
||||
|
||||
uint32_t array_len = glsl_get_length(info->var->type);
|
||||
uint32_t bit_size = glsl_get_bit_size(elem_type);
|
||||
|
||||
/* If our array is large, don't even bother */
|
||||
if (array_len > 64)
|
||||
return;
|
||||
|
||||
/* Skip cases that can be lowered to a bcsel ladder more efficiently. */
|
||||
if (array_len <= 3)
|
||||
return;
|
||||
|
||||
uint32_t elem_size, elem_align;
|
||||
size_align(elem_type, &elem_size, &elem_align);
|
||||
uint32_t stride = ALIGN_POT(elem_size, elem_align);
|
||||
|
||||
nir_const_value values[64];
|
||||
read_const_values(values, info->constant_data, array_len, bit_size, stride);
|
||||
|
||||
bool is_float = true;
|
||||
if (bit_size < 16) {
|
||||
is_float = false;
|
||||
|
|
@ -282,53 +259,100 @@ get_small_constant(struct var_info *info, glsl_type_size_align_func size_align)
|
|||
used_bits = util_next_power_of_two(used_bits);
|
||||
|
||||
if (used_bits * array_len > 64)
|
||||
return;
|
||||
|
||||
info->is_small = true;
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < array_len; i++) {
|
||||
uint64_t u64_elem = is_float ? nir_const_value_as_float(values[i], bit_size)
|
||||
: nir_const_value_as_uint(values[i], bit_size);
|
||||
|
||||
info->small_constant.data |= u64_elem << (i * used_bits);
|
||||
info->data |= u64_elem << (i * used_bits);
|
||||
}
|
||||
|
||||
/* Limit bit_size >= 32 to avoid unnecessary conversions. */
|
||||
info->small_constant.bit_size =
|
||||
info->bit_size =
|
||||
MAX2(util_next_power_of_two(used_bits * array_len), 32);
|
||||
info->small_constant.is_float = is_float;
|
||||
info->small_constant.bit_stride = used_bits;
|
||||
info->is_float = is_float;
|
||||
info->bit_stride = used_bits;
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
get_small_constant(struct var_info *info, glsl_type_size_align_func size_align)
|
||||
{
|
||||
if (!glsl_type_is_array(info->var->type))
|
||||
return;
|
||||
|
||||
const struct glsl_type *elem_type = glsl_get_array_element(info->var->type);
|
||||
if (!glsl_type_is_scalar(elem_type) && !glsl_type_is_vector(elem_type))
|
||||
return;
|
||||
|
||||
uint32_t array_len = glsl_get_length(info->var->type);
|
||||
info->num_components = glsl_get_vector_elements(elem_type);
|
||||
uint32_t bit_size = glsl_get_bit_size(elem_type);
|
||||
|
||||
/* If our array is large, don't even bother */
|
||||
if (array_len > 64)
|
||||
return;
|
||||
|
||||
/* Skip cases that can be lowered to a bcsel ladder more efficiently. */
|
||||
if (array_len <= 3)
|
||||
return;
|
||||
|
||||
uint32_t elem_size, elem_align;
|
||||
size_align(elem_type, &elem_size, &elem_align);
|
||||
const uint32_t stride = ALIGN_POT(elem_size, elem_align);
|
||||
|
||||
const uint32_t scalar_stride = bit_size == 1 ? 4 : bit_size / 8;
|
||||
|
||||
info->is_small = true;
|
||||
for (unsigned c = 0; c < info->num_components; c++) {
|
||||
nir_const_value values[64];
|
||||
const void *data = info->constant_data;
|
||||
data = (void *)(((uintptr_t)data) + scalar_stride * c);
|
||||
read_const_values(values, data, array_len, bit_size, stride);
|
||||
|
||||
if (!get_small_constant_component(&info->small_constant[c], array_len,
|
||||
bit_size, values)) {
|
||||
info->is_small = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
build_small_constant_load(nir_builder *b, nir_deref_instr *deref,
|
||||
struct var_info *info, glsl_type_size_align_func size_align)
|
||||
{
|
||||
struct small_constant *constant = &info->small_constant;
|
||||
|
||||
nir_def *imm = nir_imm_intN_t(b, constant->data, constant->bit_size);
|
||||
|
||||
assert(deref->deref_type == nir_deref_type_array);
|
||||
nir_def *index = deref->arr.index.ssa;
|
||||
nir_def *index = nir_u2u32(b, deref->arr.index.ssa);
|
||||
|
||||
nir_def *shift = nir_imul_imm(b, index, constant->bit_stride);
|
||||
nir_def *ret[NIR_MAX_VEC_COMPONENTS];
|
||||
|
||||
nir_def *ret = nir_ushr(b, imm, nir_u2u32(b, shift));
|
||||
ret = nir_iand_imm(b, ret, BITFIELD64_MASK(constant->bit_stride));
|
||||
for (unsigned c = 0; c < info->num_components; c++) {
|
||||
const struct small_constant *constant = &info->small_constant[c];
|
||||
nir_def *imm = nir_imm_intN_t(b, constant->data, constant->bit_size);
|
||||
|
||||
const unsigned bit_size = glsl_get_bit_size(deref->type);
|
||||
if (bit_size < 8) {
|
||||
/* Booleans are special-cased to be 32-bit */
|
||||
assert(glsl_type_is_boolean(deref->type));
|
||||
ret = nir_ine_imm(b, ret, 0);
|
||||
} else {
|
||||
if (constant->is_float)
|
||||
ret = nir_u2fN(b, ret, bit_size);
|
||||
else if (bit_size != constant->bit_size)
|
||||
ret = nir_u2uN(b, ret, bit_size);
|
||||
nir_def *shift = nir_imul_imm(b, index, constant->bit_stride);
|
||||
|
||||
ret[c] = nir_ushr(b, imm, shift);
|
||||
ret[c] = nir_iand_imm(b, ret[c], BITFIELD64_MASK(constant->bit_stride));
|
||||
|
||||
const unsigned bit_size = glsl_get_bit_size(deref->type);
|
||||
if (bit_size < 8) {
|
||||
/* Booleans are special-cased to be 32-bit */
|
||||
assert(glsl_type_is_boolean(deref->type));
|
||||
ret[c] = nir_ine_imm(b, ret[c], 0);
|
||||
} else {
|
||||
if (constant->is_float)
|
||||
ret[c] = nir_u2fN(b, ret[c], bit_size);
|
||||
else if (bit_size != constant->bit_size)
|
||||
ret[c] = nir_u2uN(b, ret[c], bit_size);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
if (info->num_components == 1)
|
||||
return ret[0];
|
||||
return nir_vec(b, ret, info->num_components);
|
||||
}
|
||||
|
||||
/** Lower large constant variables to shader constant data
|
||||
|
|
|
|||
|
|
@ -231,19 +231,28 @@ TEST_F(nir_large_constants_test, small_float_natural_numbers_including_zero_vec_
|
|||
workgroup_size: 1, 1, 1
|
||||
max_subgroup_size: 128
|
||||
min_subgroup_size: 1
|
||||
constants: 64
|
||||
decl_function main () (entrypoint)
|
||||
|
||||
impl main {
|
||||
block b0: // preds:
|
||||
32 %0 = @load_workgroup_index
|
||||
32 %1 = load_const (0x00000000)
|
||||
32 %2 = load_const (0x00000003)
|
||||
32 %3 = ishl %0, %2 (0x3)
|
||||
32 %4 = iadd %1 (0x0), %3
|
||||
32x2 %5 = @load_constant (%4) (base=0, range=64, access=none, align_mul=4, align_offset=0)
|
||||
@use (%5)
|
||||
// succs: b1
|
||||
block b0: // preds:
|
||||
32 %0 = @load_workgroup_index
|
||||
32 %1 = load_const (0x76543210 = 1985229328)
|
||||
32 %2 = load_const (0x00000002)
|
||||
32 %3 = ishl %0, %2 (0x2)
|
||||
32 %4 = ushr %1 (0x76543210), %3
|
||||
32 %5 = load_const (0x0000000f = 15)
|
||||
32 %6 = iand %4, %5 (0xf)
|
||||
32 %7 = u2f32 %6 // exact, preserve:sz
|
||||
32 %8 = load_const (0x01234567 = 19088743)
|
||||
32 %9 = load_const (0x00000002)
|
||||
32 %10 = ishl %0, %9 (0x2)
|
||||
32 %11 = ushr %8 (0x1234567), %10
|
||||
32 %12 = load_const (0x0000000f = 15)
|
||||
32 %13 = iand %11, %12 (0xf)
|
||||
32 %14 = u2f32 %13 // exact, preserve:sz
|
||||
32x2 %15 = vec2 %7, %14
|
||||
@use (%15)
|
||||
// succs: b1
|
||||
block b1:
|
||||
}
|
||||
)"));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue