mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 02:48:06 +02:00
nir/opt_load_store_vectorize: only attempt to vectorize shared2 after exhausting other possibilities
Totals from 249 (0.31% of 79839) affected shaders: (Navi48)

Instrs: 276401 -> 275918 (-0.17%); split: -0.29%, +0.11%
CodeSize: 1477072 -> 1474440 (-0.18%); split: -0.26%, +0.08%
VGPRs: 12748 -> 12760 (+0.09%); split: -0.28%, +0.38%
Latency: 1397959 -> 1398846 (+0.06%); split: -0.10%, +0.16%
InvThroughput: 424767 -> 424496 (-0.06%); split: -0.09%, +0.02%
VClause: 5183 -> 5186 (+0.06%); split: -0.10%, +0.15%
SClause: 6537 -> 6538 (+0.02%); split: -0.05%, +0.06%
Copies: 21295 -> 21098 (-0.93%); split: -1.21%, +0.29%
Branches: 4324 -> 4325 (+0.02%)
PreSGPRs: 9719 -> 9717 (-0.02%)
PreVGPRs: 8857 -> 8847 (-0.11%); split: -0.24%, +0.12%
VALU: 144514 -> 144334 (-0.12%); split: -0.20%, +0.07%
SALU: 38970 -> 38944 (-0.07%); split: -0.08%, +0.01%
VOPD: 884 -> 898 (+1.58%); split: +1.92%, -0.34%

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36133>
This commit is contained in:
parent
148063670d
commit
957b271a9f
1 changed files with 32 additions and 22 deletions
|
|
@@ -1446,32 +1446,42 @@ vectorize_sorted_entries(struct vectorize_ctx *ctx, nir_function_impl *impl,
|
|||
* callback if needed. Driver callbacks will likely want to
|
||||
* restrict this to a smaller value, say 4 bytes (or none).
|
||||
*/
|
||||
unsigned max_hole =
|
||||
first->is_store ||
|
||||
(ctx->options->has_shared2_amd &&
|
||||
get_variable_mode(first) == nir_var_mem_shared)
|
||||
? 0
|
||||
: 28;
|
||||
unsigned max_hole = first->is_store ? 0 : 28;
|
||||
unsigned low_size = get_bit_size(low) / 8u * low->num_components;
|
||||
bool separate = diff > max_hole + low_size;
|
||||
if (separate)
|
||||
continue;
|
||||
|
||||
if (separate) {
|
||||
if (!ctx->options->has_shared2_amd ||
|
||||
get_variable_mode(first) != nir_var_mem_shared)
|
||||
break;
|
||||
if (try_vectorize(impl, ctx, low, high, first, second)) {
|
||||
low = low->is_store ? second : first;
|
||||
*util_dynarray_element(arr, struct entry *, second_idx) = NULL;
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
*util_dynarray_element(arr, struct entry *, first_idx) = low;
|
||||
}
|
||||
|
||||
if (try_vectorize_shared2(ctx, low, high, first, second)) {
|
||||
low = NULL;
|
||||
*util_dynarray_element(arr, struct entry *, second_idx) = NULL;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (try_vectorize(impl, ctx, low, high, first, second)) {
|
||||
low = low->is_store ? second : first;
|
||||
*util_dynarray_element(arr, struct entry *, second_idx) = NULL;
|
||||
progress = true;
|
||||
}
|
||||
if (!ctx->options->has_shared2_amd)
|
||||
return progress;
|
||||
|
||||
/* Do a second pass for backends which support load/store shared2. */
|
||||
for (unsigned first_idx = 0; first_idx < num_entries; first_idx++) {
|
||||
struct entry *low = *util_dynarray_element(arr, struct entry *, first_idx);
|
||||
if (!low || get_variable_mode(low) != nir_var_mem_shared)
|
||||
continue;
|
||||
|
||||
for (unsigned second_idx = first_idx + 1; second_idx < num_entries; second_idx++) {
|
||||
struct entry *high = *util_dynarray_element(arr, struct entry *, second_idx);
|
||||
if (!high || get_variable_mode(high) != nir_var_mem_shared)
|
||||
continue;
|
||||
|
||||
struct entry *first = low->index < high->index ? low : high;
|
||||
struct entry *second = low->index < high->index ? high : low;
|
||||
if (try_vectorize_shared2(ctx, low, high, first, second)) {
|
||||
low = NULL;
|
||||
*util_dynarray_element(arr, struct entry *, second_idx) = NULL;
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue