mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 02:30:12 +01:00
brw: run the nir_opt_offsets pass and set the maximum offset size
Perf A/B testing on DG2: no changes
Perf A/B testing on BMG: +2.1% Blackops3, +1.5% Cyberpunk
DG2 stats (mostly insignificant):
Assassins Creed Valhalla:
Totals from 1169 (55.67% of 2100) affected shaders:
Instrs: 509237 -> 509215 (-0.00%)
Cycle count: 30614325 -> 30607419 (-0.02%); split: -0.03%, +0.00%
Non SSA regs after NIR: 83434 -> 85909 (+2.97%)
Blackops 3:
Totals from 1045 (64.63% of 1617) affected shaders:
Instrs: 527312 -> 527310 (-0.00%)
Cycle count: 496912222 -> 496902846 (-0.00%); split: -0.00%, +0.00%
Non SSA regs after NIR: 106883 -> 109095 (+2.07%)
Cyberpunk:
Totals from 706 (56.03% of 1260) affected shaders:
Instrs: 345976 -> 345974 (-0.00%); split: -0.00%, +0.00%
Cycle count: 9775138 -> 9775472 (+0.00%); split: -0.00%, +0.00%
Max live registers: 40295 -> 40297 (+0.00%)
Non SSA regs after NIR: 93245 -> 94718 (+1.58%)
Fortnite:
Totals from 4210 (55.98% of 7521) affected shaders:
Instrs: 2205471 -> 2205469 (-0.00%)
Cycle count: 91451040 -> 91450956 (-0.00%); split: -0.00%, +0.00%
Non SSA regs after NIR: 952354 -> 961664 (+0.98%)
LNL stats (notable changes):
Assassins Creed Valhalla:
Totals from 1684 (83.57% of 2015) affected shaders:
Instrs: 774305 -> 764501 (-1.27%); split: -1.27%, +0.01%
Cycle count: 58845842 -> 58699250 (-0.25%); split: -0.98%, +0.73%
Spill count: 625 -> 638 (+2.08%)
Fill count: 1490 -> 1503 (+0.87%)
Scratch Memory Size: 41984 -> 44032 (+4.88%)
Max live registers: 196424 -> 197561 (+0.58%); split: -0.10%, +0.68%
Blackops 3:
Totals from 1125 (76.53% of 1470) affected shaders:
Instrs: 781749 -> 773275 (-1.08%); split: -1.08%, +0.00%
Subgroup size: 22896 -> 22912 (+0.07%)
Cycle count: 659864454 -> 654641032 (-0.79%); split: -1.10%, +0.31%
Max live registers: 116772 -> 116854 (+0.07%); split: -0.01%, +0.08%
Non SSA regs after NIR: 172648 -> 168260 (-2.54%); split: -2.55%, +0.01%
Control:
Totals from 378 (51.50% of 734) affected shaders:
Instrs: 148184 -> 147544 (-0.43%)
Cycle count: 6905200 -> 6913366 (+0.12%); split: -0.30%, +0.42%
Max live registers: 41271 -> 41281 (+0.02%)
Non SSA regs after NIR: 44964 -> 43868 (-2.44%); split: -2.45%, +0.01%
Cyberpunk:
Totals from 1141 (92.46% of 1234) affected shaders:
Instrs: 636744 -> 629333 (-1.16%)
Subgroup size: 24256 -> 24272 (+0.07%)
Cycle count: 24952258 -> 24801298 (-0.60%); split: -1.39%, +0.78%
Max live registers: 125848 -> 126855 (+0.80%); split: -0.00%, +0.80%
Non SSA regs after NIR: 127399 -> 119837 (-5.94%); split: -5.95%, +0.02%
Fortnite:
Totals from 5497 (83.52% of 6582) affected shaders:
Instrs: 4072831 -> 4041852 (-0.76%); split: -0.77%, +0.01%
Subgroup size: 103296 -> 103312 (+0.02%)
Cycle count: 133046874 -> 132789242 (-0.19%); split: -0.67%, +0.48%
Spill count: 7218 -> 7254 (+0.50%); split: -0.33%, +0.83%
Fill count: 11724 -> 11749 (+0.21%); split: -0.34%, +0.55%
Scratch Memory Size: 591872 -> 599040 (+1.21%)
Max live registers: 816530 -> 818522 (+0.24%); split: -0.01%, +0.26%
Non SSA regs after NIR: 1610296 -> 1560284 (-3.11%); split: -3.11%, +0.00%
Hitman3:
Totals from 4713 (92.39% of 5101) affected shaders:
Instrs: 2731598 -> 2698224 (-1.22%)
Cycle count: 186422098 -> 185472640 (-0.51%); split: -1.12%, +0.61%
Spill count: 3244 -> 3242 (-0.06%)
Fill count: 9937 -> 9933 (-0.04%)
Max live registers: 585035 -> 589801 (+0.81%); split: -0.00%, +0.82%
Non SSA regs after NIR: 347681 -> 324314 (-6.72%); split: -6.73%, +0.01%
Hogwarts Legacy:
Totals from 930 (59.81% of 1555) affected shaders:
Instrs: 464146 -> 459526 (-1.00%); split: -1.00%, +0.01%
Subgroup size: 19104 -> 19120 (+0.08%)
Cycle count: 24062460 -> 24078964 (+0.07%); split: -0.49%, +0.56%
Spill count: 2068 -> 1964 (-5.03%); split: -5.22%, +0.19%
Fill count: 2342 -> 2205 (-5.85%); split: -6.40%, +0.56%
Scratch Memory Size: 147456 -> 141312 (-4.17%)
Max live registers: 112384 -> 112787 (+0.36%); split: -0.08%, +0.44%
Non SSA regs after NIR: 80293 -> 79161 (-1.41%); split: -1.72%, +0.32%
Metro Exodus:
Totals from 29755 (78.62% of 37846) affected shaders:
Instrs: 11495578 -> 11492951 (-0.02%); split: -0.02%, +0.00%
Subgroup size: 644688 -> 644704 (+0.00%)
Cycle count: 301572068 -> 301548054 (-0.01%); split: -0.03%, +0.02%
Max live registers: 3369504 -> 3370454 (+0.03%); split: -0.00%, +0.03%
Non SSA regs after NIR: 2476561 -> 2396090 (-3.25%); split: -3.27%, +0.02%
Red Dead Redemption 2:
Totals from 4161 (78.61% of 5293) affected shaders:
Instrs: 2428782 -> 2409032 (-0.81%); split: -0.82%, +0.00%
Subgroup size: 85344 -> 85360 (+0.02%)
Cycle count: 8514984142 -> 8533415324 (+0.22%); split: -0.02%, +0.23%
Spill count: 4659 -> 4674 (+0.32%); split: -0.02%, +0.34%
Fill count: 11236 -> 11231 (-0.04%); split: -0.19%, +0.14%
Scratch Memory Size: 398336 -> 397312 (-0.26%)
Max live registers: 473946 -> 475798 (+0.39%); split: -0.08%, +0.47%
Non SSA regs after NIR: 616820 -> 567706 (-7.96%); split: -8.09%, +0.12%
Rise Of The Tomb Raider:
Totals from 68 (46.58% of 146) affected shaders:
Instrs: 28209 -> 27801 (-1.45%)
Subgroup size: 1584 -> 1600 (+1.01%)
Cycle count: 16182992 -> 16249364 (+0.41%); split: -0.97%, +1.38%
Max live registers: 7320 -> 7296 (-0.33%); split: -0.38%, +0.05%
Non SSA regs after NIR: 8438 -> 8207 (-2.74%); split: -2.82%, +0.08%
Spiderman Remastered:
Totals from 6403 (93.87% of 6821) affected shaders:
Instrs: 5662713 -> 5597949 (-1.14%); split: -1.28%, +0.14%
Cycle count: 282861519016 -> 279806958122 (-1.08%); split: -1.26%, +0.18%
Spill count: 61150 -> 60754 (-0.65%); split: -1.13%, +0.48%
Fill count: 162597 -> 163190 (+0.36%); split: -0.84%, +1.21%
Scratch Memory Size: 5834752 -> 5804032 (-0.53%); split: -0.70%, +0.18%
Max live registers: 901926 -> 903820 (+0.21%); split: -0.01%, +0.22%
Non SSA regs after NIR: 555053 -> 521016 (-6.13%); split: -6.14%, +0.01%
Signed-off-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35252>
This commit is contained in:
parent
8a5e062e5e
commit
e103afe7be
4 changed files with 150 additions and 1 deletions
|
|
@ -1756,6 +1756,58 @@ get_mem_access_size_align(nir_intrinsic_op intrin, uint8_t bytes,
|
|||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
brw_nir_ssbo_intel_instr(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
void *cb_data)
|
||||
{
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_ssbo: {
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
nir_def *value = nir_load_ssbo_intel(
|
||||
b,
|
||||
intrin->def.num_components,
|
||||
intrin->def.bit_size,
|
||||
intrin->src[0].ssa,
|
||||
intrin->src[1].ssa,
|
||||
.access = nir_intrinsic_access(intrin),
|
||||
.align_mul = nir_intrinsic_align_mul(intrin),
|
||||
.align_offset = nir_intrinsic_align_offset(intrin),
|
||||
.base = 0);
|
||||
value->loop_invariant = intrin->def.loop_invariant;
|
||||
value->divergent = intrin->def.divergent;
|
||||
nir_def_replace(&intrin->def, value);
|
||||
return true;
|
||||
}
|
||||
|
||||
case nir_intrinsic_store_ssbo: {
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
nir_store_ssbo_intel(
|
||||
b,
|
||||
intrin->src[0].ssa,
|
||||
intrin->src[1].ssa,
|
||||
intrin->src[2].ssa,
|
||||
.access = nir_intrinsic_access(intrin),
|
||||
.align_mul = nir_intrinsic_align_mul(intrin),
|
||||
.align_offset = nir_intrinsic_align_offset(intrin),
|
||||
.base = 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
brw_nir_ssbo_intel(nir_shader *shader)
|
||||
{
|
||||
return nir_shader_intrinsics_pass(shader,
|
||||
brw_nir_ssbo_intel_instr,
|
||||
nir_metadata_control_flow,
|
||||
NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
brw_vectorize_lower_mem_access(nir_shader *nir,
|
||||
const struct brw_compiler *compiler,
|
||||
|
|
@ -1808,7 +1860,6 @@ brw_vectorize_lower_mem_access(nir_shader *nir,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
struct brw_mem_access_cb_data cb_data = {
|
||||
.devinfo = compiler->devinfo,
|
||||
};
|
||||
|
|
@ -1836,6 +1887,23 @@ brw_vectorize_lower_mem_access(nir_shader *nir,
|
|||
OPT(nir_opt_algebraic);
|
||||
OPT(nir_opt_constant_folding);
|
||||
}
|
||||
|
||||
/* Do this after the vectorization & brw_nir_rebase_const_offset_ubo_loads
|
||||
* so that we maximize the offset put into the messages.
|
||||
*/
|
||||
if (compiler->devinfo->ver >= 20) {
|
||||
OPT(brw_nir_ssbo_intel);
|
||||
|
||||
const nir_opt_offsets_options offset_options = {
|
||||
.buffer_max = UINT32_MAX,
|
||||
.shared_max = UINT32_MAX,
|
||||
.shared_atomic_max = UINT32_MAX,
|
||||
.uniform_max = UINT32_MAX,
|
||||
};
|
||||
OPT(nir_opt_offsets, &offset_options);
|
||||
|
||||
OPT(brw_nir_lower_immediate_offsets);
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
|
|||
|
|
@ -215,6 +215,8 @@ bool brw_nir_lower_texture(nir_shader *nir,
|
|||
|
||||
bool brw_nir_lower_sample_index_in_coord(nir_shader *nir);
|
||||
|
||||
/* For shared-memory and Intel SSBO/UBO (block) intrinsics, clamps the `base`
 * index to the bounds derived from the LSC_ADDRESS_OFFSET_*_BITS constants,
 * rounds it to a multiple of 4, and adds the part of the base that was removed
 * onto the intrinsic's offset source.  Returns the result of
 * nir_shader_intrinsics_pass().
 */
bool brw_nir_lower_immediate_offsets(nir_shader *shader);
|
||||
|
||||
bool brw_nir_lower_mem_access_bit_sizes(nir_shader *shader,
|
||||
const struct
|
||||
intel_device_info *devinfo);
|
||||
|
|
|
|||
78
src/intel/compiler/brw_nir_lower_immediate_offsets.c
Normal file
78
src/intel/compiler/brw_nir_lower_immediate_offsets.c
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright (c) 2025 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "brw_eu.h"
|
||||
#include "brw_nir.h"
|
||||
|
||||
static bool
|
||||
lower_immediate_offsets(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
|
||||
{
|
||||
unsigned max_bits = 0;
|
||||
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_shared:
|
||||
case nir_intrinsic_store_shared:
|
||||
case nir_intrinsic_shared_atomic:
|
||||
case nir_intrinsic_shared_atomic_swap:
|
||||
case nir_intrinsic_load_shared_block_intel:
|
||||
case nir_intrinsic_store_shared_block_intel:
|
||||
case nir_intrinsic_load_shared_uniform_block_intel:
|
||||
max_bits = LSC_ADDRESS_OFFSET_FLAT_BITS;
|
||||
break;
|
||||
case nir_intrinsic_load_ssbo_intel:
|
||||
case nir_intrinsic_load_ubo_uniform_block_intel:
|
||||
case nir_intrinsic_load_ssbo_uniform_block_intel:
|
||||
case nir_intrinsic_store_ssbo_intel:
|
||||
case nir_intrinsic_store_ssbo_block_intel: {
|
||||
nir_src *binding = nir_get_io_index_src(intrin);
|
||||
const bool has_resource =
|
||||
binding->ssa->parent_instr->type == nir_instr_type_intrinsic &&
|
||||
nir_instr_as_intrinsic(binding->ssa->parent_instr)->intrinsic ==
|
||||
nir_intrinsic_resource_intel;
|
||||
bool ss_binding = false;
|
||||
if (has_resource) {
|
||||
nir_intrinsic_instr *resource =
|
||||
nir_instr_as_intrinsic(binding->ssa->parent_instr);
|
||||
ss_binding = (nir_intrinsic_resource_access_intel(resource) &
|
||||
nir_resource_intel_bindless) != 0;
|
||||
}
|
||||
max_bits = ss_binding ?
|
||||
LSC_ADDRESS_OFFSET_SS_BITS : LSC_ADDRESS_OFFSET_BTI_BITS;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(nir_intrinsic_has_base(intrin));
|
||||
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
||||
const int32_t min = u_intN_min(max_bits);
|
||||
const int32_t max = u_intN_max(max_bits);
|
||||
|
||||
const int32_t base = nir_intrinsic_base(intrin);
|
||||
if ((base % 4) == 0 && base >= min && base <= max)
|
||||
return false;
|
||||
|
||||
int32_t new_base = CLAMP(base, min, max);
|
||||
new_base -= new_base % 4;
|
||||
|
||||
assert(new_base >= min && new_base <= max);
|
||||
|
||||
nir_src *offset_src = nir_get_io_offset_src(intrin);
|
||||
nir_src_rewrite(offset_src, nir_iadd_imm(b, offset_src->ssa, base - new_base));
|
||||
nir_intrinsic_set_base(intrin, new_base);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_lower_immediate_offsets(nir_shader *shader)
|
||||
{
|
||||
return nir_shader_intrinsics_pass(shader, lower_immediate_offsets,
|
||||
nir_metadata_control_flow, NULL);
|
||||
}
|
||||
|
|
@ -73,6 +73,7 @@ libintel_compiler_brw_files = files(
|
|||
'brw_nir_lower_alpha_to_coverage.c',
|
||||
'brw_nir_lower_fs_barycentrics.c',
|
||||
'brw_nir_lower_fs_msaa.c',
|
||||
'brw_nir_lower_immediate_offsets.c',
|
||||
'brw_nir_lower_intersection_shader.c',
|
||||
'brw_nir_lower_ray_queries.c',
|
||||
'brw_nir_lower_rt_intrinsics.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue