mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-20 07:40:42 +01:00
nak: support has_load_global_bounded on turing and newer
Totals:
CodeSize: 9401446416 -> 8663482432 (-7.85%); split: -7.85%, +0.00%
Number of GPRs: 47297665 -> 47508294 (+0.45%); split: -0.14%, +0.59%
SLM Size: 1202912 -> 1203000 (+0.01%); split: -0.09%, +0.10%
Static cycle count: 5984801035 -> 4714013561 (-21.23%); split: -21.24%, +0.00%
Spills to memory: 44482 -> 45073 (+1.33%); split: -1.68%, +3.01%
Fills from memory: 44482 -> 45073 (+1.33%); split: -1.68%, +3.01%
Spills to reg: 184822 -> 149129 (-19.31%); split: -21.54%, +2.23%
Fills from reg: 223885 -> 170692 (-23.76%); split: -25.49%, +1.73%
Max warps/SM: 50642520 -> 50564740 (-0.15%); split: +0.03%, -0.19%

Totals from 185510 (15.95% of 1163204) affected shaders:
CodeSize: 3910084048 -> 3172120064 (-18.87%); split: -18.88%, +0.01%
Number of GPRs: 10625243 -> 10835872 (+1.98%); split: -0.63%, +2.61%
SLM Size: 659568 -> 659656 (+0.01%); split: -0.17%, +0.19%
Static cycle count: 3920553863 -> 2649766389 (-32.41%); split: -32.42%, +0.01%
Spills to memory: 8498 -> 9089 (+6.95%); split: -8.81%, +15.77%
Fills from memory: 8498 -> 9089 (+6.95%); split: -8.81%, +15.77%
Spills to reg: 109049 -> 73356 (-32.73%); split: -36.51%, +3.77%
Fills from reg: 116031 -> 62838 (-45.84%); split: -49.18%, +3.34%
Max warps/SM: 6885584 -> 6807804 (-1.13%); split: +0.25%, -1.38%

This also helps significantly reduce shader compile times since it reduces the number of basic blocks. With Dragon Age: The Veilguard, it reduces shader compile times by around 20%.

Reviewed-by: Mary Guillemard <mary@mary.zone>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Acked-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40272>
This commit is contained in:
parent
7722bde53b
commit
f7ad45e5fc
2 changed files with 19 additions and 1 deletions
|
|
@@ -181,6 +181,7 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
|
|||
has_pack_half_2x16_rtz: true,
|
||||
has_bfm: dev.sm >= 70,
|
||||
discard_is_demote: true,
|
||||
has_load_global_bounded: dev.sm >= 73,
|
||||
|
||||
max_unroll_iterations: 32,
|
||||
max_samples: 8,
|
||||
|
|
|
|||
|
|
@@ -1019,6 +1019,22 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak)
|
|||
res = nir_load_global_nv(&b, intr->def.num_components, intr->def.bit_size, addr->ssa, nir_true);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_global_bounded:
|
||||
case nir_intrinsic_load_global_constant_bounded: {
|
||||
assert(nak->sm >= 73);
|
||||
|
||||
nir_src *base = &intr->src[0];
|
||||
nir_src *offset = &intr->src[1];
|
||||
nir_src *size = &intr->src[2];
|
||||
unsigned load_size = intr->def.num_components * intr->def.bit_size / 8;
|
||||
|
||||
/* see addr_is_in_bounds in nir_lower_explicit_io.c */
|
||||
nir_def *addr = nir_iadd(&b, base->ssa, nir_u2u64(&b, offset->ssa));
|
||||
nir_def *last_byte = nir_iadd_imm(&b, offset->ssa, load_size - 1);
|
||||
nir_def *cond = nir_ult(&b, last_byte, size->ssa);
|
||||
res = nir_load_global_nv(&b, intr->def.num_components, intr->def.bit_size, addr, cond);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_scratch:
|
||||
res = nir_load_scratch_nv(&b, intr->def.num_components, intr->def.bit_size, addr->ssa);
|
||||
break;
|
||||
|
|
@@ -1055,7 +1071,8 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak)
|
|||
|
||||
if (nir_intrinsic_has_access(intr))
|
||||
nir_intrinsic_set_access(new, nir_intrinsic_access(intr));
|
||||
-if (intr->intrinsic == nir_intrinsic_load_global_constant)
|
||||
if (intr->intrinsic == nir_intrinsic_load_global_constant ||
|
||||
intr->intrinsic == nir_intrinsic_load_global_constant_bounded)
|
||||
nir_intrinsic_set_access(new, nir_intrinsic_access(new) | ACCESS_CAN_REORDER);
|
||||
|
||||
if (nir_intrinsic_has_align_mul(intr))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue