nak: move all constant_bounded and constant_offset lowering to nak_nir_lower_load_store

We want to run nir_opt_sink before the lowering, and for that it's best if
we add the bounds-checking math after sinking the loads.

Totals:
CodeSize: 9003190576 -> 9003190080 (-0.00%); split: -0.00%, +0.00%
Static cycle count: 5001955177 -> 5001932761 (-0.00%); split: -0.00%, +0.00%

Totals from 350 (0.03% of 1212873) affected shaders:
CodeSize: 9562192 -> 9561696 (-0.01%); split: -0.02%, +0.02%
Static cycle count: 6031366 -> 6008950 (-0.37%); split: -0.40%, +0.03%
This commit is contained in:
Karol Herbst 2026-04-12 16:37:23 +02:00 committed by Karol Herbst
parent 356c279daa
commit 93758c43c3
2 changed files with 26 additions and 52 deletions

View file

@ -1029,8 +1029,6 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak)
}
case nir_intrinsic_load_global_bounded:
case nir_intrinsic_load_global_constant_bounded: {
assert(nak->sm >= 73);
nir_src *base = &intr->src[0];
nir_src *offset = &intr->src[1];
nir_src *size = &intr->src[2];
@ -1040,7 +1038,31 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak)
nir_def *addr = nir_iadd(&b, base->ssa, nir_u2u64(&b, offset->ssa));
nir_def *last_byte = nir_iadd_imm(&b, offset->ssa, load_size - 1);
nir_def *cond = nir_ult(&b, last_byte, size->ssa);
nir_def *zero = NULL;
if (nak->sm < 73) {
zero = nir_imm_zero(&b, intr->def.num_components, intr->def.bit_size);
nir_push_if(&b, cond);
}
res = nir_load_global_nv(&b, intr->def.num_components, intr->def.bit_size, addr, cond);
new = nir_def_as_intrinsic(res);
if (nak->sm < 73) {
nir_pop_if(&b, NULL);
res = nir_if_phi(&b, res, zero);
}
break;
}
case nir_intrinsic_load_global_constant_offset: {
nir_src *base = &intr->src[0];
nir_src *offset = &intr->src[1];
nir_def *address = nir_iadd(&b, base->ssa, nir_u2u64(&b, offset->ssa));
nir_def *nir_true = nir_imm_bool(&b, true);
res = nir_load_global_nv(&b, intr->def.num_components, intr->def.bit_size, address, nir_true);
break;
}
case nir_intrinsic_load_scratch:
@ -1080,7 +1102,8 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak)
if (nir_intrinsic_has_access(intr))
nir_intrinsic_set_access(new, nir_intrinsic_access(intr));
if (intr->intrinsic == nir_intrinsic_load_global_constant ||
intr->intrinsic == nir_intrinsic_load_global_constant_bounded)
intr->intrinsic == nir_intrinsic_load_global_constant_bounded ||
intr->intrinsic == nir_intrinsic_load_global_constant_offset)
nir_intrinsic_set_access(new, nir_intrinsic_access(new) | ACCESS_CAN_REORDER);
if (nir_intrinsic_has_align_mul(intr))

View file

@ -244,8 +244,6 @@ static bool
lower_load_intrinsic(nir_builder *b, nir_intrinsic_instr *load,
UNUSED void *data)
{
struct nvk_physical_device *pdev = data;
switch (load->intrinsic) {
case nir_intrinsic_load_ubo: {
b->cursor = nir_before_instr(&load->instr);
@ -275,53 +273,6 @@ lower_load_intrinsic(nir_builder *b, nir_intrinsic_instr *load,
return true;
}
case nir_intrinsic_load_global_constant_bounded:
/* Handled inside nak_nir_lower_load_store */
if (pdev->info.sm >= 73)
return false;
FALLTHROUGH;
case nir_intrinsic_load_global_constant_offset: {
b->cursor = nir_before_instr(&load->instr);
nir_def *base_addr = load->src[0].ssa;
nir_def *offset = load->src[1].ssa;
nir_def *zero = NULL;
if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) {
nir_def *bound = load->src[2].ssa;
unsigned bit_size = load->def.bit_size;
assert(bit_size >= 8 && bit_size % 8 == 0);
unsigned byte_size = bit_size / 8;
zero = nir_imm_zero(b, load->num_components, bit_size);
unsigned load_size = byte_size * load->num_components;
nir_def *sat_offset =
nir_umin(b, offset, nir_imm_int(b, UINT32_MAX - (load_size - 1)));
nir_def *in_bounds =
nir_ilt(b, nir_iadd_imm(b, sat_offset, load_size - 1), bound);
nir_push_if(b, in_bounds);
}
nir_def *val =
nir_load_global_constant(b, load->def.num_components,
load->def.bit_size,
nir_iadd(b, base_addr, nir_u2u64(b, offset)),
.align_mul = nir_intrinsic_align_mul(load),
.align_offset = nir_intrinsic_align_offset(load));
if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) {
nir_pop_if(b, NULL);
val = nir_if_phi(b, val, zero);
}
nir_def_rewrite_uses(&load->def, val);
return true;
}
default:
return false;
}