mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
Merge branch 'nak/opt/sink' into 'main'
nak: sink load_global
See merge request mesa/mesa!40904
This commit is contained in:
commit
8846661ee3
4 changed files with 36 additions and 53 deletions
|
|
@ -377,7 +377,10 @@ opt_shrink_vectors_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
|
|||
case nir_intrinsic_load_constant:
|
||||
case nir_intrinsic_load_shared:
|
||||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_load_global_bounded:
|
||||
case nir_intrinsic_load_global_constant:
|
||||
case nir_intrinsic_load_global_constant_bounded:
|
||||
case nir_intrinsic_load_global_constant_offset:
|
||||
case nir_intrinsic_load_kernel_input:
|
||||
case nir_intrinsic_load_scratch:
|
||||
case nir_intrinsic_load_attribute_pan: {
|
||||
|
|
|
|||
|
|
@ -166,6 +166,7 @@ can_sink_instr(nir_instr *instr, nir_move_options options, bool *can_mov_out_of_
|
|||
|
||||
if (intrin->intrinsic == nir_intrinsic_load_global ||
|
||||
intrin->intrinsic == nir_intrinsic_load_global_amd ||
|
||||
intrin->intrinsic == nir_intrinsic_load_global_nv ||
|
||||
intrin->intrinsic == nir_intrinsic_load_ubo ||
|
||||
intrin->intrinsic == nir_intrinsic_load_ubo_uniform_block_intel ||
|
||||
intrin->intrinsic == nir_intrinsic_load_ssbo) {
|
||||
|
|
@ -232,6 +233,7 @@ can_sink_instr(nir_instr *instr, nir_move_options options, bool *can_mov_out_of_
|
|||
|
||||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_load_global_amd: /* = global + convergent */
|
||||
case nir_intrinsic_load_global_nv:
|
||||
return options & nir_move_load_global;
|
||||
|
||||
case nir_intrinsic_ldc_nv:
|
||||
|
|
|
|||
|
|
@ -1033,8 +1033,6 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak)
|
|||
}
|
||||
case nir_intrinsic_load_global_bounded:
|
||||
case nir_intrinsic_load_global_constant_bounded: {
|
||||
assert(nak->sm >= 73);
|
||||
|
||||
nir_src *base = &intr->src[0];
|
||||
nir_src *offset = &intr->src[1];
|
||||
nir_src *size = &intr->src[2];
|
||||
|
|
@ -1044,7 +1042,31 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak)
|
|||
nir_def *addr = nir_iadd(&b, base->ssa, nir_u2u64(&b, offset->ssa));
|
||||
nir_def *last_byte = nir_iadd_imm(&b, offset->ssa, load_size - 1);
|
||||
nir_def *cond = nir_ult(&b, last_byte, size->ssa);
|
||||
|
||||
nir_def *zero = NULL;
|
||||
if (nak->sm < 73) {
|
||||
zero = nir_imm_zero(&b, intr->def.num_components, intr->def.bit_size);
|
||||
nir_push_if(&b, cond);
|
||||
}
|
||||
|
||||
res = nir_load_global_nv(&b, intr->def.num_components, intr->def.bit_size, addr, cond);
|
||||
new = nir_def_as_intrinsic(res);
|
||||
|
||||
if (nak->sm < 73) {
|
||||
nir_pop_if(&b, NULL);
|
||||
res = nir_if_phi(&b, res, zero);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_global_constant_offset: {
|
||||
nir_src *base = &intr->src[0];
|
||||
nir_src *offset = &intr->src[1];
|
||||
|
||||
nir_def *address = nir_iadd(&b, base->ssa, nir_u2u64(&b, offset->ssa));
|
||||
nir_def *nir_true = nir_imm_bool(&b, true);
|
||||
|
||||
res = nir_load_global_nv(&b, intr->def.num_components, intr->def.bit_size, address, nir_true);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_scratch:
|
||||
|
|
@ -1084,7 +1106,8 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak)
|
|||
if (nir_intrinsic_has_access(intr))
|
||||
nir_intrinsic_set_access(new, nir_intrinsic_access(intr));
|
||||
if (intr->intrinsic == nir_intrinsic_load_global_constant ||
|
||||
intr->intrinsic == nir_intrinsic_load_global_constant_bounded)
|
||||
intr->intrinsic == nir_intrinsic_load_global_constant_bounded ||
|
||||
intr->intrinsic == nir_intrinsic_load_global_constant_offset)
|
||||
nir_intrinsic_set_access(new, nir_intrinsic_access(new) | ACCESS_CAN_REORDER);
|
||||
|
||||
if (nir_intrinsic_has_align_mul(intr))
|
||||
|
|
@ -1311,7 +1334,10 @@ nak_postprocess_nir(nir_shader *nir,
|
|||
UNREACHABLE("Unsupported shader stage");
|
||||
}
|
||||
|
||||
OPT(nir, nak_nir_lower_load_store, nak);
|
||||
/* sink memory loads once before we add our own bound checking */
|
||||
OPT(nir, nir_opt_sink, nir_move_load_ssbo | nir_move_load_ubo);
|
||||
if (OPT(nir, nak_nir_lower_load_store, nak))
|
||||
OPT(nir, nir_opt_constant_folding);
|
||||
|
||||
struct nir_opt_offsets_options nak_offset_options = {
|
||||
.max_offset_cb = nak_nir_max_imm_offset,
|
||||
|
|
@ -1355,6 +1381,7 @@ nak_postprocess_nir(nir_shader *nir,
|
|||
} while (progress);
|
||||
|
||||
OPT(nir, nir_opt_move, nir_move_comparisons | nir_move_load_ubo);
|
||||
OPT(nir, nir_opt_sink, nir_move_load_global);
|
||||
|
||||
if (nak->sm < 70) {
|
||||
const nir_split_conversions_options split_conv_opts = {
|
||||
|
|
|
|||
|
|
@ -244,8 +244,6 @@ static bool
|
|||
lower_load_intrinsic(nir_builder *b, nir_intrinsic_instr *load,
|
||||
UNUSED void *data)
|
||||
{
|
||||
struct nvk_physical_device *pdev = data;
|
||||
|
||||
switch (load->intrinsic) {
|
||||
case nir_intrinsic_load_ubo: {
|
||||
b->cursor = nir_before_instr(&load->instr);
|
||||
|
|
@ -275,53 +273,6 @@ lower_load_intrinsic(nir_builder *b, nir_intrinsic_instr *load,
|
|||
return true;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_global_constant_bounded:
|
||||
/* Handled inside nak_nir_lower_load_store */
|
||||
if (pdev->info.sm >= 73)
|
||||
return false;
|
||||
FALLTHROUGH;
|
||||
case nir_intrinsic_load_global_constant_offset: {
|
||||
b->cursor = nir_before_instr(&load->instr);
|
||||
|
||||
nir_def *base_addr = load->src[0].ssa;
|
||||
nir_def *offset = load->src[1].ssa;
|
||||
|
||||
nir_def *zero = NULL;
|
||||
if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) {
|
||||
nir_def *bound = load->src[2].ssa;
|
||||
|
||||
unsigned bit_size = load->def.bit_size;
|
||||
assert(bit_size >= 8 && bit_size % 8 == 0);
|
||||
unsigned byte_size = bit_size / 8;
|
||||
|
||||
zero = nir_imm_zero(b, load->num_components, bit_size);
|
||||
|
||||
unsigned load_size = byte_size * load->num_components;
|
||||
|
||||
nir_def *sat_offset =
|
||||
nir_umin(b, offset, nir_imm_int(b, UINT32_MAX - (load_size - 1)));
|
||||
nir_def *in_bounds =
|
||||
nir_ilt(b, nir_iadd_imm(b, sat_offset, load_size - 1), bound);
|
||||
|
||||
nir_push_if(b, in_bounds);
|
||||
}
|
||||
|
||||
nir_def *val =
|
||||
nir_load_global_constant(b, load->def.num_components,
|
||||
load->def.bit_size,
|
||||
nir_iadd(b, base_addr, nir_u2u64(b, offset)),
|
||||
.align_mul = nir_intrinsic_align_mul(load),
|
||||
.align_offset = nir_intrinsic_align_offset(load));
|
||||
|
||||
if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) {
|
||||
nir_pop_if(b, NULL);
|
||||
val = nir_if_phi(b, val, zero);
|
||||
}
|
||||
|
||||
nir_def_rewrite_uses(&load->def, val);
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue