diff --git a/src/compiler/nir/nir_opt_shrink_vectors.c b/src/compiler/nir/nir_opt_shrink_vectors.c index e9a3d08f6c4..6de24075c3d 100644 --- a/src/compiler/nir/nir_opt_shrink_vectors.c +++ b/src/compiler/nir/nir_opt_shrink_vectors.c @@ -377,7 +377,10 @@ opt_shrink_vectors_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, case nir_intrinsic_load_constant: case nir_intrinsic_load_shared: case nir_intrinsic_load_global: + case nir_intrinsic_load_global_bounded: case nir_intrinsic_load_global_constant: + case nir_intrinsic_load_global_constant_bounded: + case nir_intrinsic_load_global_constant_offset: case nir_intrinsic_load_kernel_input: case nir_intrinsic_load_scratch: case nir_intrinsic_load_attribute_pan: { diff --git a/src/compiler/nir/nir_opt_sink.c b/src/compiler/nir/nir_opt_sink.c index e32b940da7c..e35ccb0aa74 100644 --- a/src/compiler/nir/nir_opt_sink.c +++ b/src/compiler/nir/nir_opt_sink.c @@ -166,6 +166,7 @@ can_sink_instr(nir_instr *instr, nir_move_options options, bool *can_mov_out_of_ if (intrin->intrinsic == nir_intrinsic_load_global || intrin->intrinsic == nir_intrinsic_load_global_amd || + intrin->intrinsic == nir_intrinsic_load_global_nv || intrin->intrinsic == nir_intrinsic_load_ubo || intrin->intrinsic == nir_intrinsic_load_ubo_uniform_block_intel || intrin->intrinsic == nir_intrinsic_load_ssbo) { @@ -232,6 +233,7 @@ can_sink_instr(nir_instr *instr, nir_move_options options, bool *can_mov_out_of_ case nir_intrinsic_load_global: case nir_intrinsic_load_global_amd: /* = global + convergent */ + case nir_intrinsic_load_global_nv: return options & nir_move_load_global; case nir_intrinsic_ldc_nv: diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index 9c129859c63..1df593e16fe 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -1033,8 +1033,6 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak) } case nir_intrinsic_load_global_bounded: case nir_intrinsic_load_global_constant_bounded: { - assert(nak->sm >= 73); - nir_src *base = &intr->src[0]; nir_src *offset = &intr->src[1]; nir_src *size = &intr->src[2]; @@ -1044,7 +1042,31 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak) nir_def *addr = nir_iadd(&b, base->ssa, nir_u2u64(&b, offset->ssa)); nir_def *last_byte = nir_iadd_imm(&b, offset->ssa, load_size - 1); nir_def *cond = nir_ult(&b, last_byte, size->ssa); + + nir_def *zero = NULL; + if (nak->sm < 73) { + zero = nir_imm_zero(&b, intr->def.num_components, intr->def.bit_size); + nir_push_if(&b, cond); + } + res = nir_load_global_nv(&b, intr->def.num_components, intr->def.bit_size, addr, cond); + new = nir_def_as_intrinsic(res); + + if (nak->sm < 73) { + nir_pop_if(&b, NULL); + res = nir_if_phi(&b, res, zero); + } + + break; + } + case nir_intrinsic_load_global_constant_offset: { + nir_src *base = &intr->src[0]; + nir_src *offset = &intr->src[1]; + + nir_def *address = nir_iadd(&b, base->ssa, nir_u2u64(&b, offset->ssa)); + nir_def *nir_true = nir_imm_bool(&b, true); + + res = nir_load_global_nv(&b, intr->def.num_components, intr->def.bit_size, address, nir_true); break; } case nir_intrinsic_load_scratch: @@ -1084,7 +1106,8 @@ nak_nir_lower_load_store(nir_shader *nir, const struct nak_compiler *nak) if (nir_intrinsic_has_access(intr)) nir_intrinsic_set_access(new, nir_intrinsic_access(intr)); if (intr->intrinsic == nir_intrinsic_load_global_constant || - intr->intrinsic == nir_intrinsic_load_global_constant_bounded) + intr->intrinsic == nir_intrinsic_load_global_constant_bounded || + intr->intrinsic == nir_intrinsic_load_global_constant_offset) nir_intrinsic_set_access(new, nir_intrinsic_access(new) | ACCESS_CAN_REORDER); if (nir_intrinsic_has_align_mul(intr)) @@ -1311,7 +1334,10 @@ nak_postprocess_nir(nir_shader *nir, UNREACHABLE("Unsupported shader stage"); } - OPT(nir, nak_nir_lower_load_store, nak); + /* sink memory loads once before we add our own bound checking */ + OPT(nir, nir_opt_sink, nir_move_load_ssbo | nir_move_load_ubo); + if (OPT(nir, nak_nir_lower_load_store, nak)) + OPT(nir, nir_opt_constant_folding); struct nir_opt_offsets_options nak_offset_options = { .max_offset_cb = nak_nir_max_imm_offset, @@ -1355,6 +1381,7 @@ nak_postprocess_nir(nir_shader *nir, } while (progress); OPT(nir, nir_opt_move, nir_move_comparisons | nir_move_load_ubo); + OPT(nir, nir_opt_sink, nir_move_load_global); if (nak->sm < 70) { const nir_split_conversions_options split_conv_opts = { diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c index 8f7f781034b..b90742c4ee7 100644 --- a/src/nouveau/vulkan/nvk_shader.c +++ b/src/nouveau/vulkan/nvk_shader.c @@ -244,8 +244,6 @@ static bool lower_load_intrinsic(nir_builder *b, nir_intrinsic_instr *load, UNUSED void *data) { - struct nvk_physical_device *pdev = data; - switch (load->intrinsic) { case nir_intrinsic_load_ubo: { b->cursor = nir_before_instr(&load->instr); @@ -275,53 +273,6 @@ lower_load_intrinsic(nir_builder *b, nir_intrinsic_instr *load, return true; } - case nir_intrinsic_load_global_constant_bounded: - /* Handled inside nak_nir_lower_load_store */ - if (pdev->info.sm >= 73) - return false; - FALLTHROUGH; - case nir_intrinsic_load_global_constant_offset: { - b->cursor = nir_before_instr(&load->instr); - - nir_def *base_addr = load->src[0].ssa; - nir_def *offset = load->src[1].ssa; - - nir_def *zero = NULL; - if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) { - nir_def *bound = load->src[2].ssa; - - unsigned bit_size = load->def.bit_size; - assert(bit_size >= 8 && bit_size % 8 == 0); - unsigned byte_size = bit_size / 8; - - zero = nir_imm_zero(b, load->num_components, bit_size); - - unsigned load_size = byte_size * load->num_components; - - nir_def *sat_offset = - nir_umin(b, offset, nir_imm_int(b, UINT32_MAX - (load_size - 1))); - nir_def *in_bounds = - nir_ilt(b, nir_iadd_imm(b, sat_offset, load_size - 1), bound); - - nir_push_if(b, in_bounds); - } - - nir_def *val = - nir_load_global_constant(b, load->def.num_components, - load->def.bit_size, - nir_iadd(b, base_addr, nir_u2u64(b, offset)), - .align_mul = nir_intrinsic_align_mul(load), - .align_offset = nir_intrinsic_align_offset(load)); - - if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) { - nir_pop_if(b, NULL); - val = nir_if_phi(b, val, zero); - } - - nir_def_rewrite_uses(&load->def, val); - return true; - } - default: return false; }