diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index fa97d0b6f9e..7eef6a7300c 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -115,10 +115,10 @@ void ac_set_nir_options(struct radeon_info *info, bool use_llvm, bool ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, - unsigned num_components, unsigned hole_size, nir_intrinsic_instr *low, + unsigned num_components, int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data) { - if (num_components > 4 || hole_size) + if (num_components > 4 || hole_size > 0) return false; bool is_scratch = false; diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h index ec9fdc2eb7a..b6ce1c1fcce 100644 --- a/src/amd/common/ac_shader_util.h +++ b/src/amd/common/ac_shader_util.h @@ -245,7 +245,7 @@ void ac_set_nir_options(struct radeon_info *info, bool use_llvm, nir_shader_compiler_options *options); bool ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, - unsigned num_components, unsigned hole_size, + unsigned num_components, int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data); unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask, diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 9a15d94ac1c..07ca606844c 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -2839,10 +2839,10 @@ agx_optimize_loop_nir(nir_shader *nir) bool agx_mem_vectorize_cb(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, - unsigned hole_size, nir_intrinsic_instr *low, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data) { - if (hole_size) + if (hole_size > 0) return false; /* Must be aligned to the size of the load */ diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index 5d55bd876a2..b2e4e0b9c33 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -305,7 +305,7 @@ bool agx_nir_lower_cull_distance_fs(struct nir_shader *s, unsigned nr_distances); bool agx_mem_vectorize_cb(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, - unsigned hole_size, nir_intrinsic_instr *low, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data); void agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key, diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 2a12438b55f..c45d92b6264 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -2089,12 +2089,12 @@ static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, - unsigned hole_size, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data) { - if (hole_size || !nir_num_components_valid(num_components)) + if (hole_size > 0 || !nir_num_components_valid(num_components)) return false; /* TMU general access only supports 32-bit vectors */ diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 16e0e58d93e..9957249f5de 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -6007,7 +6007,7 @@ typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul, unsigned num_components, /* The hole between low and * high if they are not adjacent. */ - unsigned hole_size, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data); diff --git a/src/compiler/nir/nir_lower_shader_calls.c b/src/compiler/nir/nir_lower_shader_calls.c index 245d6dd4c19..3055e83a519 100644 --- a/src/compiler/nir/nir_lower_shader_calls.c +++ b/src/compiler/nir/nir_lower_shader_calls.c @@ -1920,7 +1920,7 @@ should_vectorize(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, - unsigned hole_size, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data) { diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c index 4a8c5099a18..75237cd6ce2 100644 --- a/src/compiler/nir/nir_opt_load_store_vectorize.c +++ b/src/compiler/nir/nir_opt_load_store_vectorize.c @@ -669,8 +669,7 @@ new_bitsize_acceptable(struct vectorize_ctx *ctx, unsigned new_bit_size, unsigned low_size = low->intrin->num_components * get_bit_size(low) / 8; /* The hole size can be less than 0 if low and high instructions overlap. */ - unsigned hole_size = - MAX2(high->offset_signed - (low->offset_signed + low_size), 0); + int64_t hole_size = high->offset_signed - (low->offset_signed + low_size); if (!ctx->options->callback(low->align_mul, low->align_offset, diff --git a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp index 66bc813623f..48dd1fb415f 100644 --- a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp +++ b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp @@ -71,7 +71,7 @@ protected: static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, - unsigned num_components, unsigned hole_size, + unsigned num_components, int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data); static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align); @@ -83,7 +83,7 @@ protected: std::map res_map; unsigned max_components = 4; bool overfetch = false; - unsigned max_hole_size = 0; + int64_t max_hole_size = 0; }; std::string @@ -340,7 +340,7 @@ bool nir_load_store_vectorize_test::test_alu_def( bool nir_load_store_vectorize_test::mem_vectorize_callback( unsigned align_mul, unsigned align_offset, unsigned bit_size, - unsigned num_components, unsigned hole_size, + unsigned num_components, int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data) { diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 57a954aa348..3c8fb8a1fc9 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -163,10 +163,10 @@ ir3_nir_should_scalarize_mem(const nir_instr *instr, const void *data) static bool ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, - unsigned hole_size, nir_intrinsic_instr *low, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data) { - if (hole_size || !nir_num_components_valid(num_components)) + if (hole_size > 0 || !nir_num_components_valid(num_components)) return false; struct ir3_compiler *compiler = data; diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c index 12df4e483a8..2dda6a099a0 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -3269,11 +3269,11 @@ ntt_should_vectorize_instr(const nir_instr *instr, const void *data) static bool ntt_should_vectorize_io(unsigned align, unsigned bit_size, unsigned num_components, unsigned high_offset, - unsigned hole_size, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data) { - if (bit_size != 32 || hole_size || !nir_num_components_valid(num_components)) + if (bit_size != 32 || hole_size > 0 || !nir_num_components_valid(num_components)) return false; /* Our offset alignment should aways be at least 4 bytes */ diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 2290071f1c4..45df7935445 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -1465,7 +1465,7 @@ bool brw_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, - unsigned hole_size, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data) diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 1d1a04c8463..1b2f35b9c65 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -234,7 +234,7 @@ enum brw_reg_type brw_type_for_nir_type(const struct intel_device_info *devinfo, bool brw_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, - unsigned hole_size, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data); diff --git a/src/intel/compiler/elk/elk_nir.c b/src/intel/compiler/elk/elk_nir.c index f8a8c151d30..4721386c65c 100644 --- a/src/intel/compiler/elk/elk_nir.c +++ b/src/intel/compiler/elk/elk_nir.c @@ -1129,7 +1129,7 @@ static bool elk_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, - unsigned hole_size, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data) @@ -1138,7 +1138,7 @@ elk_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset, * those back into 32-bit ones anyway and UBO loads aren't split in NIR so * we don't want to make a mess for the back-end. */ - if (bit_size > 32 || hole_size || !nir_num_components_valid(num_components)) + if (bit_size > 32 || hole_size > 0 || !nir_num_components_valid(num_components)) return false; if (low->intrinsic == nir_intrinsic_load_ubo_uniform_block_intel || diff --git a/src/microsoft/compiler/nir_to_dxil.c b/src/microsoft/compiler/nir_to_dxil.c index be878258f66..d003cd63abb 100644 --- a/src/microsoft/compiler/nir_to_dxil.c +++ b/src/microsoft/compiler/nir_to_dxil.c @@ -6232,11 +6232,11 @@ vectorize_filter( unsigned align_offset, unsigned bit_size, unsigned num_components, - unsigned hole_size, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data) { - return !hole_size && util_is_power_of_two_nonzero(num_components); + return hole_size <= 0 && util_is_power_of_two_nonzero(num_components); } struct lower_mem_bit_sizes_data { diff --git a/src/nouveau/codegen/nv50_ir_from_nir.cpp b/src/nouveau/codegen/nv50_ir_from_nir.cpp index 76027b463fb..bd55207223c 100644 --- a/src/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/nouveau/codegen/nv50_ir_from_nir.cpp @@ -138,7 +138,7 @@ private: unsigned align_offset, unsigned bit_size, unsigned num_components, - unsigned hole_size, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *cb_data); @@ -1371,12 +1371,12 @@ Converter::memVectorizeCb(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, - unsigned hole_size, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *cb_data) { - if (hole_size) + if (hole_size > 0) return false; /* diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index 22c548e8c3f..39463097ca8 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -799,7 +799,7 @@ nak_nir_remove_barriers(nir_shader *nir) static bool nak_mem_vectorize_cb(unsigned align_mul, unsigned align_offset, unsigned bit_size, unsigned num_components, - unsigned hole_size, nir_intrinsic_instr *low, + int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *cb_data) { /* @@ -808,7 +808,7 @@ nak_mem_vectorize_cb(unsigned align_mul, unsigned align_offset, */ assert(util_is_power_of_two_nonzero(align_mul)); - if (hole_size) + if (hole_size > 0) return false; unsigned max_bytes = 128u / 8u; diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index cc66de613d5..796246824e4 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -4809,11 +4809,11 @@ mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes, static bool mem_vectorize_cb(unsigned align_mul, unsigned align_offset, unsigned bit_size, - unsigned num_components, unsigned hole_size, + unsigned num_components, int64_t hole_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data) { - if (hole_size) + if (hole_size > 0) return false; /* Must be aligned to the size of the load */