nir: add hole_size parameter into the vectorize callback
It will be used to allow merging loads with a hole between them.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29398>
parent 8ce43b7765
commit 02923e237d
18 changed files with 55 additions and 27 deletions
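
The change has the same shape in every driver below: the vectorizer callback gains a hole_size parameter, and each existing callback keeps its old behavior by refusing to merge when the hole is non-zero. A minimal sketch of a callback under the new signature (the name my_driver_vectorize_cb and its particular limits are illustrative, not taken from this commit):

#include "nir.h"

/* Conservative callback matching the new nir_should_vectorize_mem_func
 * signature: reject any non-adjacent pair, as every driver updated in
 * this commit does. The 32-bit/4-component caps are made up for the
 * example. */
static bool
my_driver_vectorize_cb(unsigned align_mul, unsigned align_offset,
                       unsigned bit_size, unsigned num_components,
                       unsigned hole_size, nir_intrinsic_instr *low,
                       nir_intrinsic_instr *high, void *data)
{
   /* hole_size is the gap in bytes between low and high; 0 means the
    * two accesses are adjacent. */
   if (hole_size)
      return false;

   return bit_size <= 32 && num_components <= 4;
}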
@@ -113,10 +113,10 @@ void ac_set_nir_options(struct radeon_info *info, bool use_llvm,
 
 bool
 ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
-                              unsigned num_components, nir_intrinsic_instr *low,
+                              unsigned num_components, unsigned hole_size, nir_intrinsic_instr *low,
                               nir_intrinsic_instr *high, void *data)
 {
-   if (num_components > 4)
+   if (num_components > 4 || hole_size)
      return false;
 
    bool is_scratch = false;
@@ -243,8 +243,8 @@ void ac_set_nir_options(struct radeon_info *info, bool use_llvm,
                         nir_shader_compiler_options *options);
 
 bool ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
-                                   unsigned num_components, nir_intrinsic_instr *low,
-                                   nir_intrinsic_instr *high, void *data);
+                                   unsigned num_components, unsigned hole_size,
+                                   nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data);
 
 unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask,
                                     bool writes_mrt0_alpha);
@@ -2795,9 +2795,12 @@ agx_optimize_loop_nir(nir_shader *nir)
 
 static bool
 mem_vectorize_cb(unsigned align_mul, unsigned align_offset, unsigned bit_size,
-                 unsigned num_components, nir_intrinsic_instr *low,
-                 nir_intrinsic_instr *high, void *data)
+                 unsigned num_components, unsigned hole_size,
+                 nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data)
 {
+   if (hole_size)
+      return false;
+
    /* Must be aligned to the size of the load */
    unsigned align = nir_combined_align(align_mul, align_offset);
    if ((bit_size / 8) > align)
@@ -2084,10 +2084,14 @@ static bool
 mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
                        unsigned bit_size,
                        unsigned num_components,
+                       unsigned hole_size,
                        nir_intrinsic_instr *low,
                        nir_intrinsic_instr *high,
                        void *data)
 {
+        if (hole_size)
+                return false;
+
         /* TMU general access only supports 32-bit vectors */
         if (bit_size > 32)
                 return false;
@@ -5896,6 +5896,9 @@ typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
                                               unsigned align_offset,
                                               unsigned bit_size,
                                               unsigned num_components,
+                                              /* The hole between low and
+                                               * high if they are not adjacent. */
+                                              unsigned hole_size,
                                               nir_intrinsic_instr *low,
                                               nir_intrinsic_instr *high,
                                               void *data);
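
The commit message says the parameter will later be used to merge loads across a hole. A hypothetical hole-tolerant callback (not part of this commit) might look like the sketch below; the size cap is an arbitrary illustration, and only loads may skip the adjacency requirement, since a merged store would clobber the bytes in the gap:

#include "nir.h"

static bool
hole_tolerant_vectorize_cb(unsigned align_mul, unsigned align_offset,
                           unsigned bit_size, unsigned num_components,
                           unsigned hole_size, nir_intrinsic_instr *low,
                           nir_intrinsic_instr *high, void *data)
{
   /* has_dest distinguishes loads from stores here: reading the padding
    * bytes in the hole is harmless, writing them is not. */
   bool is_load = nir_intrinsic_infos[low->intrinsic].has_dest;

   if (hole_size && !is_load)
      return false;

   /* Arbitrary cap: tolerate at most one element's worth of padding. */
   if (hole_size > bit_size / 8u)
      return false;

   return num_components <= 4;
}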
@@ -1920,6 +1920,7 @@ should_vectorize(unsigned align_mul,
                  unsigned align_offset,
                  unsigned bit_size,
                  unsigned num_components,
+                 unsigned hole_size,
                  nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                  void *data)
 {

@@ -1933,7 +1934,7 @@ should_vectorize(unsigned align_mul,
    struct stack_op_vectorizer_state *state = data;
 
    return state->driver_callback(align_mul, align_offset,
-                                 bit_size, num_components,
+                                 bit_size, num_components, hole_size,
                                  low, high, state->driver_data);
 }
 
@@ -642,7 +642,7 @@ new_bitsize_acceptable(struct vectorize_ctx *ctx, unsigned new_bit_size,
 
    if (!ctx->options->callback(low->align_mul,
                                low->align_offset,
-                               new_bit_size, new_num_components,
+                               new_bit_size, new_num_components, 0,
                                low->intrin, high->intrin,
                                ctx->options->cb_data))
       return false;
@@ -71,7 +71,7 @@ protected:
 
    static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
                                       unsigned bit_size,
-                                      unsigned num_components,
+                                      unsigned num_components, unsigned hole_size,
                                       nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                                       void *data);
    static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);

@@ -336,10 +336,13 @@ bool nir_load_store_vectorize_test::test_alu_def(
 
 bool nir_load_store_vectorize_test::mem_vectorize_callback(
    unsigned align_mul, unsigned align_offset, unsigned bit_size,
-   unsigned num_components,
+   unsigned num_components, unsigned hole_size,
    nir_intrinsic_instr *low, nir_intrinsic_instr *high,
    void *data)
 {
+   if (hole_size)
+      return false;
+
    /* Calculate a simple alignment, like how nir_intrinsic_align() does. */
    uint32_t align = align_mul;
    if (align_offset)
@@ -110,9 +110,12 @@ ir3_nir_should_scalarize_mem(const nir_instr *instr, const void *data)
 static bool
 ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
                              unsigned bit_size, unsigned num_components,
-                             nir_intrinsic_instr *low,
+                             unsigned hole_size, nir_intrinsic_instr *low,
                              nir_intrinsic_instr *high, void *data)
 {
+   if (hole_size)
+      return false;
+
    struct ir3_compiler *compiler = data;
    unsigned byte_size = bit_size / 8;
 
@@ -3265,13 +3265,15 @@ ntt_should_vectorize_instr(const nir_instr *instr, const void *data)
    return 4;
 }
 
 /* TODO: These parameters are wrong. */
 static bool
 ntt_should_vectorize_io(unsigned align, unsigned bit_size,
                         unsigned num_components, unsigned high_offset,
+                        unsigned hole_size,
                         nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                         void *data)
 {
-   if (bit_size != 32)
+   if (bit_size != 32 || hole_size)
       return false;
 
    /* Our offset alignment should aways be at least 4 bytes */
@@ -1411,6 +1411,7 @@ bool
 brw_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
                              unsigned bit_size,
                              unsigned num_components,
+                             unsigned hole_size,
                              nir_intrinsic_instr *low,
                              nir_intrinsic_instr *high,
                              void *data)

@@ -1419,7 +1420,7 @@ brw_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
    * those back into 32-bit ones anyway and UBO loads aren't split in NIR so
    * we don't want to make a mess for the back-end.
    */
-   if (bit_size > 32)
+   if (bit_size > 32 || hole_size)
      return false;
 
    if (low->intrinsic == nir_intrinsic_load_ubo_uniform_block_intel ||
@@ -233,6 +233,7 @@ enum brw_reg_type brw_type_for_nir_type(const struct intel_device_info *devinfo,
 bool brw_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
                                   unsigned bit_size,
                                   unsigned num_components,
+                                  unsigned hole_size,
                                   nir_intrinsic_instr *low,
                                   nir_intrinsic_instr *high,
                                   void *data);
@@ -1124,10 +1124,11 @@ elk_nir_link_shaders(const struct elk_compiler *compiler,
    }
 }
 
-bool
+static bool
 elk_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
                              unsigned bit_size,
                              unsigned num_components,
+                             unsigned hole_size,
                              nir_intrinsic_instr *low,
                              nir_intrinsic_instr *high,
                              void *data)

@@ -1136,7 +1137,7 @@ elk_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
    * those back into 32-bit ones anyway and UBO loads aren't split in NIR so
    * we don't want to make a mess for the back-end.
    */
-   if (bit_size > 32)
+   if (bit_size > 32 || hole_size)
      return false;
 
    if (low->intrinsic == nir_intrinsic_load_ubo_uniform_block_intel ||
@@ -248,13 +248,6 @@ void elk_nir_apply_key(nir_shader *nir,
 unsigned elk_nir_api_subgroup_size(const nir_shader *nir,
                                    unsigned hw_subgroup_size);
-
-bool elk_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
-                                  unsigned bit_size,
-                                  unsigned num_components,
-                                  nir_intrinsic_instr *low,
-                                  nir_intrinsic_instr *high,
-                                  void *data);
 
 void elk_nir_analyze_ubo_ranges(const struct elk_compiler *compiler,
                                 nir_shader *nir,
                                 struct elk_ubo_range out_ranges[4]);
@@ -6229,10 +6229,11 @@ vectorize_filter(
    unsigned align_offset,
    unsigned bit_size,
    unsigned num_components,
+   unsigned hole_size,
    nir_intrinsic_instr *low, nir_intrinsic_instr *high,
    void *data)
 {
-   return util_is_power_of_two_nonzero(num_components);
+   return !hole_size && util_is_power_of_two_nonzero(num_components);
 }
 
 struct lower_mem_bit_sizes_data {
@@ -138,6 +138,7 @@ private:
                   unsigned align_offset,
                   unsigned bit_size,
                   unsigned num_components,
+                  unsigned hole_size,
                   nir_intrinsic_instr *low,
                   nir_intrinsic_instr *high,
                   void *cb_data);

@@ -1369,10 +1370,14 @@ Converter::memVectorizeCb(unsigned align_mul,
                           unsigned align_offset,
                           unsigned bit_size,
                           unsigned num_components,
+                          unsigned hole_size,
                           nir_intrinsic_instr *low,
                           nir_intrinsic_instr *high,
                           void *cb_data)
 {
+   if (hole_size)
+      return false;
+
    /*
     * Since we legalize these later with nir_lower_mem_access_bit_sizes,
     * we can optimistically combine anything that might be profitable
@@ -799,8 +799,8 @@ nak_nir_remove_barriers(nir_shader *nir)
 static bool
 nak_mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
                      unsigned bit_size, unsigned num_components,
-                     nir_intrinsic_instr *low, nir_intrinsic_instr *high,
-                     void *cb_data)
+                     unsigned hole_size, nir_intrinsic_instr *low,
+                     nir_intrinsic_instr *high, void *cb_data)
 {
    /*
     * Since we legalize these later with nir_lower_mem_access_bit_sizes,

@@ -808,6 +808,9 @@ nak_mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
     */
    assert(util_is_power_of_two_nonzero(align_mul));
 
+   if (hole_size)
+      return false;
+
    unsigned max_bytes = 128u / 8u;
    if (low->intrinsic == nir_intrinsic_ldc_nv ||
        low->intrinsic == nir_intrinsic_ldcx_nv)
@@ -4632,9 +4632,13 @@ mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
 
 static bool
 mem_vectorize_cb(unsigned align_mul, unsigned align_offset, unsigned bit_size,
-                 unsigned num_components, nir_intrinsic_instr *low,
-                 nir_intrinsic_instr *high, void *data)
+                 unsigned num_components, unsigned hole_size,
+                 nir_intrinsic_instr *low, nir_intrinsic_instr *high,
+                 void *data)
 {
+   if (hole_size)
+      return false;
+
    /* Must be aligned to the size of the load */
    unsigned align = nir_combined_align(align_mul, align_offset);
    if ((bit_size / 8) > align)
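
For completeness, a sketch of how a driver feeds such a callback to the vectorizer. The option fields .callback and .cb_data appear in the new_bitsize_acceptable hunk above; the mode mask and the surrounding helper (run_vectorizer, struct my_compiler, reusing the my_driver_vectorize_cb sketch from the top) are illustrative assumptions, not part of this commit:

#include "nir.h"

struct my_compiler;   /* hypothetical per-driver compiler context */

static void
run_vectorizer(nir_shader *shader, struct my_compiler *compiler)
{
   const nir_load_store_vectorize_options options = {
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global,
      .callback = my_driver_vectorize_cb,
      .cb_data = compiler,
   };

   /* The pass consults the callback for every candidate pair; after this
    * commit it also reports the hole between the two accesses. */
   NIR_PASS(_, shader, nir_opt_load_store_vectorize, &options);
}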