ir3: lower 64-bit (r64ui/r64si) image loads/stores to r32g32ui

[Review notes — this prose precedes the first "diff --git" header, so
`git apply` skips it and `git am` would take it as the commit message.]

What the hunks below do (as visible in the added lines):

- ir3_nir.c (ir3_nir_lower_variant): run the new ir3_nir_lower_64b_image
  pass; it is placed before nir_lower_int64, presumably so the 64-bit
  pack/unpack ops the pass emits get lowered too — TODO confirm ordering
  intent.
- ir3_nir.h: declare ir3_nir_lower_64b_image() alongside the other
  ir3_nir_lower_64b_* pass declarations.
- ir3_nir_lower_64b.c: for nir_intrinsic_bindless_image_load/store whose
  nir_intrinsic_format is PIPE_FORMAT_R64_UINT or PIPE_FORMAT_R64_SINT,
  rewrite the access to 2 components of 32 bits with format
  PIPE_FORMAT_R32G32_UINT (the comment in the pass says that matches the
  descriptor format). Loads repack the two 32-bit words into a 64-bit R
  value and pad G/B/A with default (0, 0, 1) 64-bit immediates up to the
  original component count; stores take channel 0 of the value source
  (src[3]) and unpack it into two 32-bit words. All other intrinsics and
  formats are left untouched (pass returns false for them).

NOTE(review): this copy of the patch has lost its internal newlines (the
hunk bodies are run together), so it will NOT apply with `git apply`
as-is; it must be re-wrapped to match the stated hunk headers
(@@ counts: 6→7 lines in the two header/declaration hunks, 3→71 in
ir3_nir_lower_64b.c, i.e. 68 added lines).
NOTE(review): only the *bindless* image intrinsics are handled — confirm
non-bindless nir_intrinsic_image_load/store cannot reach this pass with an
r64 format, or those would go unlowered.
NOTE(review): there is trailing whitespace after "values." in the new
comment in lower_64b_image_load, which will trip git's whitespace checks.
NOTE(review): nir_def_rewrite_uses_after / nir_src_rewrite signatures must
match the NIR revision in tree — TODO confirm against src/compiler/nir.

diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 7148ed0a221..200e53e6997 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -1537,6 +1537,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, progress |= OPT(s, nir_lower_mem_access_bit_sizes, &mem_bit_size_options); progress |= OPT(s, ir3_nir_lower_64b_global); progress |= OPT(s, ir3_nir_lower_64b_undef); + progress |= OPT(s, ir3_nir_lower_64b_image); progress |= OPT(s, nir_lower_int64); progress |= OPT(s, nir_lower_64bit_phis); diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 46006e9848b..887170d68cf 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -54,6 +54,7 @@ uint8_t ir3_nir_vectorize_filter(const nir_instr *instr, const void *data); bool ir3_nir_lower_64b_undef(nir_shader *shader); bool ir3_nir_lower_64b_global(nir_shader *shader); bool ir3_nir_lower_64b_regs(nir_shader *shader); +bool ir3_nir_lower_64b_image(nir_shader *shader); nir_mem_access_size_align ir3_mem_access_size_align( nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, uint32_t align, diff --git a/src/freedreno/ir3/ir3_nir_lower_64b.c b/src/freedreno/ir3/ir3_nir_lower_64b.c index 1688c7346a1..4cab8d04652 100644 --- a/src/freedreno/ir3/ir3_nir_lower_64b.c +++ b/src/freedreno/ir3/ir3_nir_lower_64b.c @@ -205,3 +205,71 @@ ir3_nir_lower_64b_regs(nir_shader *shader) return progress; } + +static void +lower_64b_image_load(nir_builder *b, nir_intrinsic_instr *intr) +{ + nir_intrinsic_set_format(intr, PIPE_FORMAT_R32G32_UINT); + nir_intrinsic_set_dest_type(intr, nir_type_uint32); + unsigned ncomp = intr->num_components; + intr->num_components = 2; + intr->def.num_components = 2; + intr->def.bit_size = 32; + + /* We only load the R component but the original load might need GBA as well, + * return the default (0, 0, 1) values. 
+ */ + b->cursor = nir_after_instr(&intr->instr); + nir_def *val_r64 = nir_pack_64_2x32(b, &intr->def); + nir_def *vec[4] = {val_r64, nir_imm_int64(b, 0), nir_imm_int64(b, 0), + nir_imm_int64(b, 1)}; + assert(ncomp <= ARRAY_SIZE(vec)); + nir_def_rewrite_uses_after(&intr->def, nir_vec(b, vec, ncomp)); +} + +static void +lower_64b_image_store(nir_builder *b, nir_intrinsic_instr *intr) +{ + nir_intrinsic_set_format(intr, PIPE_FORMAT_R32G32_UINT); + nir_intrinsic_set_src_type(intr, nir_type_uint32); + intr->num_components = 2; + + b->cursor = nir_before_instr(&intr->instr); + nir_def *val_r64 = nir_channel(b, intr->src[3].ssa, 0); + nir_def *val_r32g32 = nir_unpack_64_2x32(b, val_r64); + nir_src_rewrite(&intr->src[3], val_r32g32); +} + +static bool +lower_64b_image(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + switch (intr->intrinsic) { + case nir_intrinsic_bindless_image_load: + case nir_intrinsic_bindless_image_store: { + enum pipe_format format = nir_intrinsic_format(intr); + if (format != PIPE_FORMAT_R64_UINT && format != PIPE_FORMAT_R64_SINT) { + return false; + } + break; + } + default: + return false; + } + + if (intr->intrinsic == nir_intrinsic_bindless_image_load) { + lower_64b_image_load(b, intr); + } else { + lower_64b_image_store(b, intr); + } + + return true; +} + +/* Lower r64u?i image loads/stores to r32g32u, which is our descriptor format. + */ +bool +ir3_nir_lower_64b_image(nir_shader *shader) +{ + return nir_shader_intrinsics_pass(shader, lower_64b_image, + nir_metadata_control_flow, NULL); +}