From e83fe65cd8aa503f567bb13500058130fad856af Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason@jlekstrand.net>
Date: Wed, 21 Jul 2021 16:41:14 -0500
Subject: [PATCH] radv,radeonsi: Do cube size divide-by-6 lowering in NIR

No point in carrying all this code around twice each in two back-ends.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12005>
---
 .../compiler/aco_instruction_selection.cpp    | 39 ++-----------------
 src/amd/llvm/ac_nir_to_llvm.c                 | 19 +--------
 src/amd/vulkan/radv_shader.c                  |  7 ++++
 src/gallium/drivers/radeonsi/si_shader_nir.c  |  6 +++
 4 files changed, 18 insertions(+), 53 deletions(-)
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 300356d1f3c..c594ec93057 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6485,24 +6485,9 @@ visit_image_size(isel_context* ctx, nir_intrinsic_instr* instr)
    mimg->dmask = (1 << instr->dest.ssa.num_components) - 1;
    mimg->da = glsl_sampler_type_is_array(type);
 
-   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && glsl_sampler_type_is_array(type)) {
-
-      assert(instr->dest.ssa.num_components == 3);
-      Temp tmp = ctx->program->allocateTmp(v3);
-      mimg->definitions[0] = Definition(tmp);
-      emit_split_vector(ctx, tmp, 3);
-
-      /* divide 3rd value by 6 by multiplying with magic number */
-      Temp c = bld.copy(bld.def(s1), Operand::c32(0x2AAAAAAB));
-      Temp by_6 =
-         bld.vop3(aco_opcode::v_mul_hi_i32, bld.def(v1), emit_extract_vector(ctx, tmp, 2, v1), c);
-
-      bld.pseudo(aco_opcode::p_create_vector, Definition(dst), emit_extract_vector(ctx, tmp, 0, v1),
-                 emit_extract_vector(ctx, tmp, 1, v1), by_6);
-
-   } else if (ctx->options->chip_class == GFX9 &&
-              glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
-              glsl_sampler_type_is_array(type)) {
+   if (ctx->options->chip_class == GFX9 &&
+       glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
+       glsl_sampler_type_is_array(type)) {
       assert(instr->dest.ssa.num_components == 2);
       dmask = 0x5;
    }
@@ -9494,11 +9479,6 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
       if (!has_lod)
          lod = bld.copy(bld.def(v1), Operand::zero());
 
-      bool div_by_6 = instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
-                      instr->is_array && (dmask & (1 << 2));
-      if (tmp_dst.id() == dst.id() && div_by_6)
-         tmp_dst = bld.tmp(tmp_dst.regClass());
-
       MIMG_instruction* tex = emit_mimg(bld, aco_opcode::image_get_resinfo, Definition(tmp_dst),
                                         resource, Operand(s4), std::vector<Temp>{lod});
       if (ctx->options->chip_class == GFX9 && instr->op == nir_texop_txs &&
@@ -9512,19 +9492,6 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
       tex->da = da;
       tex->dim = dim;
 
-      if (div_by_6) {
-         /* divide 3rd value by 6 by multiplying with magic number */
-         emit_split_vector(ctx, tmp_dst, tmp_dst.size());
-         Temp c = bld.copy(bld.def(s1), Operand::c32(0x2AAAAAAB));
-         Temp by_6 = bld.vop3(aco_opcode::v_mul_hi_i32, bld.def(v1),
-                              emit_extract_vector(ctx, tmp_dst, 2, v1), c);
-         assert(instr->dest.ssa.num_components == 3);
-         Temp tmp = dst.type() == RegType::vgpr ? dst : bld.tmp(v3);
-         tmp_dst = bld.pseudo(aco_opcode::p_create_vector, Definition(tmp),
-                              emit_extract_vector(ctx, tmp_dst, 0, v1),
-                              emit_extract_vector(ctx, tmp_dst, 1, v1), by_6);
-      }
-
       expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, dmask);
       return;
    }
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 0f611e2dfd2..d626da4983c 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -2768,16 +2768,8 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, const nir_intri
 
       res = ac_build_image_opcode(&ctx->ac, &args);
 
-      LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
-
-      if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) {
-         LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
-         LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
-         z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
-         res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
-      }
-
       if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
+         LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
          LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
          res = LLVMBuildInsertElement(ctx->ac.builder, res, layers, ctx->ac.i32_1, "");
       }
@@ -4616,14 +4608,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
    else if (instr->is_shadow && instr->is_new_style_shadow && instr->op != nir_texop_txs &&
             instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
       result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
-   else if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
-            instr->is_array) {
-      LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
-      LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
-      LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
-      z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
-      result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
-   } else if (ctx->ac.chip_class == GFX9 && instr->op == nir_texop_txs &&
+   else if (ctx->ac.chip_class == GFX9 && instr->op == nir_texop_txs &&
               instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array) {
       LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
       LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 41b06eb5c9e..2c1832ea10d 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -673,10 +673,17 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
    static const nir_lower_tex_options tex_options = {
       .lower_txp = ~0,
       .lower_tg4_offsets = true,
+      .lower_txs_cube_array = true,
    };
 
    nir_lower_tex(nir, &tex_options);
 
+   static const nir_lower_image_options image_options = {
+      .lower_cube_size = true,
+   };
+
+   nir_lower_image(nir, &image_options);
+
    nir_lower_vars_to_ssa(nir);
 
    if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_GEOMETRY ||
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 5c972a368a3..fefac183026 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -815,9 +815,15 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
 
    static const struct nir_lower_tex_options lower_tex_options = {
       .lower_txp = ~0u,
+      .lower_txs_cube_array = true,
    };
    NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
 
+   static const struct nir_lower_image_options lower_image_options = {
+      .lower_cube_size = true,
+   };
+   NIR_PASS_V(nir, nir_lower_image, &lower_image_options);
+
    const nir_lower_subgroups_options subgroups_options = {
       .subgroup_size = 64,
       .ballot_bit_size = 64,