nouveau/nir: Split fewer 64-bit loads

Also adjust the lowering pass to handle the wide SSBO loads that we now emit
in the NIR case.

This improves generated code quality since memoryopt can't
merge SSBO loads that end up predicated on a bounds check.
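
For illustration only, a minimal sketch of why the split hurts. It reuses just
the builder calls visible in the handleLDST hunk below and folds the
per-instruction lowering into a loop over the two halves for brevity; loadLo,
loadHi, bld and pred are assumed names from the surrounding lowering context,
not code from this commit:

   // Sketch: what the bounds-check lowering turns each 32-bit half into when
   // a 64-bit SSBO value arrives already split as loadLo/loadHi.
   for (Instruction *half : { loadLo, loadHi }) {
      half->setPredicate(CC_NOT_P, pred);   // only load when in bounds
      Value *dst = half->getDef(0);
      half->setDef(0, bld.getSSA());
      bld.setPosition(half, true);
      Value *zero = bld.getSSA();
      bld.mkMov(zero, bld.mkImm(0))->setPredicate(CC_P, pred);
      bld.mkOp2(OP_UNION, TYPE_U32, dst, half->getDef(0), zero);
   }
   // memoryopt sees two separately predicated loads plus unions and leaves
   // them split; emitting one 64-bit load up front sidesteps this entirely.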

This also happens to fix a few test cases, but only because the simpler
generated IR is less likely to trigger other compiler bugs. E.g. on Kepler
with NV50_PROG_USE_NIR=1, this fixes
arb_gpu_shader_fp64-fs-non-uniform-control-flow-ubo.

Reviewed-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16063>
Authored by M Henning on 2021-10-05 01:39:33 -04:00; committed by Marge Bot
parent 1b32d4b7d4
commit c0c198ffc1
2 changed files with 6 additions and 4 deletions

src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

@@ -30,6 +30,7 @@
 #include "codegen/nv50_ir.h"
 #include "codegen/nv50_ir_from_common.h"
 #include "codegen/nv50_ir_lowering_helper.h"
+#include "codegen/nv50_ir_target.h"
 #include "codegen/nv50_ir_util.h"
 #include "tgsi/tgsi_from_mesa.h"
@@ -1238,7 +1239,7 @@ Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
    unsigned int tySize = typeSizeof(ty);
    if (tySize == 8 &&
-       (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
+       (indirect0 || !prog->getTarget()->isAccessSupported(file, TYPE_U64))) {
       Value *lo = getSSA();
       Value *hi = getSSA();

src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp

@@ -2897,12 +2897,13 @@ NVC0LoweringPass::handleLDST(Instruction *i)
       i->setPredicate(CC_NOT_P, pred);
       if (i->defExists(0)) {
          Value *zero, *dst = i->getDef(0);
-         i->setDef(0, bld.getSSA());
+         uint8_t size = dst->reg.size;
+         i->setDef(0, bld.getSSA(size));
          bld.setPosition(i, true);
-         bld.mkMov((zero = bld.getSSA()), bld.mkImm(0))
+         bld.mkMov((zero = bld.getSSA(size)), bld.mkImm(0), i->dType)
             ->setPredicate(CC_P, pred);
-         bld.mkOp2(OP_UNION, TYPE_U32, dst, i->getDef(0), zero);
+         bld.mkOp2(OP_UNION, i->dType, dst, i->getDef(0), zero);
       }
    }
 }
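
For context, a rough restatement of the pattern in the hunk above for the
64-bit loads that now reach handleLDST, with comments; it assumes the
surrounding bld, pred and bounds-checked load i from that code and is a
sketch, not a copy of the file:

   Value *dst = i->getDef(0);
   uint8_t size = dst->reg.size;             // 8 bytes for a 64-bit load
   i->setDef(0, bld.getSSA(size));           // temporary of the load's real size
   bld.setPosition(i, true);
   Value *zero = bld.getSSA(size);
   bld.mkMov(zero, bld.mkImm(0), i->dType)   // zero of the load's own type
      ->setPredicate(CC_P, pred);            // taken only when out of bounds
   bld.mkOp2(OP_UNION, i->dType, dst, i->getDef(0), zero); // result = load or 0

Before this change the temporary, the zero and the union were fixed at 32 bits
(default-sized SSA values and a TYPE_U32 union), which does not cover the wide
SSBO loads the NIR path now emits.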