From c0c198ffc14e646f10c8dd7e8468ff01650d65e8 Mon Sep 17 00:00:00 2001 From: M Henning Date: Tue, 5 Oct 2021 01:39:33 -0400 Subject: [PATCH] nouveau/nir: Split fewer 64-bit loads Also adjust the lowering pass to handle wide SSBO loads that we now emit for the NIR case. This improves generated code quality since memoryopt can't merge SSBO loads that end up predicated on a bounds check. This also happens to fix a few test cases, only because the simpler generated IR is less likely to trigger other compiler bugs. E.g., on Kepler with NV50_PROG_USE_NIR=1, this fixes arb_gpu_shader_fp64-fs-non-uniform-control-flow-ubo Reviewed-by: Karol Herbst Part-of: --- src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 3 ++- .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index ac6f3a7726e..603c36c1f1d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -30,6 +30,7 @@ #include "codegen/nv50_ir.h" #include "codegen/nv50_ir_from_common.h" #include "codegen/nv50_ir_lowering_helper.h" +#include "codegen/nv50_ir_target.h" #include "codegen/nv50_ir_util.h" #include "tgsi/tgsi_from_mesa.h" @@ -1238,7 +1239,7 @@ Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def, unsigned int tySize = typeSizeof(ty); if (tySize == 8 && - (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) { + (indirect0 || !prog->getTarget()->isAccessSupported(file, TYPE_U64))) { Value *lo = getSSA(); Value *hi = getSSA(); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 18e21920a6a..e57c1c46c6e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -2897,12 +2897,13 @@ NVC0LoweringPass::handleLDST(Instruction *i) i->setPredicate(CC_NOT_P, pred); if (i->defExists(0)) { Value *zero, *dst = i->getDef(0); - i->setDef(0, bld.getSSA()); + uint8_t size = dst->reg.size; + i->setDef(0, bld.getSSA(size)); bld.setPosition(i, true); - bld.mkMov((zero = bld.getSSA()), bld.mkImm(0)) + bld.mkMov((zero = bld.getSSA(size)), bld.mkImm(0), i->dType) ->setPredicate(CC_P, pred); - bld.mkOp2(OP_UNION, TYPE_U32, dst, i->getDef(0), zero); + bld.mkOp2(OP_UNION, i->dType, dst, i->getDef(0), zero); } } }