nouveau/nir: Split fewer 64-bit loads

Also adjust the lowering pass to handle the wide SSBO loads that we now emit
in the NIR case.

This improves generated code quality since memoryopt can't
merge SSBO loads that end up predicated on a bounds check.
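
For illustration only, a minimal sketch of why the split hurts. It reuses just
the builder calls visible in the handleLDST hunk below and folds the
per-instruction lowering into a loop over the two halves for brevity; loadLo,
loadHi, bld and pred are assumed names from the surrounding lowering context,
not code from this commit:

   // Sketch: what the bounds-check lowering turns each 32-bit half into when
   // a 64-bit SSBO value arrives already split as loadLo/loadHi.
   for (Instruction *half : { loadLo, loadHi }) {
      half->setPredicate(CC_NOT_P, pred);   // only load when in bounds
      Value *dst = half->getDef(0);
      half->setDef(0, bld.getSSA());
      bld.setPosition(half, true);
      Value *zero = bld.getSSA();
      bld.mkMov(zero, bld.mkImm(0))->setPredicate(CC_P, pred);
      bld.mkOp2(OP_UNION, TYPE_U32, dst, half->getDef(0), zero);
   }
   // memoryopt sees two separately predicated loads plus unions and leaves
   // them split; emitting one 64-bit load up front sidesteps this entirely.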

This also happens to fix a few test cases, but only because the simpler
generated IR is less likely to trigger other compiler bugs. E.g. on Kepler
with NV50_PROG_USE_NIR=1, this fixes
arb_gpu_shader_fp64-fs-non-uniform-control-flow-ubo.

Reviewed-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16063>
Authored by M Henning on 2021-10-05 01:39:33 -04:00; committed by Marge Bot
parent 1b32d4b7d4
commit c0c198ffc1
2 changed files with 6 additions and 4 deletions

src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

@@ -30,6 +30,7 @@
 #include "codegen/nv50_ir.h"
 #include "codegen/nv50_ir_from_common.h"
 #include "codegen/nv50_ir_lowering_helper.h"
+#include "codegen/nv50_ir_target.h"
 #include "codegen/nv50_ir_util.h"
 #include "tgsi/tgsi_from_mesa.h"
@@ -1238,7 +1239,7 @@ Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
    unsigned int tySize = typeSizeof(ty);
    if (tySize == 8 &&
-       (file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
+       (indirect0 || !prog->getTarget()->isAccessSupported(file, TYPE_U64))) {
       Value *lo = getSSA();
       Value *hi = getSSA();

src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp

@@ -2897,12 +2897,13 @@ NVC0LoweringPass::handleLDST(Instruction *i)
       i->setPredicate(CC_NOT_P, pred);
       if (i->defExists(0)) {
          Value *zero, *dst = i->getDef(0);
-         i->setDef(0, bld.getSSA());
+         uint8_t size = dst->reg.size;
+         i->setDef(0, bld.getSSA(size));
          bld.setPosition(i, true);
-         bld.mkMov((zero = bld.getSSA()), bld.mkImm(0))
+         bld.mkMov((zero = bld.getSSA(size)), bld.mkImm(0), i->dType)
             ->setPredicate(CC_P, pred);
-         bld.mkOp2(OP_UNION, TYPE_U32, dst, i->getDef(0), zero);
+         bld.mkOp2(OP_UNION, i->dType, dst, i->getDef(0), zero);
       }
    }
 }
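
For context, a rough restatement of the pattern in the hunk above for the
64-bit loads that now reach handleLDST, with comments; it assumes the
surrounding bld, pred and bounds-checked load i from that code and is a
sketch, not a copy of the file:

   Value *dst = i->getDef(0);
   uint8_t size = dst->reg.size;             // 8 bytes for a 64-bit load
   i->setDef(0, bld.getSSA(size));           // temporary of the load's real size
   bld.setPosition(i, true);
   Value *zero = bld.getSSA(size);
   bld.mkMov(zero, bld.mkImm(0), i->dType)   // zero of the load's own type
      ->setPredicate(CC_P, pred);            // taken only when out of bounds
   bld.mkOp2(OP_UNION, i->dType, dst, i->getDef(0), zero); // result = load or 0

Before this change the temporary, the zero and the union were fixed at 32 bits
(default-sized SSA values and a TYPE_U32 union), which does not cover the wide
SSBO loads the NIR path now emits.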