swr/rast: Use llvm intrinsic masked gather

Use llvm intrinsic masked.gather instead of manual unroll for the cases where we have a vector of pointers. Improves llvm IR debug experience by reducing a ton of IR to a single intrinsic call. Also seems to reduce overall stack use considerably. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
2026-04-27 06:40:38 +02:00 · 2018-02-02 17:03:01 -06:00 · 2018-02-02 17:03:01 -06:00 · e12db47a7d
commit e12db47a7d
parent 9cc9688e49
2 changed files with 14 additions and 0 deletions
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@ -346,6 +346,18 @@ namespace SwrJit
        return vGather;
    }

+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Gather through a SIMD vector of pointers using one llvm.masked.gather intrinsic call
+    /// @param pVecSrcPtr   - SIMD wide vector of source pointers
+    /// @param pVecMask     - SIMD mask selecting the active lanes
+    /// @param pVecPassthru - SIMD wide vector supplying the result for inactive lanes
+    Value* Builder::GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru)
+    {
+        Type* pResultTy = pVecPassthru->getType(); // overload type used to select the intrinsic declaration
+        Function* pGatherFn = llvm::Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::masked_gather, { pResultTy });
+        return CALL(pGatherFn, { pVecSrcPtr, C(0), pVecMask, pVecPassthru }); // C(0) fills the alignment operand
+    }
+
    void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
        Value* mask, Value* vGatherComponents[], bool bPackedOutput)
    {
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
@ -58,6 +58,8 @@ virtual void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byte

 Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);

+Value *GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru); // masked gather where the source is a vector of pointers
+
 void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask);

 void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput);