mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 15:30:14 +01:00
swr: Support simd16 vertex shaders
Supporting simd16 vertex shaders involves packing the output of the fetch shader appropriately, in particular the vertexID buffers, which must be packed into a single simd16 register for use by the VS. As part of this support, we needed to remove the second JitManager, since it was not accounting for vector width correctly. USE_SIMD16_SHADERS is also split into two defines: the new one (USE_SIMD16_VS) controls the width of the vertex shader (VS), while the original one (USE_SIMD16_SHADERS) controls the overall front-end width. Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
1874d95a8e
commit
8c83d2d371
3 changed files with 30 additions and 21 deletions
|
|
@ -1064,9 +1064,6 @@ swr_destroy_screen(struct pipe_screen *p_screen)
|
||||||
swr_fence_reference(p_screen, &screen->flush_fence, NULL);
|
swr_fence_reference(p_screen, &screen->flush_fence, NULL);
|
||||||
|
|
||||||
JitDestroyContext(screen->hJitMgr);
|
JitDestroyContext(screen->hJitMgr);
|
||||||
#if USE_SIMD16_SHADERS
|
|
||||||
JitDestroyContext(screen->hJitMgr16);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (winsys->destroy)
|
if (winsys->destroy)
|
||||||
winsys->destroy(winsys);
|
winsys->destroy(winsys);
|
||||||
|
|
@ -1150,9 +1147,6 @@ swr_create_screen_internal(struct sw_winsys *winsys)
|
||||||
|
|
||||||
// Pass in "" for architecture for run-time determination
|
// Pass in "" for architecture for run-time determination
|
||||||
screen->hJitMgr = JitCreateContext(KNOB_SIMD_WIDTH, "", "swr");
|
screen->hJitMgr = JitCreateContext(KNOB_SIMD_WIDTH, "", "swr");
|
||||||
#if USE_SIMD16_SHADERS
|
|
||||||
screen->hJitMgr16 = JitCreateContext(16, "", "swr");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
swr_fence_init(&screen->base);
|
swr_fence_init(&screen->base);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -49,9 +49,6 @@ struct swr_screen {
|
||||||
uint32_t client_copy_limit;
|
uint32_t client_copy_limit;
|
||||||
|
|
||||||
HANDLE hJitMgr;
|
HANDLE hJitMgr;
|
||||||
#if USE_SIMD16_SHADERS
|
|
||||||
HANDLE hJitMgr16;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
PFNSwrGetInterface pfnSwrGetInterface;
|
PFNSwrGetInterface pfnSwrGetInterface;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -724,7 +724,7 @@ swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key)
|
||||||
void
|
void
|
||||||
BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel)
|
BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel)
|
||||||
{
|
{
|
||||||
#if USE_SIMD16_FRONTEND && !USE_SIMD16_SHADERS
|
#if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
|
||||||
// interleave the simdvertex components into the dest simd16vertex
|
// interleave the simdvertex components into the dest simd16vertex
|
||||||
// slot16offset = slot8offset * 2
|
// slot16offset = slot8offset * 2
|
||||||
// comp16offset = comp8offset * 2 + alternateOffset
|
// comp16offset = comp8offset * 2 + alternateOffset
|
||||||
|
|
@ -787,7 +787,7 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
|
||||||
const_sizes_ptr->setName("num_vs_constants");
|
const_sizes_ptr->setName("num_vs_constants");
|
||||||
|
|
||||||
Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
|
Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
|
||||||
#if USE_SIMD16_SHADERS
|
#if USE_SIMD16_VS
|
||||||
vtxInput = BITCAST(vtxInput, PointerType::get(Gen_simd16vertex(JM()), 0));
|
vtxInput = BITCAST(vtxInput, PointerType::get(Gen_simd16vertex(JM()), 0));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -807,11 +807,22 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
|
||||||
struct lp_bld_tgsi_system_values system_values;
|
struct lp_bld_tgsi_system_values system_values;
|
||||||
memset(&system_values, 0, sizeof(system_values));
|
memset(&system_values, 0, sizeof(system_values));
|
||||||
system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
|
system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
|
||||||
|
|
||||||
|
#if USE_SIMD16_VS
|
||||||
|
system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID16}));
|
||||||
|
#else
|
||||||
system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
|
system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if USE_SIMD16_VS
|
||||||
|
uint32_t vectorWidth = mVWidth16;
|
||||||
|
#else
|
||||||
|
uint32_t vectorWidth = mVWidth;
|
||||||
|
#endif
|
||||||
|
|
||||||
lp_build_tgsi_soa(gallivm,
|
lp_build_tgsi_soa(gallivm,
|
||||||
swr_vs->pipe.tokens,
|
swr_vs->pipe.tokens,
|
||||||
lp_type_float_vec(32, 32 * mVWidth),
|
lp_type_float_vec(32, 32 * vectorWidth),
|
||||||
NULL, // mask
|
NULL, // mask
|
||||||
wrap(consts_ptr),
|
wrap(consts_ptr),
|
||||||
wrap(const_sizes_ptr),
|
wrap(const_sizes_ptr),
|
||||||
|
|
@ -829,7 +840,7 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
|
||||||
IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
|
IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
|
||||||
|
|
||||||
Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
|
Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
|
||||||
#if USE_SIMD16_SHADERS
|
#if USE_SIMD16_VS
|
||||||
vtxOutput = BITCAST(vtxOutput, PointerType::get(Gen_simd16vertex(JM()), 0));
|
vtxOutput = BITCAST(vtxOutput, PointerType::get(Gen_simd16vertex(JM()), 0));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -905,10 +916,21 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
|
||||||
Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
|
Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
|
||||||
Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
|
Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
|
||||||
Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
|
Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
|
||||||
Value *dist = FADD(FMUL(unwrap(cx), VBROADCAST(px)),
|
#if USE_SIMD16_VS
|
||||||
FADD(FMUL(unwrap(cy), VBROADCAST(py)),
|
Value *bpx = VBROADCAST_16(px);
|
||||||
FADD(FMUL(unwrap(cz), VBROADCAST(pz)),
|
Value *bpy = VBROADCAST_16(py);
|
||||||
FMUL(unwrap(cw), VBROADCAST(pw)))));
|
Value *bpz = VBROADCAST_16(pz);
|
||||||
|
Value *bpw = VBROADCAST_16(pw);
|
||||||
|
#else
|
||||||
|
Value *bpx = VBROADCAST(px);
|
||||||
|
Value *bpy = VBROADCAST(py);
|
||||||
|
Value *bpz = VBROADCAST(pz);
|
||||||
|
Value *bpw = VBROADCAST(pw);
|
||||||
|
#endif
|
||||||
|
Value *dist = FADD(FMUL(unwrap(cx), bpx),
|
||||||
|
FADD(FMUL(unwrap(cy), bpy),
|
||||||
|
FADD(FMUL(unwrap(cz), bpz),
|
||||||
|
FMUL(unwrap(cw), bpw))));
|
||||||
|
|
||||||
if (val < 4)
|
if (val < 4)
|
||||||
WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
|
WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
|
||||||
|
|
@ -942,11 +964,7 @@ swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
BuilderSWR builder(
|
BuilderSWR builder(
|
||||||
#if USE_SIMD16_SHADERS
|
|
||||||
reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr16),
|
|
||||||
#else
|
|
||||||
reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
|
reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
|
||||||
#endif
|
|
||||||
"VS");
|
"VS");
|
||||||
PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);
|
PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue