mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-26 17:40:39 +02:00
Generate the base shader.
This commit is contained in:
parent
2dbba8b024
commit
9e6d58fac2
4 changed files with 194 additions and 47 deletions
|
|
@ -112,7 +112,7 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
|
|||
float (*consts)[4] = (float (*)[4]) draw->mapped_constants;
|
||||
struct ga_llvm_prog *prog = draw->vertex_shader->state->llvm_prog;
|
||||
|
||||
fprintf(stderr, "XX q(%d) ", draw->vs.queue_nr);
|
||||
fprintf(stderr, "--- XX q(%d) ", draw->vs.queue_nr);
|
||||
|
||||
/* fetch the inputs */
|
||||
for (i = 0; i < draw->vs.queue_nr; ++i) {
|
||||
|
|
@ -123,7 +123,8 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
|
|||
|
||||
/* batch execute the shaders on all the vertices */
|
||||
ga_llvm_prog_exec(prog, inputs, dests, consts,
|
||||
draw->vs.queue_nr);
|
||||
draw->vs.queue_nr,
|
||||
draw->vertex_info.num_attribs);
|
||||
|
||||
draw->vs.queue_nr = 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,20 +1,29 @@
|
|||
/* clang --emit-llvm llvm_builtins.c |llvm-as |opt -std-compile-opts |llvm-dis */
|
||||
/* clang --emit-llvm llvm_builtins.c |llvm-as |opt -std-compile-opts |llvm2cpp -for=Shader -gen-module -funcname=createBaseShader */
|
||||
typedef __attribute__(( ocu_vector_type(4) )) float float4;
|
||||
|
||||
#if 0
|
||||
//clang doesn't support "struct->member" notation yet
|
||||
struct vertex_header {
|
||||
unsigned clipmask:12;
|
||||
unsigned edgeflag:1;
|
||||
unsigned pad:3;
|
||||
unsigned vertex_id:16;
|
||||
|
||||
inline float4 compute_clip(float4 vec, float4 scale, float4 trans)
|
||||
{
|
||||
return vec*scale + trans;
|
||||
}
|
||||
float clip[4];
|
||||
|
||||
float data[][4];
|
||||
};
|
||||
|
||||
inline float
|
||||
dot4(const float4 a, const float4 b)
|
||||
dot4(float4 a, float4 b)
|
||||
{
|
||||
float4 c = a*b;
|
||||
return c.x + c.y + c.z + c.w;
|
||||
}
|
||||
|
||||
inline unsigned
|
||||
compute_clipmask(float4 clip, const float4 (*plane), unsigned nr)
|
||||
compute_clipmask(float4 clip, float4 (*plane), unsigned nr)
|
||||
{
|
||||
unsigned mask = 0;
|
||||
unsigned i;
|
||||
|
|
@ -29,7 +38,8 @@ compute_clipmask(float4 clip, const float4 (*plane), unsigned nr)
|
|||
|
||||
inline void collect_results(float4 *results, struct vertex_header *vOut,
|
||||
float4 *planes, int nr_planes,
|
||||
float4 scale, float4 trans)
|
||||
float4 scale, float4 trans,
|
||||
int num_attribs)
|
||||
{
|
||||
/* store results */
|
||||
unsigned slot;
|
||||
|
|
@ -38,13 +48,14 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
|
|||
/* Handle attr[0] (position) specially:
|
||||
*/
|
||||
float4 res0 = results[0];
|
||||
x = vOut->clip[0] = clip.x;
|
||||
y = vOut->clip[1] = clip.y;
|
||||
z = vOut->clip[2] = clip.z;
|
||||
w = vOut->clip[3] = clip.w;
|
||||
float *clip = vOut->clip;
|
||||
x = clip[0] = res0.x;
|
||||
y = clip[1] = res0.y;
|
||||
z = clip[2] = res0.z;
|
||||
w = clip[3] = res0.w;
|
||||
|
||||
vOut[i]->clipmask = compute_clipmask(res0, planes, nr_planes);
|
||||
vOut[i]->edgeflag = 1;
|
||||
vOut->clipmask = compute_clipmask(res0, planes, nr_planes);
|
||||
vOut->edgeflag = 1;
|
||||
|
||||
/* divide by w */
|
||||
w = 1.0f / w;
|
||||
|
|
@ -54,10 +65,10 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
|
|||
res0.x = x; res0.y = y; res0.z = z; res0.w = 1;
|
||||
|
||||
/* Viewport mapping */
|
||||
res = res * scale + trans;
|
||||
vOut->data[0][0] = res.x;
|
||||
vOut->data[0][1] = res.y;
|
||||
vOut->data[0][2] = res.z;
|
||||
res0 = res0 * scale + trans;
|
||||
vOut->data[0][0] = res0.x;
|
||||
vOut->data[0][1] = res0.y;
|
||||
vOut->data[0][2] = res0.z;
|
||||
vOut->data[0][3] = w;
|
||||
|
||||
/* Remaining attributes are packed into sequential post-transform
|
||||
|
|
@ -65,7 +76,7 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
|
|||
* Skip 0 since we just did it above.
|
||||
* Subtract two because of the VERTEX_HEADER, CLIP_POS attribs.
|
||||
*/
|
||||
for (slot = 1; slot < draw->vertex_info.num_attribs - 2; slot++) {
|
||||
for (slot = 1; slot < num_attribs - 2; slot++) {
|
||||
float4 vec = results[slot];
|
||||
vOut->data[slot][0] = vec.x;
|
||||
vOut->data[slot][1] = vec.y;
|
||||
|
|
@ -79,12 +90,68 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
|
|||
vOut->data[slot][3]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void run_vertex_shader(float ainputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4],
|
||||
struct vertex_header *dests[VS_QUEUE_LENGTH],
|
||||
float *aconsts[4]
|
||||
int count)
|
||||
/*
 * Repack per-vertex scalar attribute data into float4 vectors.
 *
 * res         - destination; one row of 32 float4 slots per vertex
 * ainputs     - source; one row of 32 four-float attributes per vertex
 * count       - number of vertices (rows) to convert
 * num_attribs - number of attributes converted in each row; must not
 *               exceed the row width of 32
 *
 * Only the first num_attribs slots of each destination row are written;
 * the remaining slots are left untouched.
 */
void from_array(float4 (*res)[32], float (*ainputs)[32][4],
                int count, int num_attribs)
{
   for (int i = 0; i < count; ++i) {
      for (int j = 0; j < num_attribs; ++j) {
         float4 vec;
         vec.x = ainputs[i][j][0];
         vec.y = ainputs[i][j][1];
         vec.z = ainputs[i][j][2];
         vec.w = ainputs[i][j][3];
         res[i][j] = vec;
      }
   }
}
|
||||
|
||||
/*
 * Pack `count` rows of four floats into `count` float4 vectors.
 *
 * res     - destination array, at least `count` entries
 * ainputs - source rows, four floats each (x, y, z, w order)
 * count   - number of rows to convert; nothing is written when <= 0
 */
void from_consts(float4 *res, float (*ainputs)[4],
                 int count)
{
   int idx;

   for (idx = 0; idx < count; ++idx) {
      const float *src = ainputs[idx];
      float4 packed;

      packed.x = src[0];
      packed.y = src[1];
      packed.z = src[2];
      packed.w = src[3];
      res[idx] = packed;
   }
}
|
||||
|
||||
/*
 * Unpack float4 vectors back into rows of four scalar floats.
 *
 * dests       - destination rows, four floats each
 * in          - source vectors
 * num_attribs - number of vectors/rows to copy
 */
void to_array(float (*dests)[4], float4 *in, int num_attribs)
{
   int attr = 0;

   while (attr < num_attribs) {
      float4 src = in[attr];
      float *dst = dests[attr];

      dst[0] = src.x;
      dst[1] = src.y;
      dst[2] = src.z;
      dst[3] = src.w;
      ++attr;
   }
}
|
||||
|
||||
extern void execute_shader(float4 *dests, float4 *inputs,
|
||||
float4 *consts);
|
||||
|
||||
/*
 * Run the shader over a batch of vertices.
 *
 * ainputs     - per-vertex input attributes (32 x 4 floats per vertex)
 * dests       - per-vertex output attributes (32 x 4 floats per vertex)
 * aconsts     - constants, four floats each; 32 entries are always read
 *               NOTE(review): assumes the caller maps at least 32
 *               constants - confirm against the caller.
 * count       - number of vertices to process; must not exceed the
 *               16*32*4 rows of the scratch arrays below
 * num_attribs - number of attributes converted per vertex on both the
 *               input and the output side
 *
 * The inputs and constants are first repacked into float4 vectors, then
 * execute_shader() is invoked once per vertex and its results are
 * unpacked into dests.
 */
void run_vertex_shader(float (*ainputs)[32][4],
                       float (*dests)[32][4],
                       float (*aconsts)[4],
                       int count,
                       int num_attribs)
{
   /* Scratch storage lives on the stack: two arrays of 16*32*4 rows of
    * 32 float4 each (about 1 MiB apiece if float4 is 16 bytes). */
   float4 inputs[16*32*4][32];
   float4 consts[32];
   float4 results[16*32*4][32];

   printf("XXXXXXXXXXX run_vertex_shader\n");
   from_array(inputs, ainputs, count, num_attribs);
   from_consts(consts, aconsts, 32);
   for (int i = 0; i < count; ++i) {
      float4 *in = inputs[i];
      float4 *res = results[i];

      /* The shader must run before its results are copied out;
       * copying first would hand uninitialised data to the caller. */
      execute_shader(res, in, consts);
      to_array(dests[i], res, num_attribs);
   }
}
|
||||
|
|
|
|||
|
|
@ -14,15 +14,88 @@
|
|||
#include <llvm/DerivedTypes.h>
|
||||
#include <llvm/Instructions.h>
|
||||
#include <llvm/ModuleProvider.h>
|
||||
#include <llvm/Pass.h>
|
||||
#include <llvm/PassManager.h>
|
||||
#include <llvm/ParameterAttributes.h>
|
||||
#include <llvm/Support/PatternMatch.h>
|
||||
#include <llvm/ExecutionEngine/JIT.h>
|
||||
#include <llvm/ExecutionEngine/Interpreter.h>
|
||||
#include <llvm/ExecutionEngine/GenericValue.h>
|
||||
#include <llvm/Support/MemoryBuffer.h>
|
||||
#include <llvm/LinkAllPasses.h>
|
||||
#include <llvm/Analysis/Verifier.h>
|
||||
#include <llvm/Analysis/LoopPass.h>
|
||||
#include <llvm/Target/TargetData.h>
|
||||
#include <llvm/Bitcode/ReaderWriter.h>
|
||||
#include <iostream>
|
||||
|
||||
using namespace llvm;
|
||||
#include "llvm_base_shader.cpp"
|
||||
|
||||
|
||||
/* Thin wrapper that hands pass P over to the pass manager PM. */
static inline void addPass(PassManager &PM, Pass *P)
{
   PM.add(P);
}
|
||||
|
||||
/*
 * Populate PM with the fixed sequence of optimization passes used for
 * the generated shader module. The passes are registered in exactly the
 * order listed here.
 */
static inline void AddStandardCompilePasses(PassManager &PM)
{
   /* Sanity-check the incoming module first. */
   PM.add(createVerifierPass());

   /* Lower llvm.setjmp / llvm.longjmp. */
   PM.add(createLowerSetJmpPass());

   /* Symbol stripping (createStripSymbolsPass(true)) is intentionally
    * not part of this pipeline; there is no -strip-debug option here. */

   /* Early cleanup and interprocedural simplification. */
   PM.add(createRaiseAllocationsPass());         /* call %malloc -> malloc inst */
   PM.add(createCFGSimplificationPass());        /* tidy the control flow graph */
   PM.add(createPromoteMemoryToRegisterPass());  /* get rid of useless allocas */
   PM.add(createGlobalOptimizerPass());          /* optimize away global variables */
   PM.add(createGlobalDCEPass());                /* drop unused functions and globals */
   PM.add(createIPConstantPropagationPass());    /* interprocedural constant propagation */
   PM.add(createDeadArgEliminationPass());       /* remove dead arguments */
   PM.add(createInstructionCombiningPass());     /* clean up after IPCP and DAE */
   PM.add(createCFGSimplificationPass());        /* clean up after IPCP and DAE */

   /* Remove dead exception-handling info. */
   PM.add(createPruneEHPass());

   /* Inlining is always enabled (no DisableInline switch). */
   PM.add(createFunctionInliningPass());         /* inline small functions */
   PM.add(createArgumentPromotionPass());        /* scalarize args of uninlined functions */

   /* Scalar optimizations. */
   PM.add(createTailDuplicationPass());          /* simplify the CFG by copying code */
   PM.add(createInstructionCombiningPass());     /* cleanup ahead of scalarrepl */
   PM.add(createCFGSimplificationPass());        /* merge and remove basic blocks */
   PM.add(createScalarReplAggregatesPass());     /* break up aggregate allocas */
   PM.add(createInstructionCombiningPass());     /* combine silly sequences */
   PM.add(createCondPropagationPass());          /* propagate conditionals */

   PM.add(createTailCallEliminationPass());      /* eliminate tail calls */
   PM.add(createCFGSimplificationPass());        /* merge and remove basic blocks */
   PM.add(createReassociatePass());              /* reassociate expressions */

   /* Loop optimizations. */
   PM.add(createLoopRotatePass());
   PM.add(createLICMPass());                     /* hoist loop invariants */
   PM.add(createLoopUnswitchPass());             /* unswitch loops */
   PM.add(createLoopIndexSplitPass());           /* index-split loops */
   PM.add(createInstructionCombiningPass());     /* clean up after LICM/reassociate */
   PM.add(createIndVarSimplifyPass());           /* canonicalize induction variables */
   PM.add(createLoopUnrollPass());               /* unroll small loops */
   PM.add(createInstructionCombiningPass());     /* clean up after the unroller */

   /* Redundancy elimination and constant propagation. */
   PM.add(createGVNPass());                      /* remove redundancies */
   PM.add(createSCCPPass());                     /* constant propagation with SCCP */

   /* Instcombine again to exploit what redundancy elimination opened up. */
   PM.add(createInstructionCombiningPass());
   PM.add(createCondPropagationPass());          /* propagate conditionals */

   /* Final dead-code and module-level cleanup. */
   PM.add(createDeadStoreEliminationPass());     /* delete dead stores */
   PM.add(createAggressiveDCEPass());            /* SSA-based aggressive DCE */
   PM.add(createCFGSimplificationPass());        /* merge and remove basic blocks */
   PM.add(createSimplifyLibCallsPass());         /* library call optimizations */
   PM.add(createDeadTypeEliminationPass());      /* eliminate dead types */
   PM.add(createConstantMergePass());            /* merge duplicate global constants */
}
|
||||
|
||||
static void
|
||||
translate_declaration(llvm::Module *module,
|
||||
|
|
@ -341,7 +414,7 @@ translate_instruction(llvm::Module *module,
|
|||
static llvm::Module *
|
||||
tgsi_to_llvm(const struct tgsi_token *tokens)
|
||||
{
|
||||
llvm::Module *mod = new llvm::Module("tgsi");
|
||||
llvm::Module *mod = createBaseShader();
|
||||
struct tgsi_parse_context parse;
|
||||
struct tgsi_full_instruction fi;
|
||||
struct tgsi_full_declaration fd;
|
||||
|
|
@ -402,18 +475,33 @@ ga_llvm_from_tgsi(const struct tgsi_token *tokens)
|
|||
struct ga_llvm_prog *ga_llvm =
|
||||
(struct ga_llvm_prog *)malloc(sizeof(struct ga_llvm_prog));
|
||||
llvm::Module *mod = tgsi_to_llvm(tokens);
|
||||
|
||||
/* Run optimization passes over it */
|
||||
PassManager passes;
|
||||
// Add an appropriate TargetData instance for this module...
|
||||
passes.add(new TargetData(mod));
|
||||
AddStandardCompilePasses(passes);
|
||||
std::cout<<"Running optimization passes..."<<std::endl;
|
||||
bool b = passes.run(*mod);
|
||||
std::cout<<"\tModified mod = "<<b<<std::endl;
|
||||
|
||||
llvm::ExistingModuleProvider *mp =
|
||||
new llvm::ExistingModuleProvider(mod);
|
||||
//llvm::ExecutionEngine *ee =
|
||||
// llvm::ExecutionEngine::create(mp, false);
|
||||
llvm::ExecutionEngine *ee =
|
||||
llvm::ExecutionEngine::create(mp, false);
|
||||
|
||||
ga_llvm->module = mod;
|
||||
ga_llvm->engine = 0;//ee;
|
||||
ga_llvm->engine = ee;
|
||||
fprintf(stderr, "DUMPX \n");
|
||||
//tgsi_dump(tokens, TGSI_DUMP_VERBOSE);
|
||||
tgsi_dump(tokens, 0);
|
||||
fprintf(stderr, "DUMPEND \n");
|
||||
|
||||
Function *func = mod->getFunction("run_vertex_shader");
|
||||
std::cout << "run_vertex_shader = "<<func;
|
||||
ga_llvm->function = ee->getPointerToFunctionOrStub(func);
|
||||
std::cout << " -- FUNC is " <<ga_llvm->function;
|
||||
|
||||
return ga_llvm;
|
||||
}
|
||||
|
||||
|
|
@ -423,6 +511,7 @@ void ga_llvm_prog_delete(struct ga_llvm_prog *prog)
|
|||
delete mod;
|
||||
prog->module = 0;
|
||||
prog->engine = 0;
|
||||
prog->function = 0;
|
||||
free(prog);
|
||||
}
|
||||
|
||||
|
|
@ -430,24 +519,12 @@ int ga_llvm_prog_exec(struct ga_llvm_prog *prog,
|
|||
float (*inputs)[32][4],
|
||||
void *dests[16*32*4],
|
||||
float (*consts)[4],
|
||||
int count)
|
||||
int count,
|
||||
int num_attribs)
|
||||
{
|
||||
//std::cout << "START "<<std::endl;
|
||||
llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
|
||||
llvm::Function *func = mod->getFunction("main");
|
||||
llvm::ExecutionEngine *ee = static_cast<llvm::ExecutionEngine*>(prog->engine);
|
||||
std::cout << "---- START LLVM Execution "<<std::endl;
|
||||
|
||||
std::vector<llvm::GenericValue> args(0);
|
||||
//args[0] = GenericValue(&st);
|
||||
//std::cout << "Mod is "<<*mod;
|
||||
//std::cout << "\n\nRunning llvm: " << std::endl;
|
||||
if (func) {
|
||||
std::cout << "Func is "<<func;
|
||||
llvm::GenericValue gv = ee->runFunction(func, args);
|
||||
}
|
||||
|
||||
//delete ee;
|
||||
//delete mp;
|
||||
|
||||
std::cout << "---- END LLVM Execution "<<std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ struct tgsi_sampler;
|
|||
/* Handle for a shader program built from TGSI tokens by
 * ga_llvm_from_tgsi(). All members are opaque void pointers. */
struct ga_llvm_prog {
|
||||
void *module;   /* cast to llvm::Module* by the implementation */
|
||||
void *engine;   /* cast to llvm::ExecutionEngine* by the implementation */
|
||||
void *function; /* pointer obtained from the execution engine for "run_vertex_shader" */
|
||||
};
|
||||
struct ga_llvm_prog *
|
||||
ga_llvm_from_tgsi(const struct tgsi_token *tokens);
|
||||
|
|
@ -22,7 +23,8 @@ int ga_llvm_prog_exec(struct ga_llvm_prog *prog,
|
|||
float (*inputs)[32][4],
|
||||
void *dests[16*32*4],
|
||||
float (*consts)[4],
|
||||
int count);
|
||||
int count,
|
||||
int num_attribs);
|
||||
|
||||
#if defined __cplusplus
|
||||
} // extern "C"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue