Generate the base shader.

This commit is contained in:
Zack Rusin 2007-10-15 08:12:22 -04:00
parent 2dbba8b024
commit 9e6d58fac2
4 changed files with 194 additions and 47 deletions

View file

@ -112,7 +112,7 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
float (*consts)[4] = (float (*)[4]) draw->mapped_constants;
struct ga_llvm_prog *prog = draw->vertex_shader->state->llvm_prog;
fprintf(stderr, "XX q(%d) ", draw->vs.queue_nr);
fprintf(stderr, "--- XX q(%d) ", draw->vs.queue_nr);
/* fetch the inputs */
for (i = 0; i < draw->vs.queue_nr; ++i) {
@ -123,7 +123,8 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
/* batch execute the shaders on all the vertices */
ga_llvm_prog_exec(prog, inputs, dests, consts,
draw->vs.queue_nr);
draw->vs.queue_nr,
draw->vertex_info.num_attribs);
draw->vs.queue_nr = 0;
}

View file

@ -1,20 +1,29 @@
/* clang --emit-llvm llvm_builtins.c |llvm-as |opt -std-compile-opts |llvm-dis */
/* clang --emit-llvm llvm_builtins.c |llvm-as |opt -std-compile-opts |llvm2cpp -for=Shader -gen-module -funcname=createBaseShader */
typedef __attribute__(( ocu_vector_type(4) )) float float4;
#if 0
//clang doesn't support "struct->member" notation yet
struct vertex_header {
unsigned clipmask:12;
unsigned edgeflag:1;
unsigned pad:3;
unsigned vertex_id:16;
inline float4 compute_clip(float4 vec, float4 scale, float4 trans)
{
return vec*scale + trans;
}
float clip[4];
float data[][4];
};
inline float
dot4(const float4 a, const float4 b)
dot4(float4 a, float4 b)
{
float4 c = a*b;
return c.x + c.y + c.z + c.w;
}
inline unsigned
compute_clipmask(float4 clip, const float4 (*plane), unsigned nr)
compute_clipmask(float4 clip, float4 (*plane), unsigned nr)
{
unsigned mask = 0;
unsigned i;
@ -29,7 +38,8 @@ compute_clipmask(float4 clip, const float4 (*plane), unsigned nr)
inline void collect_results(float4 *results, struct vertex_header *vOut,
float4 *planes, int nr_planes,
float4 scale, float4 trans)
float4 scale, float4 trans,
int num_attribs)
{
/* store results */
unsigned slot;
@ -38,13 +48,14 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
/* Handle attr[0] (position) specially:
*/
float4 res0 = results[0];
x = vOut->clip[0] = clip.x;
y = vOut->clip[1] = clip.y;
z = vOut->clip[2] = clip.z;
w = vOut->clip[3] = clip.w;
float *clip = vOut->clip;
x = clip[0] = res0.x;
y = clip[1] = res0.y;
z = clip[2] = res0.z;
w = clip[3] = res0.w;
vOut[i]->clipmask = compute_clipmask(res0, planes, nr_planes);
vOut[i]->edgeflag = 1;
vOut->clipmask = compute_clipmask(res0, planes, nr_planes);
vOut->edgeflag = 1;
/* divide by w */
w = 1.0f / w;
@ -54,10 +65,10 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
res0.x = x; res0.y = y; res0.z = z; res0.w = 1;
/* Viewport mapping */
res = res * scale + trans;
vOut->data[0][0] = res.x;
vOut->data[0][1] = res.y;
vOut->data[0][2] = res.z;
res0 = res0 * scale + trans;
vOut->data[0][0] = res0.x;
vOut->data[0][1] = res0.y;
vOut->data[0][2] = res0.z;
vOut->data[0][3] = w;
/* Remaining attributes are packed into sequential post-transform
@ -65,7 +76,7 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
* Skip 0 since we just did it above.
* Subtract two because of the VERTEX_HEADER, CLIP_POS attribs.
*/
for (slot = 1; slot < draw->vertex_info.num_attribs - 2; slot++) {
for (slot = 1; slot < num_attribs - 2; slot++) {
float4 vec = results[slot];
vOut->data[slot][0] = vec.x;
vOut->data[slot][1] = vec.y;
@ -79,12 +90,68 @@ inline void collect_results(float4 *results, struct vertex_header *vOut,
vOut->data[slot][3]);
}
}
#endif
void run_vertex_shader(float ainputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4],
struct vertex_header *dests[VS_QUEUE_LENGTH],
float *aconsts[4]
int count)
/**
 * Repack a batch of vertex inputs from plain float[4] storage into float4
 * vectors.
 *
 * res         - destination; res[i][j] receives attribute j of vertex i
 * ainputs     - source; ainputs[i][j] holds the four components of
 *               attribute j of vertex i
 * count       - number of vertices (rows) to convert
 * num_attribs - number of attributes converted per vertex; each row of
 *               both arrays has room for 32
 *
 * Rows at or beyond count and attributes at or beyond num_attribs are
 * neither read nor written.
 */
void from_array(float4 (*res)[32], float (*ainputs)[32][4],
                int count, int num_attribs)
{
   for (int i = 0; i < count; ++i) {
      for (int j = 0; j < num_attribs; ++j) {
         float4 vec;
         vec.x = ainputs[i][j][0];
         vec.y = ainputs[i][j][1];
         vec.z = ainputs[i][j][2];
         vec.w = ainputs[i][j][3];
         res[i][j] = vec;
      }
   }
}
/**
 * Convert `count` constants from float[4] storage into float4 vectors.
 *
 * res[k] receives the four components stored in ainputs[k], for every
 * k in [0, count). Nothing is written when count is zero or negative.
 */
void from_consts(float4 *res, float (*ainputs)[4],
                 int count)
{
   int idx = 0;

   while (idx < count) {
      const float *src = ainputs[idx];
      float4 packed;

      packed.x = src[0];
      packed.y = src[1];
      packed.z = src[2];
      packed.w = src[3];

      res[idx] = packed;
      ++idx;
   }
}
/**
 * Unpack `num_attribs` float4 vectors into plain float[4] storage.
 *
 * dests[s] receives the x, y, z and w components of in[s], in that order,
 * for every s in [0, num_attribs).
 */
void to_array(float (*dests)[4], float4 *in, int num_attribs)
{
   for (int slot = 0; slot < num_attribs; ++slot) {
      /* Take a copy of the vector before storing its components. */
      const float4 src = in[slot];

      dests[slot][0] = src.x;
      dests[slot][1] = src.y;
      dests[slot][2] = src.z;
      dests[slot][3] = src.w;
   }
}
extern void execute_shader(float4 *dests, float4 *inputs,
float4 *consts);
/**
 * Run the vertex shader over a batch of vertices.
 *
 * ainputs     - per-vertex input attributes, ainputs[i][j] = float[4]
 * dests       - per-vertex outputs; dests[i][j] receives float[4] results
 * aconsts     - shader constants as float[4] entries
 * count       - number of vertices in the batch
 * num_attribs - number of attributes converted on input and written back
 *               on output for each vertex
 *
 * Inputs and constants are first repacked into float4 vectors, then
 * execute_shader() is called once per vertex and its results are unpacked
 * into dests.
 */
void run_vertex_shader(float (*ainputs)[32][4],
                       float (*dests)[32][4],
                       float (*aconsts)[4],
                       int count,
                       int num_attribs)
{
   /* NOTE(review): both scratch arrays hold 16*32*4 rows of 32 float4s on
    * the stack and count is not checked against that capacity — confirm
    * the callers' upper bound on count and the available stack size. */
   float4 inputs[16*32*4][32];
   float4 consts[32];
   float4 results[16*32*4][32];

   printf("XXXXXXXXXXX run_vertex_shader\n");

   from_array(inputs, ainputs, count, num_attribs);
   /* NOTE(review): always converts 32 constants regardless of how many the
    * caller actually mapped — confirm aconsts has at least 32 entries. */
   from_consts(consts, aconsts, 32);

   for (int i = 0; i < count; ++i) {
      float4 *in = inputs[i];
      float4 *res = results[i];

      /* The shader must run before its results are copied out; doing the
       * copy first would export whatever was in the scratch buffer. */
      execute_shader(res, in, consts);
      to_array(dests[i], res, num_attribs);
   }
}

View file

@ -14,15 +14,88 @@
#include <llvm/DerivedTypes.h>
#include <llvm/Instructions.h>
#include <llvm/ModuleProvider.h>
#include <llvm/Pass.h>
#include <llvm/PassManager.h>
#include <llvm/ParameterAttributes.h>
#include <llvm/Support/PatternMatch.h>
#include <llvm/ExecutionEngine/JIT.h>
#include <llvm/ExecutionEngine/Interpreter.h>
#include <llvm/ExecutionEngine/GenericValue.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/LinkAllPasses.h>
#include <llvm/Analysis/Verifier.h>
#include <llvm/Analysis/LoopPass.h>
#include <llvm/Target/TargetData.h>
#include <llvm/Bitcode/ReaderWriter.h>
#include <iostream>
using namespace llvm;
#include "llvm_base_shader.cpp"
/// Append pass P to pass manager PM. Thin wrapper so that every pass
/// registration below goes through a single call site.
static inline void addPass(PassManager &PM, Pass *P)
{
   PM.add(P);
}
// Populate PM with a fixed sequence of verification, cleanup and
// optimization passes. The verifier is added first; every other pass is
// appended in exactly the order written below, and several cleanup passes
// (instruction combining, CFG simplification) are deliberately repeated
// after transformations that expose new opportunities.
//
// NOTE(review): the function name and pass list suggest this was copied from
// the pass set behind `opt -std-compile-opts` — confirm against the LLVM
// version in use before reordering or removing anything.
static inline void AddStandardCompilePasses(PassManager &PM) {
PM.add(createVerifierPass()); // Verify that input is correct
addPass(PM, createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp
// If the -strip-debug command line option was specified, do it.
// (Disabled here: there is no StripDebug option in this file.)
//if (StripDebug)
// addPass(PM, createStripSymbolsPass(true));
addPass(PM, createRaiseAllocationsPass()); // call %malloc -> malloc inst
addPass(PM, createCFGSimplificationPass()); // Clean up disgusting code
addPass(PM, createPromoteMemoryToRegisterPass());// Kill useless allocas
addPass(PM, createGlobalOptimizerPass()); // Optimize out global vars
addPass(PM, createGlobalDCEPass()); // Remove unused fns and globs
addPass(PM, createIPConstantPropagationPass());// IP Constant Propagation
addPass(PM, createDeadArgEliminationPass()); // Dead argument elimination
addPass(PM, createInstructionCombiningPass()); // Clean up after IPCP & DAE
addPass(PM, createCFGSimplificationPass()); // Clean up after IPCP & DAE
addPass(PM, createPruneEHPass()); // Remove dead EH info
// The DisableInline guard is commented out, so inlining is always added.
//if (!DisableInline)
addPass(PM, createFunctionInliningPass()); // Inline small functions
addPass(PM, createArgumentPromotionPass()); // Scalarize uninlined fn args
addPass(PM, createTailDuplicationPass()); // Simplify cfg by copying code
addPass(PM, createInstructionCombiningPass()); // Cleanup for scalarrepl.
addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs
addPass(PM, createScalarReplAggregatesPass()); // Break up aggregate allocas
addPass(PM, createInstructionCombiningPass()); // Combine silly seq's
addPass(PM, createCondPropagationPass()); // Propagate conditionals
addPass(PM, createTailCallEliminationPass()); // Eliminate tail calls
addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs
addPass(PM, createReassociatePass()); // Reassociate expressions
// Loop-oriented passes.
addPass(PM, createLoopRotatePass());
addPass(PM, createLICMPass()); // Hoist loop invariants
addPass(PM, createLoopUnswitchPass()); // Unswitch loops.
addPass(PM, createLoopIndexSplitPass()); // Index split loops.
addPass(PM, createInstructionCombiningPass()); // Clean up after LICM/reassoc
addPass(PM, createIndVarSimplifyPass()); // Canonicalize indvars
addPass(PM, createLoopUnrollPass()); // Unroll small loops
addPass(PM, createInstructionCombiningPass()); // Clean up after the unroller
addPass(PM, createGVNPass()); // Remove redundancies
addPass(PM, createSCCPPass()); // Constant prop with SCCP
// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.
addPass(PM, createInstructionCombiningPass());
addPass(PM, createCondPropagationPass()); // Propagate conditionals
// Final dead-code and housekeeping passes.
addPass(PM, createDeadStoreEliminationPass()); // Delete dead stores
addPass(PM, createAggressiveDCEPass()); // SSA based 'Aggressive DCE'
addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs
addPass(PM, createSimplifyLibCallsPass()); // Library Call Optimizations
addPass(PM, createDeadTypeEliminationPass()); // Eliminate dead types
addPass(PM, createConstantMergePass()); // Merge dup global constants
}
static void
translate_declaration(llvm::Module *module,
@ -341,7 +414,7 @@ translate_instruction(llvm::Module *module,
static llvm::Module *
tgsi_to_llvm(const struct tgsi_token *tokens)
{
llvm::Module *mod = new llvm::Module("tgsi");
llvm::Module *mod = createBaseShader();
struct tgsi_parse_context parse;
struct tgsi_full_instruction fi;
struct tgsi_full_declaration fd;
@ -402,18 +475,33 @@ ga_llvm_from_tgsi(const struct tgsi_token *tokens)
struct ga_llvm_prog *ga_llvm =
(struct ga_llvm_prog *)malloc(sizeof(struct ga_llvm_prog));
llvm::Module *mod = tgsi_to_llvm(tokens);
/* Run optimization passes over it */
PassManager passes;
// Add an appropriate TargetData instance for this module...
passes.add(new TargetData(mod));
AddStandardCompilePasses(passes);
std::cout<<"Running optimization passes..."<<std::endl;
bool b = passes.run(*mod);
std::cout<<"\tModified mod = "<<b<<std::endl;
llvm::ExistingModuleProvider *mp =
new llvm::ExistingModuleProvider(mod);
//llvm::ExecutionEngine *ee =
// llvm::ExecutionEngine::create(mp, false);
llvm::ExecutionEngine *ee =
llvm::ExecutionEngine::create(mp, false);
ga_llvm->module = mod;
ga_llvm->engine = 0;//ee;
ga_llvm->engine = ee;
fprintf(stderr, "DUMPX \n");
//tgsi_dump(tokens, TGSI_DUMP_VERBOSE);
tgsi_dump(tokens, 0);
fprintf(stderr, "DUMPEND \n");
Function *func = mod->getFunction("run_vertex_shader");
std::cout << "run_vertex_shader = "<<func;
ga_llvm->function = ee->getPointerToFunctionOrStub(func);
std::cout << " -- FUNC is " <<ga_llvm->function;
return ga_llvm;
}
@ -423,6 +511,7 @@ void ga_llvm_prog_delete(struct ga_llvm_prog *prog)
delete mod;
prog->module = 0;
prog->engine = 0;
prog->function = 0;
free(prog);
}
@ -430,24 +519,12 @@ int ga_llvm_prog_exec(struct ga_llvm_prog *prog,
float (*inputs)[32][4],
void *dests[16*32*4],
float (*consts)[4],
int count)
int count,
int num_attribs)
{
//std::cout << "START "<<std::endl;
llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
llvm::Function *func = mod->getFunction("main");
llvm::ExecutionEngine *ee = static_cast<llvm::ExecutionEngine*>(prog->engine);
std::cout << "---- START LLVM Execution "<<std::endl;
std::vector<llvm::GenericValue> args(0);
//args[0] = GenericValue(&st);
//std::cout << "Mod is "<<*mod;
//std::cout << "\n\nRunning llvm: " << std::endl;
if (func) {
std::cout << "Func is "<<func;
llvm::GenericValue gv = ee->runFunction(func, args);
}
//delete ee;
//delete mp;
std::cout << "---- END LLVM Execution "<<std::endl;
return 0;
}

View file

@ -12,6 +12,7 @@ struct tgsi_sampler;
/*
 * Handle for a shader program produced by ga_llvm_from_tgsi().
 * All members are untyped pointers; presumably this keeps the C++ LLVM
 * types out of this header so C code can include it — confirm.
 */
struct ga_llvm_prog {
/* The llvm::Module built for the shader; ga_llvm_prog_delete() clears it. */
void *module;
/* The llvm::ExecutionEngine created for the module. */
void *engine;
/* Address obtained from the engine for the module's "run_vertex_shader"
 * function. */
void *function;
};
struct ga_llvm_prog *
ga_llvm_from_tgsi(const struct tgsi_token *tokens);
@ -22,7 +23,8 @@ int ga_llvm_prog_exec(struct ga_llvm_prog *prog,
float (*inputs)[32][4],
void *dests[16*32*4],
float (*consts)[4],
int count);
int count,
int num_attribs);
#if defined __cplusplus
} // extern "C"