r300g: copy the compiler from r300c

What a beast.

r300g doesn't depend on files from r300c anymore, so r300c is now left
to its own fate. BTW 'make test' can be invoked from the gallium/r300
directory to run some compiler unit tests.
This commit is contained in:
Marek Olšák 2011-07-26 21:15:05 +02:00
parent 860c51d827
commit 1c2c4ddbd1
69 changed files with 17038 additions and 25 deletions

View file

@ -26,19 +26,51 @@ C_SOURCES = \
r300_texture.c \ r300_texture.c \
r300_texture_desc.c \ r300_texture_desc.c \
r300_tgsi_to_rc.c \ r300_tgsi_to_rc.c \
r300_transfer.c r300_transfer.c \
\
compiler/radeon_code.c \
compiler/radeon_compiler.c \
compiler/radeon_compiler_util.c \
compiler/radeon_emulate_branches.c \
compiler/radeon_emulate_loops.c \
compiler/radeon_program.c \
compiler/radeon_program_print.c \
compiler/radeon_opcodes.c \
compiler/radeon_program_alu.c \
compiler/radeon_program_pair.c \
compiler/radeon_program_tex.c \
compiler/radeon_pair_translate.c \
compiler/radeon_pair_schedule.c \
compiler/radeon_pair_regalloc.c \
compiler/radeon_pair_dead_sources.c \
compiler/radeon_dataflow.c \
compiler/radeon_dataflow_deadcode.c \
compiler/radeon_dataflow_swizzles.c \
compiler/radeon_list.c \
compiler/radeon_optimize.c \
compiler/radeon_remove_constants.c \
compiler/radeon_rename_regs.c \
compiler/radeon_variable.c \
compiler/r3xx_fragprog.c \
compiler/r300_fragprog.c \
compiler/r300_fragprog_swizzle.c \
compiler/r300_fragprog_emit.c \
compiler/r500_fragprog.c \
compiler/r500_fragprog_emit.c \
compiler/r3xx_vertprog.c \
compiler/r3xx_vertprog_dump.c \
compiler/memory_pool.c \
\
$(TOP)/src/glsl/ralloc.c \
$(TOP)/src/mesa/program/register_allocate.c
LIBRARY_INCLUDES = \ LIBRARY_INCLUDES = \
-I$(TOP)/src/mesa/drivers/dri/r300/compiler \ -I$(TOP)/include \
-I$(TOP)/include -I$(TOP)/src/mesa \
-I$(TOP)/src/glsl
COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a
EXTRA_OBJECTS = \
$(COMPILER_ARCHIVE)
include ../../Makefile.template include ../../Makefile.template
.PHONY: $(COMPILER_ARCHIVE) test: default
$(COMPILER_ARCHIVE): @$(MAKE) -s -C compiler/tests/
$(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler

View file

@ -1,13 +1,11 @@
Import('*') Import('*')
r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript')
env = env.Clone() env = env.Clone()
# add the paths for r300compiler
env.Append(CPPPATH = [ env.Append(CPPPATH = [
'#/src/mesa/drivers/dri/r300/compiler',
'#/include', '#/include',
'#/src/mesa', '#/src/mesa',
'#/src/glsl',
'#/src/mapi',
]) ])
r300 = env.ConvenienceLibrary( r300 = env.ConvenienceLibrary(
@ -36,7 +34,41 @@ r300 = env.ConvenienceLibrary(
'r300_texture_desc.c', 'r300_texture_desc.c',
'r300_tgsi_to_rc.c', 'r300_tgsi_to_rc.c',
'r300_transfer.c', 'r300_transfer.c',
] + r300compiler) + r300compiler 'compiler/radeon_code.c',
'compiler/radeon_compiler.c',
'compiler/radeon_compiler_util.c',
'compiler/radeon_program.c',
'compiler/radeon_program_print.c',
'compiler/radeon_opcodes.c',
'compiler/radeon_program_alu.c',
'compiler/radeon_program_pair.c',
'compiler/radeon_program_tex.c',
'compiler/radeon_pair_translate.c',
'compiler/radeon_pair_schedule.c',
'compiler/radeon_pair_regalloc.c',
'compiler/radeon_pair_dead_sources.c',
'compiler/radeon_optimize.c',
'compiler/radeon_remove_constants.c',
'compiler/radeon_rename_regs.c',
'compiler/radeon_emulate_branches.c',
'compiler/radeon_emulate_loops.c',
'compiler/radeon_dataflow.c',
'compiler/radeon_dataflow_deadcode.c',
'compiler/radeon_dataflow_swizzles.c',
'compiler/radeon_variable.c',
'compiler/radeon_list.c',
'compiler/r3xx_fragprog.c',
'compiler/r300_fragprog.c',
'compiler/r300_fragprog_swizzle.c',
'compiler/r300_fragprog_emit.c',
'compiler/r500_fragprog.c',
'compiler/r500_fragprog_emit.c',
'compiler/r3xx_vertprog.c',
'compiler/r3xx_vertprog_dump.c',
'compiler/memory_pool.c',
'#/src/glsl/ralloc.c',
'#/src/mesa/program/register_allocate.c'
])
env.Alias('r300', r300) env.Alias('r300', r300)

View file

@ -0,0 +1,97 @@
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "memory_pool.h"
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#define POOL_LARGE_ALLOC 4096
#define POOL_ALIGN 8
/**
 * Header stored at the very start of every chunk that is malloc'ed on
 * behalf of a pool. The usable memory follows directly after it, and the
 * headers are chained so that the pool can free all chunks at once.
 */
struct memory_block {
/* Next chunk in the owning pool's list. */
struct memory_block * next;
};
/**
 * Bring a pool into its empty state by zero-filling the whole structure:
 * no blocks are owned and the allocation cursor is unset.
 */
void memory_pool_init(struct memory_pool * pool)
{
	memset(pool, 0, sizeof(*pool));
}
void memory_pool_destroy(struct memory_pool * pool)
{
while(pool->blocks) {
struct memory_block * block = pool->blocks;
pool->blocks = block->next;
free(block);
}
}
/**
 * Allocate a fresh block and make it the pool's current block.
 *
 * The new block is as large as everything the pool obtained through
 * refills so far (so that amount doubles on every refill), or
 * 2*POOL_LARGE_ALLOC for the very first block. Whatever space was left in
 * the previous block is abandoned; it is released together with the pool.
 *
 * \return 1 on success, 0 if the system allocator failed, in which case
 * the pool is left untouched.
 */
static int refill_pool(struct memory_pool * pool)
{
	unsigned int blocksize = pool->total_allocated;
	struct memory_block * newblock;

	if (!blocksize)
		blocksize = 2*POOL_LARGE_ALLOC;

	newblock = malloc(blocksize);
	if (!newblock)
		return 0;

	newblock->next = pool->blocks;
	pool->blocks = newblock;

	/* Usable space starts right behind the list header. */
	pool->head = (unsigned char*)(newblock + 1);
	pool->end = ((unsigned char*)newblock) + blocksize;
	pool->total_allocated += blocksize;
	return 1;
}

/**
 * Allocate \p bytes bytes that live until memory_pool_destroy() is called
 * on \p pool. Individual allocations cannot be freed.
 *
 * Requests smaller than POOL_LARGE_ALLOC are carved out of the current
 * block; larger requests get a dedicated chunk that is only linked into
 * the pool's list so that it is released with the pool.
 *
 * \return pointer to the new memory, or NULL if the system allocator
 * failed.
 */
void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes)
{
	if (bytes < POOL_LARGE_ALLOC) {
		void * ptr;

		if (pool->head + bytes > pool->end) {
			if (!refill_pool(pool))
				return NULL;
		}

		/* A fresh block is at least 2*POOL_LARGE_ALLOC bytes, so a
		 * small request always fits after a successful refill. */
		assert(pool->head + bytes <= pool->end);

		ptr = pool->head;

		/* Advance the cursor and round it up to POOL_ALIGN. uintptr_t is
		 * used because unsigned long is narrower than a pointer on
		 * LLP64 platforms and would truncate the address. */
		pool->head += bytes;
		pool->head = (unsigned char*)(((uintptr_t)pool->head + POOL_ALIGN - 1) &
					      ~(uintptr_t)(POOL_ALIGN - 1));

		return ptr;
	} else {
		struct memory_block * block;

		/* Guard the size computation where size_t is no wider than
		 * unsigned int. */
		if (bytes > SIZE_MAX - sizeof(struct memory_block))
			return NULL;

		block = malloc(bytes + sizeof(struct memory_block));
		if (!block)
			return NULL;

		block->next = pool->blocks;
		pool->blocks = block;

		return (block + 1);
	}
}

View file

@ -0,0 +1,80 @@
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#ifndef MEMORY_POOL_H
#define MEMORY_POOL_H
struct memory_block;
/**
* Provides a pool of memory that can quickly be allocated from, at the
* cost of being unable to explicitly free one of the allocated blocks.
* Instead, the entire pool can be freed at once.
*
* The idea is to allow one to quickly allocate a flexible amount of
* memory during operations like shader compilation while avoiding
* reference counting headaches.
*/
struct memory_pool {
/* Allocation cursor: next free byte inside the current block. */
unsigned char * head;
/* One past the last byte of the current block. */
unsigned char * end;
/* Sum of the sizes of all blocks obtained when refilling the pool; it
 * also determines the size of the next refill block. */
unsigned int total_allocated;
/* Singly linked list of every chunk malloc'ed for this pool. */
struct memory_block * blocks;
};
/* Zero-fill \p pool so that it owns no memory yet. */
void memory_pool_init(struct memory_pool * pool);
/* Free every chunk owned by \p pool; all memory handed out by
 * memory_pool_malloc() for this pool becomes invalid. */
void memory_pool_destroy(struct memory_pool * pool);
/* Allocate \p bytes bytes from \p pool. The memory cannot be freed
 * individually; it is released by memory_pool_destroy(). */
void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
/**
 * Generic helper for growing an array that has separate size/count
 * and reserved counters to accommodate up to num new elements.
 *
 *  type * Array;
 *  unsigned int Size;
 *  unsigned int Reserved;
 *
 *  memory_pool_array_reserve(pool, type, Array, Size, Reserved, k);
 *  assert(Size + k <= Reserved);
 *
 * When the array has to grow, new storage is taken from the pool, the
 * first Size elements are copied over and Array/Reserved are updated.
 * The old storage stays in the pool until the pool is destroyed.
 *
 * \note Size is not changed by this macro.
 *
 * \note The including file must provide memcpy() (<string.h>).
 *
 * \warning Array, Size, Reserved have to be lvalues and may be evaluated
 * several times.
 */
#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \
	unsigned int _num = (num); \
	unsigned int _required = (size) + _num; \
	if (_required > (reserved)) { \
		unsigned int newreserve = (reserved) * 2; \
		type * newarray; \
		if (newreserve < _required) { \
			newreserve = 4 * _num; /* arbitrary heuristic */ \
			if (newreserve < _required) \
				newreserve = _required; \
		} \
		newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \
		if ((size) > 0) \
			memcpy(newarray, (array), (size) * sizeof(type)); \
		(array) = newarray; \
		(reserved) = newreserve; \
	} \
} while(0)
#endif /* MEMORY_POOL_H */

View file

@ -0,0 +1,338 @@
/*
* Copyright (C) 2005 Ben Skeggs.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "r300_fragprog.h"
#include <stdio.h>
#include "../r300_reg.h"
/**
 * Write a short mnemonic for the presubtract operation encoded in the
 * 0x600000 field of an RGB or alpha instruction word into \p out.
 *
 * \p out is left untouched when the field matches none of the four
 * R300_ALU_SRCP_* values handled here.
 */
static void presub_string(char out[10], unsigned int inst)
{
	const char * mnemonic = NULL;
	const unsigned int srcp_op = inst & 0x600000;

	switch (srcp_op) {
	case R300_ALU_SRCP_1_MINUS_2_SRC0:
		mnemonic = "bias";
		break;
	case R300_ALU_SRCP_SRC1_MINUS_SRC0:
		mnemonic = "sub";
		break;
	case R300_ALU_SRCP_SRC1_PLUS_SRC0:
		mnemonic = "add";
		break;
	case R300_ALU_SRCP_1_MINUS_SRC0:
		/* The trailing blank is part of the emitted text. */
		mnemonic = "inv ";
		break;
	}

	if (mnemonic)
		sprintf(out, "%s", mnemonic);
}
/**
 * Translate one flag of the r400 extended-address word into the register
 * index bit it stands for.
 *
 * \param bit            mask selecting the flag to test
 * \param r400_ext_addr  extended-address word of the instruction
 * \return 32 (1 << 5) if any selected bit is set, 0 otherwise
 */
static int get_msb(unsigned int bit, unsigned int r400_ext_addr)
{
	if ((r400_ext_addr & bit) == 0)
		return 0;

	return 1 << 5;
}
/**
 * Debug helper: print a human-readable listing of the R300 fragment
 * program hardware code stored in the compiler to stderr.
 *
 * For every node the TEX instructions (if any) and the ALU instructions
 * are decoded from the raw instruction words. Nothing is modified apart
 * from a function-local static counter that numbers the dumps.
 *
 * \param c     compiler; it is cast to r300_fragment_program_compiler
 * \param user  not used by this function
 */
void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
{
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
struct r300_fragment_program_code *code = &compiler->code->code.r300;
int n, i, j;
/* Number of dumps printed so far; a plain static without any locking. */
static int pc = 0;
fprintf(stderr, "pc=%d*************************************\n", pc++);
fprintf(stderr, "Hardware program\n");
fprintf(stderr, "----------------\n");
if (c->is_r400) {
fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext);
}
/* The low two bits of config hold the index of the last node. The
 * entries of the used nodes sit in the last slots of code_addr[]. */
for (n = 0; n <= (code->config & 3); n++) {
uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
/* Start and size come from code_addr; three additional bits per value
 * are taken from r400_code_offset_ext and placed at bits 6..8. */
unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) +
(((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6);
unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) +
(((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6);
/* NOTE(review): the TEX start/size are read without any r400 extension
 * bits here, unlike the ALU values above - confirm this is intended. */
int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, "
"alu_end: %u, tex_end: %d (code_addr: %08x)\n", n,
alu_offset, tex_offset, alu_end, tex_end, code_addr);
/* Only the first node can be without TEX instructions; for it the
 * FIRST_NODE_HAS_TEX flag decides whether a TEX block is listed. */
if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
fprintf(stderr, "  TEX:\n");
/* tex_end is an inclusive end (size - 1), hence "<=". */
for (i = tex_offset;
i <= tex_offset + tex_end;
++i) {
const char *instr;
switch ((code->tex.
inst[i] >> R300_TEX_INST_SHIFT) &
15) {
case R300_TEX_OP_LD:
instr = "TEX";
break;
case R300_TEX_OP_KIL:
instr = "KIL";
break;
case R300_TEX_OP_TXP:
instr = "TXP";
break;
case R300_TEX_OP_TXB:
instr = "TXB";
break;
default:
instr = "UNKNOWN";
}
/* Destination and source register numbers are masked to 5 bits. */
fprintf(stderr,
"    %s t%i, %c%i, texture[%i]   (%08x)\n",
instr,
(code->tex.
inst[i] >> R300_DST_ADDR_SHIFT) & 31,
't',
(code->tex.
inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
(code->tex.
inst[i] & R300_TEX_ID_MASK) >>
R300_TEX_ID_SHIFT,
code->tex.inst[i]);
}
}
/* alu_end is an inclusive end as well. */
for (i = alu_offset;
i <= alu_offset + alu_end; ++i) {
/* srcc/srca: names of the three RGB/alpha sources plus, in slot 3,
 * the presubtract mnemonic. dstc/dsta: destination strings.
 * argc/arga: decoded operand strings. */
char srcc[4][10], dstc[20];
char srca[4][10], dsta[20];
char argc[3][20];
char arga[3][20];
char flags[5], tmp[10];
/* Source addresses: 6 bits per source; bit 5 selects constant ('c')
 * versus temporary ('t'), and the r400 extension word supplies one more
 * index bit (value 32). */
for (j = 0; j < 3; ++j) {
int regc = code->alu.inst[i].rgb_addr >> (j * 6);
int rega = code->alu.inst[i].alpha_addr >> (j * 6);
int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j),
code->alu.inst[i].r400_ext_addr);
int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j),
code->alu.inst[i].r400_ext_addr);
sprintf(srcc[j], "%c%i",
(regc & 32) ? 'c' : 't', (regc & 31) | msbc);
sprintf(srca[j], "%c%i",
(rega & 32) ? 'c' : 't', (rega & 31) | msba);
}
/* RGB destination: first the temporary register write mask ... */
dstc[0] = 0;
sprintf(flags, "%s%s%s",
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
if (flags[0] != 0) {
unsigned int msb = get_msb(
R400_ADDRD_EXT_RGB_MSB_BIT,
code->alu.inst[i].r400_ext_addr);
sprintf(dstc, "t%i.%s ",
((code->alu.inst[i].
rgb_addr >> R300_ALU_DSTC_SHIFT)
& 31) | msb,
flags);
}
/* ... then the output write mask, appended to the same string. */
sprintf(flags, "%s%s%s",
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
if (flags[0] != 0) {
sprintf(tmp, "o%i.%s",
(code->alu.inst[i].
rgb_addr >> 29) & 3,
flags);
/* NOTE(review): strcat() is used, but this file only includes
 * <stdio.h> directly - presumably <string.h> arrives through a
 * project header; verify. */
strcat(dstc, tmp);
}
/* Presub */
presub_string(srcc[3], code->alu.inst[i].rgb_inst);
presub_string(srca[3], code->alu.inst[i].alpha_inst);
/* Alpha destination: temporary, output and/or depth. */
dsta[0] = 0;
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
unsigned int msb = get_msb(
R400_ADDRD_EXT_A_MSB_BIT,
code->alu.inst[i].r400_ext_addr);
sprintf(dsta, "t%i.w ",
((code->alu.inst[i].
alpha_addr >> R300_ALU_DSTA_SHIFT) & 31)
| msb);
}
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
sprintf(tmp, "o%i.w ",
(code->alu.inst[i].
alpha_addr >> 25) & 3);
strcat(dsta, tmp);
}
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
strcat(dsta, "Z");
}
fprintf(stderr,
"%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n"
"       w: %3s %3s %3s %5s-> %-20s (%08x)\n", i,
srcc[0], srcc[1], srcc[2], srcc[3], dstc,
code->alu.inst[i].rgb_addr, srca[0], srca[1],
srca[2], srca[3], dsta,
code->alu.inst[i].alpha_addr);
/* Operand selectors: 7 bits per argument in rgb_inst/alpha_inst. The
 * low 5 bits select source and swizzle, bit 5 means negate and bit 6
 * means absolute value. */
for (j = 0; j < 3; ++j) {
int regc = code->alu.inst[i].rgb_inst >> (j * 7);
int rega = code->alu.inst[i].alpha_inst >> (j * 7);
int d;
char buf[20];
/* RGB argument selector. */
d = regc & 31;
if (d < 12) {
/* 0..11: four swizzles for each of the three RGB sources. */
switch (d % 4) {
case R300_ALU_ARGC_SRC0C_XYZ:
sprintf(buf, "%s.xyz",
srcc[d / 4]);
break;
case R300_ALU_ARGC_SRC0C_XXX:
sprintf(buf, "%s.xxx",
srcc[d / 4]);
break;
case R300_ALU_ARGC_SRC0C_YYY:
sprintf(buf, "%s.yyy",
srcc[d / 4]);
break;
case R300_ALU_ARGC_SRC0C_ZZZ:
sprintf(buf, "%s.zzz",
srcc[d / 4]);
break;
}
} else if (d < 15) {
/* 12..14: alpha source replicated to all channels. */
sprintf(buf, "%s.www", srca[d - 12]);
} else if (d < 20 ) {
/* 15..19: presubtract result. buf keeps its previous contents if
 * no case label matches. */
switch(d) {
case R300_ALU_ARGC_SRCP_XYZ:
sprintf(buf, "srcp.xyz");
break;
case R300_ALU_ARGC_SRCP_XXX:
sprintf(buf, "srcp.xxx");
break;
case R300_ALU_ARGC_SRCP_YYY:
sprintf(buf, "srcp.yyy");
break;
case R300_ALU_ARGC_SRCP_ZZZ:
sprintf(buf, "srcp.zzz");
break;
case R300_ALU_ARGC_SRCP_WWW:
sprintf(buf, "srcp.www");
break;
}
} else if (d == 20) {
sprintf(buf, "0.0");
} else if (d == 21) {
sprintf(buf, "1.0");
} else if (d == 22) {
sprintf(buf, "0.5");
} else if (d >= 23 && d < 32) {
/* 23..31: three further swizzles for each of the three sources. */
d -= 23;
switch (d / 3) {
case 0:
sprintf(buf, "%s.yzx",
srcc[d % 3]);
break;
case 1:
sprintf(buf, "%s.zxy",
srcc[d % 3]);
break;
case 2:
sprintf(buf, "%s.Wzy",
srcc[d % 3]);
break;
}
} else {
/* Not reachable for a 5-bit value; kept as a fallback. */
sprintf(buf, "%i", d);
}
sprintf(argc[j], "%s%s%s%s",
(regc & 32) ? "-" : "",
(regc & 64) ? "|" : "",
buf, (regc & 64) ? "|" : "");
/* Alpha argument selector. */
d = rega & 31;
if (d < 9) {
/* 0..8: x/y/z channel of one of the three RGB sources. */
sprintf(buf, "%s.%c", srcc[d / 3],
'x' + (char)(d % 3));
} else if (d < 12) {
/* 9..11: w channel of one of the alpha sources. */
sprintf(buf, "%s.w", srca[d - 9]);
} else if (d < 16) {
/* 12..15: presubtract result. */
switch(d) {
case R300_ALU_ARGA_SRCP_X:
sprintf(buf, "srcp.x");
break;
case R300_ALU_ARGA_SRCP_Y:
sprintf(buf, "srcp.y");
break;
case R300_ALU_ARGA_SRCP_Z:
sprintf(buf, "srcp.z");
break;
case R300_ALU_ARGA_SRCP_W:
sprintf(buf, "srcp.w");
break;
}
} else if (d == 16) {
sprintf(buf, "0.0");
} else if (d == 17) {
sprintf(buf, "1.0");
} else if (d == 18) {
sprintf(buf, "0.5");
} else {
sprintf(buf, "%i", d);
}
sprintf(arga[j], "%s%s%s%s",
(rega & 32) ? "-" : "",
(rega & 64) ? "|" : "",
buf, (rega & 64) ? "|" : "");
}
/* Second line pair: decoded operands plus the raw opcode words. */
fprintf(stderr, "     xyz: %8s %8s %8s    op: %08x %s\n"
"       w: %8s %8s %8s    op: %08x\n",
argc[0], argc[1], argc[2],
code->alu.inst[i].rgb_inst,
code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ?
"NOP" : "",
arga[0], arga[1],arga[2],
code->alu.inst[i].alpha_inst);
}
}
}

View file

@ -0,0 +1,44 @@
/*
* Copyright (C) 2005 Ben Skeggs.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/*
* Authors:
* Ben Skeggs <darktama@iinet.net.au>
* Jerome Glisse <j.glisse@gmail.com>
*/
#ifndef __R300_FRAGPROG_H_
#define __R300_FRAGPROG_H_
#include "radeon_compiler.h"
#include "radeon_program.h"
/* Final compilation step: turn the intermediate program held by \p c into
 * R300 fragment program hardware code. */
extern void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user);
/* Print a human-readable listing of the generated R300 hardware code to
 * stderr. */
extern void r300FragmentProgramDump(struct radeon_compiler *c, void *user);
#endif

View file

@ -0,0 +1,536 @@
/*
* Copyright (C) 2005 Ben Skeggs.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* \file
*
* Emit the r300_fragment_program_code that can be understood by the hardware.
* Input is a pre-transformed radeon_program.
*
* \author Ben Skeggs <darktama@iinet.net.au>
*
* \author Jerome Glisse <j.glisse@gmail.com>
*/
#include "r300_fragprog.h"
#include "../r300_reg.h"
#include "radeon_program_pair.h"
#include "r300_fragprog_swizzle.h"
/**
 * Bookkeeping carried through the emission of one fragment program.
 */
struct r300_emit_state {
	/* Compiler whose program is being emitted. */
	struct r300_fragment_program_compiler * compiler;

	/* Index of the node currently being filled (0..3). */
	unsigned current_node : 2;
	/* Index of the first TEX / ALU instruction of the current node.
	 * These are full-width on purpose: finish_node() encodes bits 6..8
	 * of the ALU start offset, so an 8-bit field would silently wrap
	 * for nodes starting at instruction 256 or later. */
	unsigned node_first_tex;
	unsigned node_first_alu;
	/* Flag bits OR'ed into the code_addr word of the current node. */
	uint32_t node_flags;
};
/*
 * Declares the two locals the emit helpers work with: `c`, the fragment
 * program compiler taken from the in-scope `emit` state, and `code`, the
 * r300 hardware code being filled in. It expands to declarations, so a
 * variable named `emit` must be visible where it is used.
 */
#define PROG_CODE \
struct r300_fragment_program_compiler *c = emit->compiler; \
struct r300_fragment_program_code *code = &c->code->code.r300
/*
 * Report a compile error through rc_error(), prefixing the message with
 * the source file and function name and appending a newline. A variable
 * `c` (the fragment program compiler) has to be in scope at the call
 * site. Uses the GNU named variadic macro syntax (`args...`, `##args`).
 */
#define error(fmt, args...) do { \
rc_error(&c->Base, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
} while(0)
/**
 * Extract the three bits above the low six bits of an ALU instruction
 * index, i.e. bits 6..8 of \p bits.
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
	const unsigned int above_low6 = bits >> 6;

	return above_low6 & 0x7;
}
/**
 * Extract the most significant bits of a TEX instruction index, i.e. the
 * four bits that follow the \p lsbs least significant ones.
 *
 * The previous mask was 0x15 (binary 10101), which is not contiguous: it
 * dropped bits 1 and 3 of the result and let bit 4 through. 0xf (decimal
 * 15) keeps exactly four bits.
 * NOTE(review): four bits assumes the r400 MSB fields are 4 bits wide -
 * confirm against the register header.
 *
 * @param bits The complete index
 * @param lsbs The number of least significant bits
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
	return (bits >> lsbs) & 0xf;
}
/* Shift x right by lsbs bits and keep the bits selected by mask. Every
 * argument is parenthesised so that expressions such as `a + b` or
 * `m1 | m2` can be passed safely. */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
/**
 * Record that temporary register \p index is used: code->pixsize keeps
 * track of the highest register index seen so far.
 */
static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
{
	if (index <= code->pixsize)
		return;

	code->pixsize = index;
}
/**
 * Compute the 6-bit source address field for one instruction source.
 *
 * Constants are encoded as their index with bit 5 set. Temporaries and
 * inputs are encoded as the low five bits of their index and are also
 * recorded as used registers. Unused sources and all other register
 * files yield 0.
 */
static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
{
	if (!src.Used)
		return 0;

	if (src.File == RC_FILE_CONSTANT)
		return src.Index | (1 << 5);

	if (src.File != RC_FILE_TEMPORARY && src.File != RC_FILE_INPUT)
		return 0;

	use_temporary(code, src.Index);
	return src.Index & 0x1f;
}
/**
 * Map a compiler opcode to the hardware opcode of the RGB (vector) ALU.
 *
 * NOP is emitted as MAD. Any opcode without an RGB equivalent is reported
 * as an error and emitted as MAD as well.
 */
static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R300_ALU_OUTC_MAD;
	case RC_OPCODE_CMP:
		return R300_ALU_OUTC_CMP;
	case RC_OPCODE_CND:
		return R300_ALU_OUTC_CND;
	case RC_OPCODE_DP3:
		return R300_ALU_OUTC_DP3;
	case RC_OPCODE_DP4:
		return R300_ALU_OUTC_DP4;
	case RC_OPCODE_FRC:
		return R300_ALU_OUTC_FRC;
	case RC_OPCODE_MAX:
		return R300_ALU_OUTC_MAX;
	case RC_OPCODE_MIN:
		return R300_ALU_OUTC_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R300_ALU_OUTC_REPL_ALPHA;
	default:
		break;
	}

	/* Unknown opcode: complain, then behave like MAD. */
	error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
	return R300_ALU_OUTC_MAD;
}
/**
 * Map a compiler opcode to the hardware opcode of the alpha (scalar) ALU.
 *
 * NOP is emitted as MAD. Any opcode without an alpha equivalent is
 * reported as an error and emitted as MAD as well.
 */
static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
	case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
	/* NOTE(review): DP3 maps to the DP4 alpha opcode, exactly as before;
	 * presumably deliberate - confirm against the hardware docs. */
	case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
	case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
	case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
	default:
		/* The message used to name translate_rgb_opcode (copy-paste). */
		error("translate_alpha_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
		/* fall through */
	case RC_OPCODE_NOP:
		/* fall through */
	case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
	case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
	case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
	case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
	case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
	}
}
/**
 * Emit one paired ALU instruction.
 *
 * Appends a new entry to code->alu.inst[] and fills its rgb_inst,
 * alpha_inst, rgb_addr, alpha_addr and r400_ext_addr words from \p inst.
 * The fields are only OR'ed into, so the entry is expected to be zero
 * beforehand.
 *
 * \return 1 on success, 0 if the ALU instruction limit has been reached
 * (an error is reported in that case).
 */
static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
{
int ip;
int j;
PROG_CODE;
if (code->alu.length >= c->Base.max_alu_insts) {
error("Too many ALU instructions");
return 0;
}
ip = code->alu.length++;
/* The opcode words are assigned; everything below is OR'ed in. */
code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
/* Three sources and three arguments for each of the two ALUs. */
for(j = 0; j < 3; ++j) {
/* Set the RGB address */
unsigned int src = use_source(code, inst->RGB.Src[j]);
unsigned int arg;
/* NOTE(review): the extension bit depends on Index alone, regardless
 * of the source's File or Used fields - confirm that is intended. */
if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
/* 6 address bits per source. */
code->alu.inst[ip].rgb_addr |= src << (6*j);
/* Set the Alpha address */
src = use_source(code, inst->Alpha.Src[j]);
if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
code->alu.inst[ip].alpha_addr |= src << (6*j);
/* 7 bits per argument: swizzle selector, negate (bit 5), abs (bit 6). */
arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
arg |= inst->RGB.Arg[j].Abs << 6;
arg |= inst->RGB.Arg[j].Negate << 5;
code->alu.inst[ip].rgb_inst |= arg << (7*j);
arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
arg |= inst->Alpha.Arg[j].Abs << 6;
arg |= inst->Alpha.Arg[j].Negate << 5;
code->alu.inst[ip].alpha_inst |= arg << (7*j);
}
/* Presubtract */
/* For the presubtract source slot, Index holds the RC_PRESUB_* operation. */
if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
case RC_PRESUB_BIAS:
code->alu.inst[ip].rgb_inst |=
R300_ALU_SRCP_1_MINUS_2_SRC0;
break;
case RC_PRESUB_ADD:
code->alu.inst[ip].rgb_inst |=
R300_ALU_SRCP_SRC1_PLUS_SRC0;
break;
case RC_PRESUB_SUB:
code->alu.inst[ip].rgb_inst |=
R300_ALU_SRCP_SRC1_MINUS_SRC0;
break;
case RC_PRESUB_INV:
code->alu.inst[ip].rgb_inst |=
R300_ALU_SRCP_1_MINUS_SRC0;
break;
default:
break;
}
}
if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
case RC_PRESUB_BIAS:
code->alu.inst[ip].alpha_inst |=
R300_ALU_SRCP_1_MINUS_2_SRC0;
break;
case RC_PRESUB_ADD:
code->alu.inst[ip].alpha_inst |=
R300_ALU_SRCP_SRC1_PLUS_SRC0;
break;
case RC_PRESUB_SUB:
code->alu.inst[ip].alpha_inst |=
R300_ALU_SRCP_SRC1_MINUS_SRC0;
break;
case RC_PRESUB_INV:
code->alu.inst[ip].alpha_inst |=
R300_ALU_SRCP_1_MINUS_SRC0;
break;
default:
break;
}
}
/* Saturate maps to the clamp bit of the respective opcode word. */
if (inst->RGB.Saturate)
code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
if (inst->Alpha.Saturate)
code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
/* RGB result written to a temporary register. */
if (inst->RGB.WriteMask) {
use_temporary(code, inst->RGB.DestIndex);
if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
code->alu.inst[ip].rgb_addr |=
((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
}
/* RGB result written to a program output. */
if (inst->RGB.OutputWriteMask) {
code->alu.inst[ip].rgb_addr |=
(inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
R300_RGB_TARGET(inst->RGB.Target);
emit->node_flags |= R300_RGBA_OUT;
}
/* Alpha result written to a temporary register. */
if (inst->Alpha.WriteMask) {
use_temporary(code, inst->Alpha.DestIndex);
if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
code->alu.inst[ip].alpha_addr |=
((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
R300_ALU_DSTA_REG;
}
/* Alpha result written to a program output. */
if (inst->Alpha.OutputWriteMask) {
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
R300_ALPHA_TARGET(inst->Alpha.Target);
emit->node_flags |= R300_RGBA_OUT;
}
/* Alpha result written to depth; also recorded on the program. */
if (inst->Alpha.DepthWriteMask) {
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
emit->node_flags |= R300_W_OUT;
c->code->writes_depth = 1;
}
if (inst->Nop)
code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
return 1;
}
/**
 * Finish the current node without advancing to the next one.
 *
 * Computes the ALU/TEX start and size of the node, writes them to
 * code_addr[current_node] and adds the r400 extension bits. A node with
 * no ALU instruction gets a single all-zero instruction first.
 *
 * \return 1 on success, 0 on error (the filler ALU instruction could not
 * be emitted, or a node other than the first has no TEX instructions).
 */
static int finish_node(struct r300_emit_state * emit)
{
struct r300_fragment_program_compiler * c = emit->compiler;
struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
unsigned alu_offset;
unsigned alu_end;
unsigned tex_offset;
unsigned tex_end;
unsigned int alu_offset_msbs, alu_end_msbs;
if (code->alu.length == emit->node_first_alu) {
/* Generate a single NOP for this node */
struct rc_pair_instruction inst;
memset(&inst, 0, sizeof(inst));
if (!emit_alu(emit, &inst))
return 0;
}
/* The "end" values are sizes minus one. */
alu_offset = emit->node_first_alu;
alu_end = code->alu.length - alu_offset - 1;
tex_offset = emit->node_first_tex;
tex_end = code->tex.length - tex_offset - 1;
if (code->tex.length == emit->node_first_tex) {
/* No TEX instruction in this node: only allowed for the first node.
 * tex_end wrapped around in the subtraction above, so reset it. */
if (emit->current_node > 0) {
error("Node %i has no TEX instructions", emit->current_node);
return 0;
}
tex_end = 0;
} else {
if (emit->current_node == 0)
code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
}
/* Write the config register.
* Note: The order in which the words for each node are written
* is not correct here and needs to be fixed up once we're entirely
* done
*
* Also note that the register specification from AMD is slightly
* incorrect in its description of this register. */
code->code_addr[emit->current_node] =
((alu_offset << R300_ALU_START_SHIFT)
& R300_ALU_START_MASK)
| ((alu_end << R300_ALU_SIZE_SHIFT)
& R300_ALU_SIZE_MASK)
| ((tex_offset << R300_TEX_START_SHIFT)
& R300_TEX_START_MASK)
| ((tex_end << R300_TEX_SIZE_SHIFT)
& R300_TEX_SIZE_MASK)
| emit->node_flags
| (get_msbs_tex(tex_offset, 5)
<< R400_TEX_START_MSB_SHIFT)
| (get_msbs_tex(tex_end, 5)
<< R400_TEX_SIZE_MSB_SHIFT)
;
/* Write r400 extended instruction fields. These will be ignored on
* r300 cards. */
alu_offset_msbs = get_msbs_alu(alu_offset);
alu_end_msbs = get_msbs_alu(alu_end);
/* Node n uses the START/SIZE field numbered 3 - n. "<<" binds tighter
 * than "|", so each line below ORs two shifted values. */
switch(emit->current_node) {
case 0:
code->r400_code_offset_ext |=
alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
break;
case 1:
code->r400_code_offset_ext |=
alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
break;
case 2:
code->r400_code_offset_ext |=
alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
break;
case 3:
code->r400_code_offset_ext |=
alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
break;
}
return 1;
}
/**
 * Start a new block of texture instructions.
 *
 * If the current node already contains instructions it is finished and a
 * new node (one more level of texture indirection) is opened; an empty
 * node is simply reused.
 *
 * \return 1 on success, 0 if the node limit is exceeded or the current
 * node could not be finished.
 */
static int begin_tex(struct r300_emit_state * emit)
{
	PROG_CODE;
	const int node_is_empty =
		code->alu.length == emit->node_first_alu &&
		code->tex.length == emit->node_first_tex;

	if (node_is_empty)
		return 1;

	/* Nodes are numbered 0..3, so node 3 is the last one available. */
	if (emit->current_node == 3) {
		error("Too many texture indirections");
		return 0;
	}

	if (!finish_node(emit))
		return 0;

	/* Open the next node at the current end of both instruction lists. */
	emit->node_flags = 0;
	emit->node_first_alu = code->alu.length;
	emit->node_first_tex = code->tex.length;
	emit->current_node++;
	return 1;
}
static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
{
unsigned int unit;
unsigned int dest;
unsigned int opcode;
PROG_CODE;
if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
error("Too many TEX instructions");
return 0;
}
unit = inst->U.I.TexSrcUnit;
dest = inst->U.I.DstReg.Index;
switch(inst->U.I.Opcode) {
case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
default:
error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
return 0;
}
if (inst->U.I.Opcode == RC_OPCODE_KIL) {
unit = 0;
dest = 0;
} else {
use_temporary(code, dest);
}
use_temporary(code, inst->U.I.SrcReg[0].Index);
code->tex.inst[code->tex.length++] =
((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
& R300_SRC_ADDR_MASK)
| ((dest << R300_DST_ADDR_SHIFT)
& R300_DST_ADDR_MASK)
| (unit << R300_TEX_ID_SHIFT)
| (opcode << R300_TEX_INST_SHIFT)
| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
R400_SRC_ADDR_EXT_BIT : 0)
| (dest >= R300_PFS_NUM_TEMP_REGS ?
R400_DST_ADDR_EXT_BIT : 0)
;
return 1;
}
/**
 * Final compilation step: Turn the intermediate radeon_program into
 * machine-readable instructions.
 *
 * The hardware code structure is cleared, every instruction of the
 * program is emitted, the last node is finished and the global
 * config/code_offset words are filled in. The function returns early,
 * leaving the code incomplete, once an error is flagged on the compiler.
 *
 * \param c     compiler; it is cast to r300_fragment_program_compiler
 * \param user  not used by this function
 */
void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
{
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
struct r300_emit_state emit;
struct r300_fragment_program_code *code = &compiler->code->code.r300;
unsigned int tex_end;
memset(&emit, 0, sizeof(emit));
emit.compiler = compiler;
memset(code, 0, sizeof(struct r300_fragment_program_code));
/* Walk the circular instruction list. The return values of the emit
 * helpers are not checked; the loop stops via compiler->Base.Error
 * instead (presumably set by rc_error() - verify). */
for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
inst = inst->Next) {
if (inst->Type == RC_INSTRUCTION_NORMAL) {
/* Normal instructions are either the BEGIN_TEX marker or a
 * texture instruction. */
if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
begin_tex(&emit);
continue;
}
emit_tex(&emit, inst);
} else {
/* Everything else is handled as a paired ALU instruction. */
emit_alu(&emit, &inst->U.P);
}
}
/* pixsize is the highest register index used, hence ">=". */
if (code->pixsize >= compiler->Base.max_temp_regs)
rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
if (compiler->Base.Error)
return;
/* Finish the program */
finish_node(&emit);
code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
/* Set r400 extended instruction fields.  These values will be ignored
* on r300 cards. */
code->r400_code_offset_ext |=
(get_msbs_alu(0)
<< R400_ALU_OFFSET_MSB_SHIFT)
| (get_msbs_alu(code->alu.length - 1)
<< R400_ALU_SIZE_MSB_SHIFT);
/* Inclusive end of the TEX instructions; 0 when there are none. */
tex_end = code->tex.length ? code->tex.length - 1 : 0;
/* NOTE(review): the last get_msbs_tex() call below passes 6 as the
 * number of low bits while every other call site uses 5 - confirm
 * against the width of the TEX_END field. */
code->code_offset =
((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
& R300_PFS_CNTL_ALU_OFFSET_MASK)
| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
& R300_PFS_CNTL_ALU_END_MASK)
| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
& R300_PFS_CNTL_TEX_OFFSET_MASK)
| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
& R300_PFS_CNTL_TEX_END_MASK)
| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
;
/* finish_node() stored the node words at indices 0..current_node. Move
 * them to the last slots of code_addr[] (copying from the top down so
 * no entry is overwritten early) and clear the unused leading slots. */
if (emit.current_node < 3) {
int shift = 3 - emit.current_node;
int i;
for(i = emit.current_node; i >= 0; --i)
code->code_addr[shift + i] = code->code_addr[i];
for(i = 0; i < shift; ++i)
code->code_addr[i] = 0;
}
/* Flag the program when it exceeds any of the R300 limits checked here. */
if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
|| code->alu.length > R300_PFS_MAX_ALU_INST
|| code->tex.length > R300_PFS_MAX_TEX_INST) {
code->r390_mode = 1;
}
}

View file

@ -0,0 +1,243 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* @file
* Utilities to deal with the somewhat odd restriction on R300 fragment
* program swizzles.
*/
#include "r300_fragprog_swizzle.h"
#include <stdio.h>
#include "../r300_reg.h"
#include "radeon_compiler.h"
/* Build a swizzle from three RGB selectors given by their RC_SWIZZLE_ suffix
 * (e.g. X, ONE); the fourth selector is always RC_SWIZZLE_ZERO. */
#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO))
/**
 * One row of the native RGB swizzle table: the swizzle it matches and the
 * values needed to compute the hardware code for a given source.
 */
struct swizzle_data {
unsigned int hash; /**< swizzle value this matches */
unsigned int base; /**< base value for hw swizzle */
unsigned int stride; /**< difference in base between arg0/1/2 */
unsigned int srcp_stride; /**< difference in base between arg0/srcp */
};
/**
 * RGB swizzles that can be encoded directly.
 *
 * Columns: swizzle to match, hardware base code, stride, srcp_stride.
 * Rows with srcp_stride == 0 are rejected for the presubtract source by
 * r300_swizzle_is_native() and r300FPTranslateRGBSwizzle().
 * The table is searched in order by lookup_native_swizzle() and
 * r300_swizzle_split().
 */
static const struct swizzle_data native_swizzles[] = {
{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15},
{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15},
{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15},
{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15},
{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7},
{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0},
{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0},
{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0},
{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0},
{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0},
{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}
};
/* Number of rows in native_swizzles. */
static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
/**
* Find a native RGB swizzle that matches the given swizzle.
* Returns 0 if none found.
*/
static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
{
int i, comp;
for(i = 0; i < num_native_swizzles; ++i) {
const struct swizzle_data* sd = &native_swizzles[i];
for(comp = 0; comp < 3; ++comp) {
unsigned int swz = GET_SWZ(swizzle, comp);
if (swz == RC_SWIZZLE_UNUSED)
continue;
if (swz != GET_SWZ(sd->hash, comp))
break;
}
if (comp == 3)
return sd;
}
return 0;
}
/**
 * Report whether the given swizzle has an entry in the native swizzle
 * table for r300/r400. In most situations it is better to use
 * r300_swizzle_is_native(), which can be accessed via
 * struct radeon_compiler *c; c->SwizzleCaps->IsNative().
 *
 * Returns 1 if the swizzle is native, 0 otherwise.
 */
int r300_swizzle_is_native_basic(unsigned int swizzle)
{
	return lookup_native_swizzle(swizzle) != 0;
}
/**
 * Decide whether the swizzle/negate/abs combination of a source register
 * can be used directly by the given opcode.
 *
 * Texture opcodes (KIL, TEX, TXB, TXP) accept only an identity swizzle
 * without abs or negate. For all other opcodes the negate bits of the used
 * RGB components must be all set or all clear, the swizzle must be in the
 * native table, and a presubtract source needs a non-zero srcp_stride.
 *
 * Returns 1 if native, 0 otherwise.
 */
static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
	const int is_tex_op = opcode == RC_OPCODE_KIL ||
			      opcode == RC_OPCODE_TEX ||
			      opcode == RC_OPCODE_TXB ||
			      opcode == RC_OPCODE_TXP;
	const struct swizzle_data *native;
	unsigned int used_rgb = 0;
	unsigned int negated;
	unsigned int chan;

	if (is_tex_op) {
		if (reg.Abs || reg.Negate)
			return 0;

		/* Every used component must select itself. */
		for (chan = 0; chan < 4; ++chan) {
			unsigned int sel = GET_SWZ(reg.Swizzle, chan);

			if (sel != RC_SWIZZLE_UNUSED && sel != chan)
				return 0;
		}
		return 1;
	}

	/* Collect the RGB components that are actually read. */
	for (chan = 0; chan < 3; ++chan) {
		if (GET_SWZ(reg.Swizzle, chan) != RC_SWIZZLE_UNUSED)
			used_rgb |= 1 << chan;
	}

	/* Negation must cover all used RGB components or none of them. */
	negated = reg.Negate & used_rgb;
	if (negated && negated != used_rgb)
		return 0;

	native = lookup_native_swizzle(reg.Swizzle);
	if (!native)
		return 0;
	if (reg.File == RC_FILE_PRESUB && native->srcp_stride == 0)
		return 0;

	return 1;
}
/**
 * Split a source register access into phases, each of which covers a set
 * of components that one native swizzle can provide.
 *
 * Each pass over the table picks the native swizzle that matches the most
 * still-pending RGB components of `mask` with a consistent negate bit,
 * records the matched components as one phase in `split`, and removes them
 * from `mask`. This repeats until `mask` is empty.
 *
 * NOTE(review): if no table entry matches any pending component, the phase
 * mask is 0 (when W is not pending) and `mask` does not shrink; presumably
 * callers only pass components whose selectors appear in the table - verify.
 */
static void r300_swizzle_split(
struct rc_src_register src, unsigned int mask,
struct rc_swizzle_split * split)
{
split->NumPhases = 0;
while(mask) {
unsigned int best_matchcount = 0;
unsigned int best_matchmask = 0;
int i, comp;
for(i = 0; i < num_native_swizzles; ++i) {
const struct swizzle_data *sd = &native_swizzles[i];
unsigned int matchcount = 0;
unsigned int matchmask = 0;
for(comp = 0; comp < 3; ++comp) {
unsigned int swz;
/* Skip components that are not pending or not read. */
if (!GET_BIT(mask, comp))
continue;
swz = GET_SWZ(src.Swizzle, comp);
if (swz == RC_SWIZZLE_UNUSED)
continue;
if (swz == GET_SWZ(sd->hash, comp)) {
/* check if the negate bit of current component
* is the same for already matched components */
if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp))))
continue;
matchcount++;
matchmask |= 1 << comp;
}
}
if (matchcount > best_matchcount) {
best_matchcount = matchcount;
best_matchmask = matchmask;
/* All pending RGB components are covered: no better match possible. */
if (matchmask == (mask & RC_MASK_XYZ))
break;
}
}
/* W is not looked up in the table; a pending W joins the current phase. */
if (mask & RC_MASK_W)
best_matchmask |= RC_MASK_W;
split->Phase[split->NumPhases++] = best_matchmask;
mask &= ~best_matchmask;
}
}
/* Swizzle capability callbacks built from the r300 native-swizzle helpers
 * defined in this file. */
struct rc_swizzle_caps r300_swizzle_caps = {
.IsNative = r300_swizzle_is_native,
.Split = r300_swizzle_split
};
/**
 * Compute the hardware code of an RGB (XYZ) swizzle for the given
 * instruction source.
 *
 * `src` is either a source index or RC_PAIR_PRESUB_SRC. If the swizzle is
 * not in the native table, or cannot be used with the presubtract source,
 * a message is printed to stderr and 0 is returned.
 */
unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
{
	const struct swizzle_data *native = lookup_native_swizzle(swizzle);
	const int is_presub = (src == RC_PAIR_PRESUB_SRC);
	unsigned int offset;

	if (native == 0 || (is_presub && native->srcp_stride == 0)) {
		fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
		return 0;
	}

	offset = is_presub ? native->srcp_stride : src * native->stride;
	return native->base + offset;
}
/**
 * Compute the hardware code of an Alpha (W) swizzle for the given
 * instruction source.
 *
 * Only the first component of `swizzle` is looked at. Selectors that are
 * not X/Y/Z/W/ZERO/HALF translate to R300_ALU_ARGA_ONE.
 */
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
{
	const unsigned int sel = GET_SWZ(swizzle, 0);

	if (src == RC_PAIR_PRESUB_SRC)
		return R300_ALU_ARGA_SRCP_X + sel;

	/* X, Y and Z of a regular source. */
	if (sel < 3)
		return sel + 3*src;

	if (sel == RC_SWIZZLE_W)
		return R300_ALU_ARGA_SRC0A + src;
	if (sel == RC_SWIZZLE_ZERO)
		return R300_ALU_ARGA_ZERO;
	if (sel == RC_SWIZZLE_HALF)
		return R300_ALU_ARGA_HALF;

	/* RC_SWIZZLE_ONE and any other selector. */
	return R300_ALU_ARGA_ONE;
}

View file

@ -0,0 +1,39 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __R300_FRAGPROG_SWIZZLE_H_
#define __R300_FRAGPROG_SWIZZLE_H_
#include "radeon_swizzle.h"
/** r300 fragment program swizzle capabilities (IsNative and Split callbacks). */
extern struct rc_swizzle_caps r300_swizzle_caps;
/** Translate an RGB (XYZ) swizzle into the hardware code for source `src`;
 * returns 0 and prints to stderr if the swizzle is not native. */
unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
/** Translate an Alpha (W) swizzle into the hardware code for source `src`. */
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
/** Return 1 if `swizzle` is found in the native swizzle table, 0 otherwise. */
int r300_swizzle_is_native_basic(unsigned int swizzle);
#endif /* __R300_FRAGPROG_SWIZZLE_H_ */

View file

@ -0,0 +1,172 @@
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "radeon_compiler.h"
#include <stdio.h>
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
#include "radeon_program_alu.h"
#include "radeon_program_tex.h"
#include "radeon_rename_regs.h"
#include "radeon_remove_constants.h"
#include "r300_fragprog.h"
#include "r300_fragprog_swizzle.h"
#include "r500_fragprog.h"
/**
 * Report the program outputs that are consumed after the shader runs.
 *
 * Invokes `callback` once for each of the four colour outputs with all four
 * components, then once for the depth output with only the W component.
 * `userdata` is the r300_fragment_program_compiler; `data` is passed through
 * to the callback unchanged.
 */
static void dataflow_outputs_mark_use(void * userdata, void * data,
		void (*callback)(void *, unsigned int, unsigned int))
{
	struct r300_fragment_program_compiler * c = userdata;
	unsigned int rt;

	for (rt = 0; rt < 4; ++rt)
		callback(data, c->OutputColor[rt], RC_MASK_XYZW);

	callback(data, c->OutputDepth, RC_MASK_W);
}
/**
 * Compiler pass: move writes to the depth output from the Z component to
 * the W component.
 *
 * For every instruction writing the depth output register, a write mask
 * containing Z becomes a W-only mask; a mask without Z becomes empty.
 * Componentwise instructions additionally get all of their sources
 * re-swizzled through ZZZZ so that W receives the former Z value.
 */
static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
{
	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
	struct rc_instruction *const sentinel = &c->Base.Program.Instructions;
	struct rc_instruction *cur;

	for (cur = sentinel->Next; cur != sentinel; cur = cur->Next) {
		struct rc_sub_instruction *sub = &cur->U.I;
		const struct rc_opcode_info *opinfo = rc_get_opcode_info(sub->Opcode);
		unsigned src;

		if (sub->DstReg.File != RC_FILE_OUTPUT || sub->DstReg.Index != c->OutputDepth)
			continue;

		if (!(sub->DstReg.WriteMask & RC_MASK_Z)) {
			/* Depth was not written at all: drop the write. */
			sub->DstReg.WriteMask = 0;
			continue;
		}
		sub->DstReg.WriteMask = RC_MASK_W;

		if (!opinfo->IsComponentwise)
			continue;

		for (src = 0; src < opinfo->NumSrcRegs; ++src)
			sub->SrcReg[src] = lmul_swizzle(RC_SWIZZLE_ZZZZ, sub->SrcReg[src]);
	}
}
/**
 * Local transformation: clamp every write to an output register to [0, 1].
 *
 * Returns 1 if the instruction writes an output register (and was given
 * RC_SATURATE_ZERO_ONE), 0 if it was left untouched.
 */
static int radeon_saturate_output(
		struct radeon_compiler * c,
		struct rc_instruction * inst,
		void* data)
{
	const struct rc_opcode_info *opinfo = rc_get_opcode_info(inst->U.I.Opcode);

	if (opinfo->HasDstReg && inst->U.I.DstReg.File == RC_FILE_OUTPUT) {
		inst->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
		return 1;
	}

	return 0;
}
/**
 * Compile a fragment program.
 *
 * Builds the ordered list of compiler passes (several of them enabled or
 * disabled depending on is_r500, disable_optimizations and frag_clamp),
 * runs the list with rc_run_compiler(), and finally copies the program's
 * constants into c->code->constants.
 */
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{
/* Flags used in the PREDICATE column of the pass list below. */
int is_r500 = c->Base.is_r500;
int opt = !c->Base.disable_optimizations;
int sat_out = c->state.frag_clamp;
/* Lists of instruction transformations. */
/* Each list is terminated by a { 0, 0 } entry. */
struct radeon_program_transformation saturate_output[] = {
{ &radeon_saturate_output, c },
{ 0, 0 }
};
struct radeon_program_transformation rewrite_tex[] = {
{ &radeonTransformTEX, c },
{ 0, 0 }
};
struct radeon_program_transformation rewrite_if[] = {
{ &r500_transform_IF, 0 },
{0, 0}
};
struct radeon_program_transformation native_rewrite_r500[] = {
{ &radeonTransformALU, 0 },
{ &radeonTransformDeriv, 0 },
{ &radeonTransformTrigScale, 0 },
{ 0, 0 }
};
struct radeon_program_transformation native_rewrite_r300[] = {
{ &radeonTransformALU, 0 },
{ &r300_transform_trig_simple, 0 },
{ 0, 0 }
};
/* List of compiler passes. */
/* Passes that exist in an r500 and a non-r500 variant appear twice with
 * complementary predicates. The list ends with an all-NULL entry. */
struct radeon_compiler_pass fs_list[] = {
/* NAME DUMP PREDICATE FUNCTION PARAM */
{"rewrite depth out", 1, 1, rc_rewrite_depth_out, NULL},
/* This transformation needs to be done before any of the IF
* instructions are modified. */
{"transform KILP", 1, 1, rc_transform_KILP, NULL},
{"unroll loops", 1, is_r500, rc_unroll_loops, NULL},
{"transform loops", 1, !is_r500, rc_transform_loops, NULL},
{"emulate branches", 1, !is_r500, rc_emulate_branches, NULL},
{"saturate output writes", 1, sat_out, rc_local_transform, saturate_output},
{"transform TEX", 1, 1, rc_local_transform, rewrite_tex},
{"transform IF", 1, is_r500, rc_local_transform, rewrite_if},
{"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500},
{"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300},
{"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_use},
{"emulate loops", 1, !is_r500, rc_emulate_loops, NULL},
{"dataflow optimize", 1, opt, rc_optimize, NULL},
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
/* This pass makes it easier for the scheduler to group TEX
* instructions and reduces the chances of creating too
* many texture indirections.*/
{"register rename", 1, !is_r500, rc_rename_regs, NULL},
{"pair translate", 1, 1, rc_pair_translate, NULL},
{"pair scheduling", 1, 1, rc_pair_schedule, NULL},
{"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL},
{"register allocation", 1, 1, rc_pair_regalloc, &opt},
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL},
{"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL},
{"dump machine code", 0, is_r500 && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL},
{"dump machine code", 0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL},
{NULL, 0, 0, NULL, NULL}
};
c->Base.type = RC_FRAGMENT_PROGRAM;
/* Pick the swizzle capability table matching the chip generation. */
c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps;
rc_run_compiler(&c->Base, fs_list);
rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,207 @@
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "radeon_compiler.h"
#include "radeon_code.h"
#include "../r300_reg.h"
#include <stdio.h>
/**
 * Printable names of the vector-engine opcodes, indexed by the low five
 * bits of the opcode word (see r300_vs_op_dump). The table has exactly 32
 * entries so that every 5-bit index is valid. The strings are read-only.
 */
static const char *const r300_vs_ve_ops[] = {
	/* R300 vector ops */
	" VE_NO_OP",
	" VE_DOT_PRODUCT",
	" VE_MULTIPLY",
	" VE_ADD",
	" VE_MULTIPLY_ADD",
	" VE_DISTANCE_FACTOR",
	" VE_FRACTION",
	" VE_MAXIMUM",
	" VE_MINIMUM",
	"VE_SET_GREATER_THAN_EQUAL",
	" VE_SET_LESS_THAN",
	" VE_MULTIPLYX2_ADD",
	" VE_MULTIPLY_CLAMP",
	" VE_FLT2FIX_DX",
	" VE_FLT2FIX_DX_RND",
	/* R500 vector ops */
	" VE_PRED_SET_EQ_PUSH",
	" VE_PRED_SET_GT_PUSH",
	" VE_PRED_SET_GTE_PUSH",
	" VE_PRED_SET_NEQ_PUSH",
	" VE_COND_WRITE_EQ",
	" VE_COND_WRITE_GT",
	" VE_COND_WRITE_GTE",
	" VE_COND_WRITE_NEQ",
	" VE_COND_MUX_EQ",
	" VE_COND_MUX_GT",
	" VE_COND_MUX_GTE",
	" VE_SET_GREATER_THAN",
	" VE_SET_EQUAL",
	" VE_SET_NOT_EQUAL",
	" (reserved)",
	" (reserved)",
	" (reserved)",
};
/**
 * Printable names of the math-engine opcodes, indexed by the low five bits
 * of the opcode word (see r300_vs_op_dump). The table has exactly 32
 * entries so that every 5-bit index is valid. The strings are read-only.
 */
static const char *const r300_vs_me_ops[] = {
	/* R300 math ops */
	" ME_NO_OP",
	" ME_EXP_BASE2_DX",
	" ME_LOG_BASE2_DX",
	" ME_EXP_BASEE_FF",
	" ME_LIGHT_COEFF_DX",
	" ME_POWER_FUNC_FF",
	" ME_RECIP_DX",
	" ME_RECIP_FF",
	" ME_RECIP_SQRT_DX",
	" ME_RECIP_SQRT_FF",
	" ME_MULTIPLY",
	" ME_EXP_BASE2_FULL_DX",
	" ME_LOG_BASE2_FULL_DX",
	" ME_POWER_FUNC_FF_CLAMP_B",
	"ME_POWER_FUNC_FF_CLAMP_B1",
	"ME_POWER_FUNC_FF_CLAMP_01",
	" ME_SIN",
	" ME_COS",
	/* R500 math ops */
	" ME_LOG_BASE2_IEEE",
	" ME_RECIP_IEEE",
	" ME_RECIP_SQRT_IEEE",
	" ME_PRED_SET_EQ",
	" ME_PRED_SET_GT",
	" ME_PRED_SET_GTE",
	" ME_PRED_SET_NEQ",
	" ME_PRED_SET_CLR",
	" ME_PRED_SET_INV",
	" ME_PRED_SET_POP",
	" ME_PRED_SET_RESTORE",
	" (reserved)",
	" (reserved)",
	" (reserved)",
};
/* XXX refactor to avoid clashing symbols */
/**
 * One-letter tags for the source register class, indexed by the low two
 * bits of a source operand word (see r300_vs_src_dump). Read-only strings.
 */
static const char *const r300_vs_src_debug[] = {
	"t",
	"i",
	"c",
	"a",
};
/**
 * Short tags for the destination register class, indexed by bits 8..10 of
 * the opcode word (see r300_vs_op_dump). Eight entries cover every 3-bit
 * index. Read-only strings.
 */
static const char *const r300_vs_dst_debug[] = {
	"t",
	"a0",
	"o",
	"ox",
	"a",
	"i",
	"u",
	"u",
};
/**
 * Printable names of the per-component swizzle selectors, indexed by a
 * 3-bit selector taken from a source operand word (see r300_vs_src_dump).
 * Eight entries cover every 3-bit index. Read-only strings.
 */
static const char *const r300_vs_swiz_debug[] = {
	"X",
	"Y",
	"Z",
	"W",
	"0",
	"1",
	"U",
	"U",
};
/**
 * Print the decoded fields of a vertex shader opcode word to stderr:
 * destination index and register class, an optional predicate marker, and
 * the operation name (macro op, math-engine op or vector-engine op).
 *
 * The numeric fields are unsigned, so they are printed with %u and cast to
 * unsigned int to match the conversion exactly.
 */
static void r300_vs_op_dump(uint32_t op)
{
	fprintf(stderr, " dst: %u%s op: ",
		(unsigned int)((op >> 13) & 0x7f),
		r300_vs_dst_debug[(op >> 8) & 0x7]);

	if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) {
		fprintf(stderr, "PRED %u",
			(unsigned int)((op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1));
	}

	if (op & 0x80) {
		/* Bit 7 selects a macro op; bit 0 picks which one. */
		if (op & 0x1) {
			fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n");
		} else {
			fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n");
		}
	} else if (op & 0x40) {
		/* Bit 6 selects the math-engine name table. */
		fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]);
	} else {
		fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]);
	}
}
/**
 * Print the decoded fields of a vertex shader source operand word to
 * stderr: register index and class, followed by the four swizzle
 * selectors, each prefixed with "-" when its negate bit (bits 25..28) is
 * set.
 *
 * The register index is unsigned, so it is printed with %u and cast to
 * unsigned int to match the conversion exactly.
 */
static void r300_vs_src_dump(uint32_t src)
{
	fprintf(stderr, " reg: %u%s swiz: %s%s/%s%s/%s%s/%s%s\n",
		(unsigned int)((src >> 5) & 0xff), r300_vs_src_debug[src & 0x3],
		src & (1 << 25) ? "-" : " ",
		r300_vs_swiz_debug[(src >> 13) & 0x7],
		src & (1 << 26) ? "-" : " ",
		r300_vs_swiz_debug[(src >> 16) & 0x7],
		src & (1 << 27) ? "-" : " ",
		r300_vs_swiz_debug[(src >> 19) & 0x7],
		src & (1 << 28) ? "-" : " ",
		r300_vs_swiz_debug[(src >> 22) & 0x7]);
}
/**
 * Compiler pass: print the final vertex program code to stderr.
 *
 * Each instruction occupies four words of vs->body.d: one opcode word
 * followed by three source operand words. After the instructions, the flow
 * control ops are listed (two bits per op in vs->fc_ops) together with
 * their addresses, in r500 or r300 layout depending on c->Base.is_r500.
 *
 * The `user` argument is not used by this function.
 */
void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user)
{
	struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler*)compiler;
	struct r300_vertex_program_code * vs = c->code;
	unsigned instrcount = vs->length / 4;
	unsigned i;

	fprintf(stderr, "Final vertex program code:\n");

	for(i = 0; i < instrcount; i++) {
		unsigned offset = i*4;
		unsigned src;

		/* The counters are unsigned, hence %u rather than %d / %i. */
		fprintf(stderr, "%u: op: 0x%08x", i, vs->body.d[offset]);
		r300_vs_op_dump(vs->body.d[offset]);

		for(src = 0; src < 3; ++src) {
			fprintf(stderr, " src%u: 0x%08x", src, vs->body.d[offset+1+src]);
			r300_vs_src_dump(vs->body.d[offset+1+src]);
		}
	}

	fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
	for(i = 0; i < vs->num_fc_ops; i++) {
		/* Two bits per flow control op. */
		switch((vs->fc_ops >> (i * 2)) & 0x3 ) {
		case 0: fprintf(stderr, "NOP"); break;
		case 1: fprintf(stderr, "JUMP"); break;
		case 2: fprintf(stderr, "LOOP"); break;
		case 3: fprintf(stderr, "JSR"); break;
		}
		if (c->Base.is_r500) {
			fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n",
				vs->fc_op_addrs.r500[i].uw,
				vs->fc_op_addrs.r500[i].lw);
		} else {
			fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
		}
	}
}

View file

@ -0,0 +1,539 @@
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "r500_fragprog.h"
#include <stdio.h>
#include "radeon_compiler_util.h"
#include "radeon_list.h"
#include "radeon_variable.h"
#include "../r300_reg.h"
/**
 * Rewrite IF instructions to use the ALU result special register.
 *
 * Two strategies are used:
 *  - generic: a MOV with no destination is inserted before the IF; it
 *    writes the ALU result from the IF's condition source with a NOTEQUAL
 *    comparison.
 *  - direct: every instruction that writes the condition is itself turned
 *    into an ALU-result writer (set-on-compare opcodes become SUB with the
 *    matching compare function), so no extra instruction is needed.
 *
 * The generic strategy is chosen when no writers are found, when a writer
 * has more than one reader, when the writer/IF instruction pointers fail
 * the ordering check below, or when a flow control instruction sits
 * between a writer and the IF.
 *
 * Returns 0 if the instruction is not an IF, 1 if it was rewritten.
 * The `data` argument is not used by this function.
 */
int r500_transform_IF(
	struct radeon_compiler * c,
	struct rc_instruction * inst_if,
	void *data)
{
	struct rc_variable * writer;
	struct rc_list * writer_list, * list_ptr;
	struct rc_list * var_list = rc_get_variables(c);
	unsigned int generic_if = 0;
	unsigned int alu_chan;

	if (inst_if->U.I.Opcode != RC_OPCODE_IF) {
		return 0;
	}

	writer_list = rc_variable_list_get_writers(
			var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]);
	if (!writer_list) {
		generic_if = 1;
	} else {
		/* Make sure it is safe for the writers to write to
		 * ALU Result */
		for (list_ptr = writer_list; list_ptr;
						list_ptr = list_ptr->Next) {
			struct rc_instruction * inst;
			writer = list_ptr->Item;
			/* We are going to modify the destination register
			 * of writer, so if it has a reader other than
			 * inst_if (aka ReaderCount > 1) we must fall back to
			 * our generic IF.
			 * If the writer has a lower IP than inst_if, this
			 * means that inst_if is above the writer in a loop.
			 * I'm not sure why this would ever happen, but
			 * if it does we want to make sure we fall back
			 * to our generic IF. */
			if (writer->ReaderCount > 1
				|| writer->Inst->IP < inst_if->IP) {
				generic_if = 1;
				break;
			}

			/* The ALU Result is not preserved across IF
			 * instructions, so if there is another IF
			 * instruction between writer and inst_if, then
			 * we need to fall back to generic IF. */
			for (inst = writer->Inst; inst != inst_if; inst = inst->Next) {
				const struct rc_opcode_info * info =
					rc_get_opcode_info(inst->U.I.Opcode);
				if (info->IsFlowControl) {
					generic_if = 1;
					break;
				}
			}
			if (generic_if) {
				break;
			}
		}
	}

	/* The ALU result channel follows the first swizzle component of the
	 * IF's condition source. */
	if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) {
		alu_chan = RC_ALURESULT_X;
	} else {
		alu_chan = RC_ALURESULT_W;
	}

	if (generic_if) {
		struct rc_instruction * inst_mov =
				rc_insert_new_instruction(c, inst_if->Prev);

		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
		inst_mov->U.I.DstReg.WriteMask = 0;
		inst_mov->U.I.DstReg.File = RC_FILE_NONE;
		inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
		inst_mov->U.I.WriteALUResult = alu_chan;
		inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
		if (alu_chan == RC_ALURESULT_X) {
			inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
					inst_mov->U.I.SrcReg[0].Swizzle,
					RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
					RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
		} else {
			inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
					inst_mov->U.I.SrcReg[0].Swizzle,
					RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
					RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z);
		}
	} else {
		for (list_ptr = writer_list; list_ptr;
						list_ptr = list_ptr->Next) {
			/* These are per-writer decisions, so they start from
			 * their defaults for every writer; otherwise an
			 * operand swap or a preserved opcode chosen for one
			 * writer would leak into all following writers. */
			rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER;
			unsigned int reverse_srcs = 0;
			unsigned int preserve_opcode = 0;

			writer = list_ptr->Item;
			switch(writer->Inst->U.I.Opcode) {
			case RC_OPCODE_SEQ:
				compare_func = RC_COMPARE_FUNC_EQUAL;
				break;
			case RC_OPCODE_SNE:
				compare_func = RC_COMPARE_FUNC_NOTEQUAL;
				break;
			case RC_OPCODE_SLE:
				reverse_srcs = 1;
				/* Fall through */
			case RC_OPCODE_SGE:
				compare_func = RC_COMPARE_FUNC_GEQUAL;
				break;
			case RC_OPCODE_SGT:
				reverse_srcs = 1;
				/* Fall through */
			case RC_OPCODE_SLT:
				compare_func = RC_COMPARE_FUNC_LESS;
				break;
			default:
				/* Not a set-on-compare opcode: keep it and
				 * test its result against zero. */
				compare_func = RC_COMPARE_FUNC_NOTEQUAL;
				preserve_opcode = 1;
				break;
			}
			if (!preserve_opcode) {
				writer->Inst->U.I.Opcode = RC_OPCODE_SUB;
			}
			writer->Inst->U.I.DstReg.WriteMask = 0;
			writer->Inst->U.I.DstReg.File = RC_FILE_NONE;
			writer->Inst->U.I.WriteALUResult = alu_chan;
			writer->Inst->U.I.ALUResultCompare = compare_func;
			if (reverse_srcs) {
				struct rc_src_register temp_src;
				temp_src = writer->Inst->U.I.SrcReg[0];
				writer->Inst->U.I.SrcReg[0] =
					writer->Inst->U.I.SrcReg[1];
				writer->Inst->U.I.SrcReg[1] = temp_src;
			}
		}
	}

	/* The IF now reads the ALU result special register. */
	inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
	inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
	inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(
				RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
	inst_if->U.I.SrcReg[0].Negate = 0;

	return 1;
}
/**
 * Decide whether the swizzle/negate/abs combination of a source register
 * can be used directly by the given opcode on r500.
 *
 * - Texture opcodes and KIL: no abs, only X/Y/Z/W selectors, and no negate
 *   on any used component; KIL additionally requires an XYZW swizzle with
 *   no negate at all.
 * - DDX/DDY: only a plain XYZW swizzle without abs or negate.
 * - Everything else: the negate bits of the used, non-zero RGB components
 *   must be all set or all clear.
 *
 * Returns 1 if native, 0 otherwise.
 */
static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
	const int is_tex_op = opcode == RC_OPCODE_TEX ||
			      opcode == RC_OPCODE_TXB ||
			      opcode == RC_OPCODE_TXP ||
			      opcode == RC_OPCODE_TXD ||
			      opcode == RC_OPCODE_TXL ||
			      opcode == RC_OPCODE_KIL;
	const int is_derivative = opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY;
	unsigned int chan;

	if (is_tex_op) {
		/* Negate bits that still matter after ignoring unused components. */
		unsigned int live_negate = reg.Negate;

		if (reg.Abs)
			return 0;

		if (opcode == RC_OPCODE_KIL &&
		    !(reg.Swizzle == RC_SWIZZLE_XYZW && reg.Negate == RC_MASK_NONE))
			return 0;

		for (chan = 0; chan < 4; ++chan) {
			unsigned int sel = GET_SWZ(reg.Swizzle, chan);

			if (sel == RC_SWIZZLE_UNUSED)
				live_negate &= ~(1u << chan);
			else if (sel >= 4)
				return 0;
		}

		return live_negate == 0;
	}

	if (is_derivative) {
		/* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
		 * if it doesn't fit perfectly into a .xyzw case... */
		return reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate;
	}

	/* ALU instructions support almost everything */
	{
		unsigned int relevant = 0;
		unsigned int negated;

		for (chan = 0; chan < 3; ++chan) {
			unsigned int sel = GET_SWZ(reg.Swizzle, chan);

			if (sel != RC_SWIZZLE_UNUSED && sel != RC_SWIZZLE_ZERO)
				relevant |= 1 << chan;
		}

		negated = reg.Negate & relevant;
		return !(negated && negated != relevant);
	}
}
/**
 * Split source register access.
 *
 * The only thing we *cannot* do in an ALU instruction is per-component
 * negation, so the used components are divided into at most two phases:
 * first the non-negated ones, then the negated ones. Empty groups produce
 * no phase.
 */
static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
		struct rc_swizzle_split * split)
{
	/* [0] collects non-negated components, [1] negated ones. */
	unsigned int by_negate[2] = { 0, 0 };
	int chan;
	int group;

	for (chan = 0; chan < 4; ++chan) {
		if (GET_SWZ(src.Swizzle, chan) != RC_SWIZZLE_UNUSED &&
		    GET_BIT(usemask, chan))
			by_negate[GET_BIT(src.Negate, chan)] |= 1 << chan;
	}

	split->NumPhases = 0;
	for (group = 0; group < 2; ++group) {
		if (by_negate[group])
			split->Phase[split->NumPhases++] = by_negate[group];
	}
}
/* Swizzle capability callbacks built from the r500 helpers defined in this
 * file. */
struct rc_swizzle_caps r500_swizzle_caps = {
.IsNative = r500_swizzle_is_native,
.Split = r500_swizzle_split
};
/* Name of a 3-bit swizzle selector for the dump output; NULL if the value
 * is outside 0..7. */
static char *toswiz(int swiz_val)
{
	static char *const names[] = {
		"R", "G", "B", "A", "0", "H", "1", "U",
	};

	if (swiz_val < 0 || swiz_val > 7)
		return NULL;
	return names[swiz_val];
}
/* Name of an RGB opcode value for the dump output; NULL if the value is
 * outside 0..12. */
static char *toop(int op_val)
{
	static char *const names[] = {
		"MAD", "DP3", "DP4", "D2A", "MIN", "MAX", "Reserved",
		"CND", "CMP", "FRC", "SOP", "MDH", "MDV",
	};

	if (op_val < 0 || op_val > 12)
		return NULL;
	return names[op_val];
}
/* Name of an alpha opcode value for the dump output; NULL if the value is
 * outside 0..15. */
static char *to_alpha_op(int op_val)
{
	static char *const names[] = {
		"MAD", "DP", "MIN", "MAX", "Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ", "SIN", "COS", "MDH", "MDV",
	};

	if (op_val < 0 || op_val > 15)
		return NULL;
	return names[op_val];
}
/* Name of a 4-bit write mask for the dump output; NULL if the value is
 * outside 0..15. */
static char *to_mask(int val)
{
	static char *const names[] = {
		"NONE", "R", "G", "RG", "B", "RB", "GB", "RGB",
		"A", "AR", "AG", "ARG", "AB", "ARB", "AGB", "ARGB",
	};

	if (val < 0 || val > 15)
		return NULL;
	return names[val];
}
/* Name of a texture opcode value for the dump output; NULL if the value is
 * outside 0..6. */
static char *to_texop(int val)
{
	static char *const names[] = {
		"NOP", "LD", "TEXKILL", "PROJ", "LODBIAS", "LOD", "DXDY",
	};

	if (val < 0 || val > 6)
		return NULL;
	return names[val];
}
/**
 * Debug helper: print a decoded listing of the generated R500 fragment
 * program to stderr.
 *
 * Every instruction slot from 0 through code->inst_end is printed. The low
 * two bits of inst0 select how the remaining words are decoded: ALU and OUT
 * instructions print words 1-5, FC instructions print words 2-3 and TEX
 * instructions print words 1-3.
 *
 * \param c compiler; it is cast to struct r300_fragment_program_compiler
 * \param user not used by this function
 */
void r500FragmentProgramDump(struct radeon_compiler *c, void *user)
{
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
struct r500_fragment_program_code *code = &compiler->code->code.r500;
int n, i;
uint32_t inst;
uint32_t inst0;
char *str = NULL;
fprintf(stderr, "R500 Fragment Program:\n--------\n");
for (n = 0; n < code->inst_end+1; n++) {
/* inst0 keeps the common word; inst is reused below for each word decoded. */
inst0 = inst = code->inst[n].inst0;
fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
switch(inst & 0x3) {
case R500_INST_TYPE_ALU: str = "ALU"; break;
case R500_INST_TYPE_OUT: str = "OUT"; break;
case R500_INST_TYPE_FC: str = "FC"; break;
case R500_INST_TYPE_TEX: str = "TEX"; break;
};
fprintf(stderr,"%s %s %s %s %s ", str,
inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
inst & R500_INST_LAST ? "LAST" : "",
inst & R500_INST_NOP ? "NOP" : "",
inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
/* Bits 11-14 are printed as the write mask, bits 15-18 as the output mask. */
fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
to_mask((inst >> 15) & 0xf));
switch(inst0 & 0x3) {
case R500_INST_TYPE_ALU:
case R500_INST_TYPE_OUT:
/* Words 1 and 2 share a layout: three 8-bit addresses, each followed by a
 * flag bit printed as 'c' when set and 't' when clear, plus a 2-bit srcp
 * field in the top bits. */
fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);
inst = code->inst[n].inst1;
fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
(inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
(inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
(inst >> 30));
fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
inst = code->inst[n].inst2;
fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
(inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
(inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
(inst >> 30));
/* Word 3: RGB source selects and per-channel swizzles for arguments A and B. */
fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
inst = code->inst[n].inst3;
fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",
(inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
(inst >> 11) & 0x3,
(inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
(inst >> 24) & 0x3, (inst >> 29) & 0x3);
/* Word 4: alpha opcode, destination and alpha arguments A and B. */
fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
inst = code->inst[n].inst4;
fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf),
(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
(inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
(inst >> 29) & 0x3,
(inst >> 31) & 0x1);
/* Word 5: RGB opcode, destination and argument C for both RGB and alpha. */
fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
inst = code->inst[n].inst5;
fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
(inst >> 23) & 0x3,
(inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
break;
case R500_INST_TYPE_FC:
fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2);
inst = code->inst[n].inst2;
/* JUMP_FUNC JUMP_ANY*/
fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff,
(inst & R500_FC_JUMP_ANY) >> 5);
/* OP */
switch(inst & 0x7){
case R500_FC_OP_JUMP:
fprintf(stderr, "JUMP");
break;
case R500_FC_OP_LOOP:
fprintf(stderr, "LOOP");
break;
case R500_FC_OP_ENDLOOP:
fprintf(stderr, "ENDLOOP");
break;
case R500_FC_OP_REP:
fprintf(stderr, "REP");
break;
case R500_FC_OP_ENDREP:
fprintf(stderr, "ENDREP");
break;
case R500_FC_OP_BREAKLOOP:
fprintf(stderr, "BREAKLOOP");
break;
case R500_FC_OP_BREAKREP:
fprintf(stderr, "BREAKREP");
break;
case R500_FC_OP_CONTINUE:
fprintf(stderr, "CONTINUE");
break;
}
fprintf(stderr," ");
/* A_OP */
switch(inst & (0x3 << 6)){
case R500_FC_A_OP_NONE:
fprintf(stderr, "NONE");
break;
case R500_FC_A_OP_POP:
fprintf(stderr, "POP");
break;
case R500_FC_A_OP_PUSH:
fprintf(stderr, "PUSH");
break;
}
/* B_OP0 B_OP1 */
/* i == 0 examines the 2-bit field at bit 24, i == 1 the one at bit 26. */
for(i=0; i<2; i++){
fprintf(stderr, " ");
switch(inst & (0x3 << (24 + (i * 2)))){
/* R500_FC_B_OP0_NONE
* R500_FC_B_OP1_NONE */
case 0:
fprintf(stderr, "NONE");
break;
case R500_FC_B_OP0_DECR:
case R500_FC_B_OP1_DECR:
fprintf(stderr, "DECR");
break;
case R500_FC_B_OP0_INCR:
case R500_FC_B_OP1_INCR:
fprintf(stderr, "INCR");
break;
}
}
/*POP_CNT B_ELSE */
fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4);
inst = code->inst[n].inst3;
/* JUMP_ADDR */
fprintf(stderr, " %d", inst >> 16);
if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){
fprintf(stderr, " IGN_UNC");
}
/* inst already holds word 3 here; this reload does not change it. */
inst = code->inst[n].inst3;
fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst);
fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n",
inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31);
break;
case R500_INST_TYPE_TEX:
/* Word 1: texture unit id, texture opcode and flag bits 25-27. */
inst = code->inst[n].inst1;
fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
(inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
/* Word 2: source and destination register numbers with 2-bit swizzles. */
inst = code->inst[n].inst2;
fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
inst & 127, inst & (1<<7) ? "(rel)" : "",
toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
(inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
/* Word 3 is printed raw, without decoding. */
fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3);
break;
}
fprintf(stderr,"\n");
}
}

View file

@ -0,0 +1,50 @@
/*
* Copyright (C) 2005 Ben Skeggs.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/*
* Authors:
* Ben Skeggs <darktama@iinet.net.au>
* Jerome Glisse <j.glisse@gmail.com>
*/
#ifndef __R500_FRAGPROG_H_
#define __R500_FRAGPROG_H_
#include "radeon_compiler.h"
#include "radeon_swizzle.h"
/* Translate the compiler's instruction list into R500 hardware instruction
 * words. The user pointer is not used by the implementation. */
extern void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user);
/* Print a decoded listing of the generated R500 program to stderr. The user
 * pointer is not used by the implementation. */
extern void r500FragmentProgramDump(struct radeon_compiler *c, void *user);
/* R500 swizzle capability table; defined outside this header. */
extern struct rc_swizzle_caps r500_swizzle_caps;
/* NOTE(review): presumably a transformation callback applied to IF
 * instructions on R500 - confirm semantics and return value against the
 * definition, which is not part of this header. */
extern int r500_transform_IF(
struct radeon_compiler * c,
struct rc_instruction * inst_if,
void* data);
#endif

View file

@ -0,0 +1,678 @@
/*
* Copyright (C) 2005 Ben Skeggs.
*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* \file
*
* \author Ben Skeggs <darktama@iinet.net.au>
*
* \author Jerome Glisse <j.glisse@gmail.com>
*
* \author Corbin Simpson <MostAwesomeDude@gmail.com>
*
*/
#include "r500_fragprog.h"
#include "../r300_reg.h"
#include "radeon_program_pair.h"
/* Declares a local `code` pointing at the R500 code structure of the
 * fragment program compiler. Requires a variable named `c` of type
 * struct r300_fragment_program_compiler * in the enclosing scope. */
#define PROG_CODE \
struct r500_fragment_program_code *code = &c->code->code.r500
/* Report a compile error through rc_error(), prefixing the message with
 * "<file>::<function>(): " and appending a newline. Like PROG_CODE it
 * expects `c` to be in scope. The named variadic parameter (`args...`) and
 * `##args` are GNU C extensions. */
#define error(fmt, args...) do { \
rc_error(&c->Base, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
} while(0)
/* Bookkeeping for one IF/ELSE/ENDIF construct while it is being emitted.
 * Each field is the hardware instruction index of the corresponding
 * flow-control slot. Else and Endif start at -1 when the IF is seen and
 * are set when the ELSE / ENDIF is reached. */
struct branch_info {
int If;
int Else;
int Endif;
};
/* Bookkeeping for one BGNLOOP/ENDLOOP construct while it is being emitted. */
struct r500_loop_info {
/* Hardware instruction index of the BGNLOOP slot. */
int BgnLoop;
/* Branch nesting depth at the time the loop was opened. */
int BranchDepth;
/* Instruction indices of the BRK slots seen in this loop; their jump
 * addresses are filled in at ENDLOOP. BrkCount entries are in use and
 * BrkReserved is the capacity tracked for memory_pool_array_reserve(). */
int * Brks;
int BrkCount;
int BrkReserved;
/* Same as above, for CONT instructions. */
int * Conts;
int ContCount;
int ContReserved;
};
/* State carried across emit_flowcontrol() calls for one program. */
struct emit_state {
/* Compiler used for error reporting, limits and pool allocation. */
struct radeon_compiler * C;
/* Hardware code being written. */
struct r500_fragment_program_code * Code;
/* Stack of open IF constructs; CurrentBranchDepth entries are live and
 * BranchesReserved is the capacity tracked for the pool allocator. */
struct branch_info * Branches;
unsigned int CurrentBranchDepth;
unsigned int BranchesReserved;
/* Stack of open loops, managed the same way as Branches. */
struct r500_loop_info * Loops;
unsigned int CurrentLoopDepth;
unsigned int LoopsReserved;
/* Deepest IF nesting reached so far. */
unsigned int MaxBranchDepth;
};
/**
 * Convert a compiler opcode into the R500 RGB ALU opcode field.
 *
 * NOP is encoded as MAD. Any opcode without an RGB encoding is reported
 * through error() and then also encoded as MAD.
 */
static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALU_RGBA_OP_MAD;
	case RC_OPCODE_CMP:
		return R500_ALU_RGBA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALU_RGBA_OP_CND;
	case RC_OPCODE_DDX:
		return R500_ALU_RGBA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALU_RGBA_OP_MDV;
	case RC_OPCODE_DP3:
		return R500_ALU_RGBA_OP_DP3;
	case RC_OPCODE_DP4:
		return R500_ALU_RGBA_OP_DP4;
	case RC_OPCODE_FRC:
		return R500_ALU_RGBA_OP_FRC;
	case RC_OPCODE_MAX:
		return R500_ALU_RGBA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALU_RGBA_OP_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R500_ALU_RGBA_OP_SOP;
	default:
		break;
	}

	/* Unknown opcode: complain, then fall back to MAD. */
	error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALU_RGBA_OP_MAD;
}
/**
 * Convert a compiler opcode into the R500 alpha ALU opcode field.
 *
 * NOP is encoded as MAD, and both DP3 and DP4 map to the single alpha DP
 * opcode. Any opcode without an alpha encoding is reported through error()
 * and then encoded as MAD.
 */
static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALPHA_OP_MAD;
	case RC_OPCODE_CMP:
		return R500_ALPHA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALPHA_OP_CND;
	case RC_OPCODE_COS:
		return R500_ALPHA_OP_COS;
	case RC_OPCODE_DDX:
		return R500_ALPHA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALPHA_OP_MDV;
	case RC_OPCODE_DP3:
	case RC_OPCODE_DP4:
		return R500_ALPHA_OP_DP;
	case RC_OPCODE_EX2:
		return R500_ALPHA_OP_EX2;
	case RC_OPCODE_FRC:
		return R500_ALPHA_OP_FRC;
	case RC_OPCODE_LG2:
		return R500_ALPHA_OP_LN2;
	case RC_OPCODE_MAX:
		return R500_ALPHA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALPHA_OP_MIN;
	case RC_OPCODE_RCP:
		return R500_ALPHA_OP_RCP;
	case RC_OPCODE_RSQ:
		return R500_ALPHA_OP_RSQ;
	case RC_OPCODE_SIN:
		return R500_ALPHA_OP_SIN;
	default:
		break;
	}

	/* Unknown opcode: complain, then fall back to MAD. */
	error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALPHA_OP_MAD;
}
/**
 * Remap the compiler's special swizzle selectors to the values written into
 * the hardware swizzle field: ZERO and UNUSED become 4, HALF becomes 5 and
 * ONE becomes 6. Every other selector is returned unchanged.
 */
static unsigned int fix_hw_swizzle(unsigned int swz)
{
	if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_UNUSED)
		return 4;
	if (swz == RC_SWIZZLE_HALF)
		return 5;
	if (swz == RC_SWIZZLE_ONE)
		return 6;
	return swz;
}
/**
 * Pack RGB argument number `arg` of a paired instruction into a field:
 * source select in the low bits, three 3-bit swizzles starting at bit 2,
 * negate at bit 11 and abs at bit 12.
 */
static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
{
	unsigned int word = inst->RGB.Arg[arg].Source;
	int chan = 0;

	word |= inst->RGB.Arg[arg].Negate << 11;
	word |= inst->RGB.Arg[arg].Abs << 12;

	while (chan < 3) {
		unsigned int hw_swz = fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan));

		word |= hw_swz << (3*chan + 2);
		chan++;
	}

	return word;
}
/**
 * Pack alpha argument number `i` of a paired instruction into a field:
 * source select in the low bits, one 3-bit swizzle at bit 2, negate at
 * bit 5 and abs at bit 6.
 */
static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
{
	unsigned int word = inst->Alpha.Arg[i].Source;
	unsigned int hw_swz = fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0));

	word |= hw_swz << 2;
	word |= inst->Alpha.Arg[i].Negate << 5;
	word |= inst->Alpha.Arg[i].Abs << 6;

	return word;
}
/**
 * Convert a compare function into the ALU-result comparison bits of inst0.
 *
 * Only EQUAL, LESS, GEQUAL and NOTEQUAL are supported. Anything else is
 * reported through rc_error() and yields 0.
 */
static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
{
	uint32_t hw_op = 0;

	switch (func) {
	case RC_COMPARE_FUNC_EQUAL:
		hw_op = R500_INST_ALU_RESULT_OP_EQ;
		break;
	case RC_COMPARE_FUNC_LESS:
		hw_op = R500_INST_ALU_RESULT_OP_LT;
		break;
	case RC_COMPARE_FUNC_GEQUAL:
		hw_op = R500_INST_ALU_RESULT_OP_GE;
		break;
	case RC_COMPARE_FUNC_NOTEQUAL:
		hw_op = R500_INST_ALU_RESULT_OP_NE;
		break;
	default:
		rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
		break;
	}

	return hw_op;
}
/**
 * Record that temporary register `index` is referenced, raising the
 * program's highest-used-temporary mark if necessary.
 */
static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
{
	if (!(index > code->max_temp_idx))
		return;

	code->max_temp_idx = index;
}
/**
 * Compute the hardware address field for one source of a paired
 * instruction.
 *
 * Constants get the ADDR0_CONST flag OR-ed into their index. Temporaries
 * and inputs return their index and are recorded via use_temporary().
 * Any other register file yields 0.
 */
static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
{
	/* From docs:
	 * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
	 * MSB = 1 << 7 */
	if (!src.Used)
		return 1 << 7;

	switch (src.File) {
	case RC_FILE_CONSTANT:
		return src.Index | R500_RGB_ADDR0_CONST;
	case RC_FILE_TEMPORARY:
	case RC_FILE_INPUT:
		use_temporary(code, src.Index);
		return src.Index;
	default:
		return 0;
	}
}
/**
 * Set the NOP flag on the instruction at index `ip`, unless that
 * instruction is a texture instruction, in which case it is left alone.
 */
static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
{
	PROG_CODE;

	if ((code->inst[ip].inst0 & 0x3) == R500_INST_TYPE_TEX)
		return;

	code->inst[ip].inst0 |= R500_INST_NOP;
}
/**
 * Emit a paired ALU instruction.
 *
 * Appends one hardware instruction built from the RGB and alpha halves of
 * \p inst. Reports an error and emits nothing when the program is already
 * at its instruction limit. Also reports an error, leaving the slot only
 * partially written, when the instruction writes both an output and the
 * ALU result.
 *
 * Words inst1, inst2 and inst3 are only OR-ed into, so this relies on the
 * code structure having been zeroed beforehand
 * (r500BuildFragmentProgramHwCode() does that with memset).
 */
static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
{
int ip;
PROG_CODE;
if (code->inst_end >= c->Base.max_alu_insts-1) {
error("emit_alu: Too many instructions");
return;
}
ip = ++code->inst_end;
/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
if (ip > 0) {
alu_nop(c, ip - 1);
}
}
/* inst5 carries the RGB opcode, inst4 the alpha opcode. */
code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
/* Any output or depth write makes this an OUT instruction; otherwise ALU. */
if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
code->inst[ip].inst0 = R500_INST_TYPE_OUT;
if (inst->WriteALUResult) {
error("Cannot write output and ALU result at the same time");
return;
}
} else {
code->inst[ip].inst0 = R500_INST_TYPE_ALU;
}
code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
/* Register write mask: RGB in bits 11-13, alpha in bit 14.
 * Output write mask: RGB from bit 15, alpha at bit 18. */
code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
if (inst->Nop) {
code->inst[ip].inst0 |= R500_INST_NOP;
}
if (inst->Alpha.DepthWriteMask) {
code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
c->code->writes_depth = 1;
}
/* Destination registers; both count towards the temporary high-water mark. */
code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
use_temporary(code, inst->Alpha.DestIndex);
use_temporary(code, inst->RGB.DestIndex);
if (inst->RGB.Saturate)
code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
if (inst->Alpha.Saturate)
code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
/* Set the presubtract operation. */
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
case RC_PRESUB_BIAS:
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
break;
case RC_PRESUB_SUB:
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
break;
case RC_PRESUB_ADD:
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
break;
case RC_PRESUB_INV:
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
break;
default:
break;
}
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
case RC_PRESUB_BIAS:
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
break;
case RC_PRESUB_SUB:
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
break;
case RC_PRESUB_ADD:
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
break;
case RC_PRESUB_INV:
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
break;
default:
break;
}
/* Source addresses: RGB sources go into inst1, alpha sources into inst2. */
code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
/* Argument selects: A and B live in inst3 (RGB) and inst4 (alpha); argument
 * C for both halves lives in inst5. */
code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
/* ALU result write: choose which channel feeds it and the comparison used. */
if (inst->WriteALUResult) {
code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
if (inst->WriteALUResult == RC_ALURESULT_X)
code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
else
code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
}
}
/**
 * Pack a four-component swizzle into four consecutive 2-bit fields, keeping
 * only the low two bits of each component selector.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan = 0;

	while (chan < 4) {
		packed |= (GET_SWZ(swizzle, chan) & 0x3) << chan*2;
		chan++;
	}

	return packed;
}
/**
 * Emit a single TEX instruction.
 *
 * Handles KIL, TEX, TXB, TXP, TXD and TXL. Any other opcode is reported
 * through error(), but the slot is still written and 1 is returned.
 *
 * \return 0 if the program is already at its instruction limit (nothing is
 *         emitted), 1 otherwise.
 */
static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
{
	PROG_CODE;
	uint32_t tex_word;
	int slot;

	if (code->inst_end >= c->Base.max_alu_insts-1) {
		error("emit_tex: Too many instructions");
		return 0;
	}

	slot = ++code->inst_end;

	code->inst[slot].inst0 = R500_INST_TYPE_TEX
		| (inst->DstReg.WriteMask << 11)
		| R500_INST_TEX_SEM_WAIT;

	/* Assemble word 1 locally: unit id, semaphore acquire, scaling, opcode. */
	tex_word = R500_TEX_ID(inst->TexSrcUnit)
		| R500_TEX_SEM_ACQUIRE;

	if (inst->TexSrcTarget == RC_TEXTURE_RECT)
		tex_word |= R500_TEX_UNSCALED;

	switch (inst->Opcode) {
	case RC_OPCODE_KIL:
		tex_word |= R500_TEX_INST_TEXKILL;
		break;
	case RC_OPCODE_TEX:
		tex_word |= R500_TEX_INST_LD;
		break;
	case RC_OPCODE_TXB:
		tex_word |= R500_TEX_INST_LODBIAS;
		break;
	case RC_OPCODE_TXP:
		tex_word |= R500_TEX_INST_PROJ;
		break;
	case RC_OPCODE_TXD:
		tex_word |= R500_TEX_INST_DXDY;
		break;
	case RC_OPCODE_TXL:
		tex_word |= R500_TEX_INST_LOD;
		break;
	default:
		error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
	}

	code->inst[slot].inst1 = tex_word;

	/* KIL has no destination, so only its source counts as a used temporary. */
	use_temporary(code, inst->SrcReg[0].Index);
	if (inst->Opcode != RC_OPCODE_KIL)
		use_temporary(code, inst->DstReg.Index);

	code->inst[slot].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
		| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
		| R500_TEX_DST_ADDR(inst->DstReg.Index)
		| (GET_SWZ(inst->TexSwizzle, 0) << 24)
		| (GET_SWZ(inst->TexSwizzle, 1) << 26)
		| (GET_SWZ(inst->TexSwizzle, 2) << 28)
		| (GET_SWZ(inst->TexSwizzle, 3) << 30)
		;

	if (inst->Opcode != RC_OPCODE_TXD)
		return 1;

	/* DX and DY parameters are specified in a separate register. */
	use_temporary(code, inst->SrcReg[1].Index);
	use_temporary(code, inst->SrcReg[2].Index);

	code->inst[slot].inst3 =
		R500_DX_ADDR(inst->SrcReg[1].Index) |
		(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
		R500_DY_ADDR(inst->SrcReg[2].Index) |
		(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);

	return 1;
}
/**
 * Emit one flow-control (FC) instruction for \p inst.
 *
 * A new instruction slot is always allocated and marked as an FC
 * instruction. What goes into its remaining words depends on the opcode:
 *
 * - BGNLOOP pushes a loop record; its jump address is filled in at ENDLOOP.
 * - BRK / CONT record their slot in the innermost loop; their jump addresses
 *   are filled in at ENDLOOP.
 * - ENDLOOP patches the BGNLOOP, BRK and CONT slots and pops the loop.
 * - IF pushes a branch record, ELSE records its slot, and ENDIF writes the
 *   IF, ELSE (if present) and ENDIF instructions and pops the branch.
 *
 * Errors (program full, branch nesting beyond the hardware limit, or a
 * BRK/CONT/ENDLOOP/ELSE/ENDIF with no enclosing construct) are reported
 * through rc_error(), and the function returns without further changes.
 */
static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
{
	struct branch_info * branch;
	struct r500_loop_info * loop;
	unsigned int newip;

	if (s->Code->inst_end >= s->C->max_alu_insts-1) {
		rc_error(s->C, "emit_flowcontrol: Too many instructions");
		return;
	}

	newip = ++s->Code->inst_end;

	/* Currently all loops use the same integer constant to initialize
	 * the loop variables. */
	if(!s->Code->int_constants[0]) {
		s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
		s->Code->int_constant_count = 1;
	}
	s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;

	switch(inst->U.I.Opcode){
	case RC_OPCODE_BGNLOOP:
		memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
			s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);

		loop = &s->Loops[s->CurrentLoopDepth++];
		memset(loop, 0, sizeof(struct r500_loop_info));
		loop->BranchDepth = s->CurrentBranchDepth;
		loop->BgnLoop = newip;

		s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
			| R500_FC_JUMP_FUNC(0x00)
			| R500_FC_IGNORE_UNCOVERED
			;
		break;
	case RC_OPCODE_BRK:
		/* Without this check the index below would wrap around. */
		if (!s->CurrentLoopDepth) {
			rc_error(s->C, "%s: got BRK outside a loop", __FUNCTION__);
			return;
		}
		loop = &s->Loops[s->CurrentLoopDepth - 1];
		memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
			loop->BrkCount, loop->BrkReserved, 1);

		loop->Brks[loop->BrkCount++] = newip;
		s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
			| R500_FC_JUMP_FUNC(0xff)
			| R500_FC_B_OP1_DECR
			| R500_FC_B_POP_CNT(
				s->CurrentBranchDepth - loop->BranchDepth)
			| R500_FC_IGNORE_UNCOVERED
			;
		break;
	case RC_OPCODE_CONT:
		if (!s->CurrentLoopDepth) {
			rc_error(s->C, "%s: got CONT outside a loop", __FUNCTION__);
			return;
		}
		loop = &s->Loops[s->CurrentLoopDepth - 1];
		memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
			loop->ContCount, loop->ContReserved, 1);

		loop->Conts[loop->ContCount++] = newip;
		s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
			| R500_FC_JUMP_FUNC(0xff)
			| R500_FC_B_OP1_DECR
			| R500_FC_B_POP_CNT(
				s->CurrentBranchDepth - loop->BranchDepth)
			| R500_FC_IGNORE_UNCOVERED
			;
		break;
	case RC_OPCODE_ENDLOOP:
		if (!s->CurrentLoopDepth) {
			rc_error(s->C, "%s: got ENDLOOP outside a loop", __FUNCTION__);
			return;
		}
		loop = &s->Loops[s->CurrentLoopDepth - 1];

		/* Emit ENDLOOP */
		s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
			| R500_FC_JUMP_FUNC(0xff)
			| R500_FC_JUMP_ANY
			| R500_FC_IGNORE_UNCOVERED
			;
		/* The constant integer at index 0 is used by all loops. */
		s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
			| R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
			;

		/* Set jump address and int constant for BGNLOOP */
		s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
			| R500_FC_JUMP_ADDR(newip)
			;

		/* Set jump address for the BRK instructions. */
		while(loop->BrkCount--) {
			s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
				R500_FC_JUMP_ADDR(newip + 1);
		}

		/* Set jump address for CONT instructions. */
		while(loop->ContCount--) {
			s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
				R500_FC_JUMP_ADDR(newip);
		}
		s->CurrentLoopDepth--;
		break;
	case RC_OPCODE_IF:
		if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
			rc_error(s->C, "Branch depth exceeds hardware limit");
			return;
		}
		memory_pool_array_reserve(&s->C->Pool, struct branch_info,
			s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);

		branch = &s->Branches[s->CurrentBranchDepth++];
		branch->If = newip;
		branch->Else = -1;
		branch->Endif = -1;

		if (s->CurrentBranchDepth > s->MaxBranchDepth)
			s->MaxBranchDepth = s->CurrentBranchDepth;

		/* actual instruction is filled in at ENDIF time */
		break;
	case RC_OPCODE_ELSE:
		if (!s->CurrentBranchDepth) {
			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
			return;
		}

		branch = &s->Branches[s->CurrentBranchDepth - 1];
		branch->Else = newip;

		/* actual instruction is filled in at ENDIF time */
		break;
	case RC_OPCODE_ENDIF:
		if (!s->CurrentBranchDepth) {
			rc_error(s->C, "%s: got ENDIF outside a branch", __FUNCTION__);
			return;
		}

		branch = &s->Branches[s->CurrentBranchDepth - 1];
		branch->Endif = newip;

		s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
			| R500_FC_A_OP_NONE /* no address stack */
			| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
			| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
			| R500_FC_B_OP1_NONE /* no branch counter if stay */
			| R500_FC_B_POP_CNT(1)
			;
		s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
		s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
			| R500_FC_A_OP_NONE /* no address stack */
			| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
			| R500_FC_B_OP0_INCR /* increment branch counter if stay */
			| R500_FC_IGNORE_UNCOVERED
			;

		if (branch->Else >= 0) {
			/* increment branch counter also if jump */
			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);

			s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
				| R500_FC_A_OP_NONE /* no address stack */
				| R500_FC_B_ELSE /* all active pixels want to jump */
				| R500_FC_B_OP0_NONE /* no counter op if stay */
				| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
				| R500_FC_B_POP_CNT(1)
				;
			s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
		} else {
			/* don't touch branch counter on jump */
			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
		}

		s->CurrentBranchDepth--;
		break;
	default:
		rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
	}
}
void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
{
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
struct emit_state s;
struct r500_fragment_program_code *code = &compiler->code->code.r500;
memset(&s, 0, sizeof(s));
s.C = &compiler->Base;
s.Code = code;
memset(code, 0, sizeof(*code));
code->max_temp_idx = 1;
code->inst_end = -1;
for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
inst = inst->Next) {
if (inst->Type == RC_INSTRUCTION_NORMAL) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
if (opcode->IsFlowControl) {
emit_flowcontrol(&s, inst);
} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
continue;
} else {
emit_tex(compiler, &inst->U.I);
}
} else {
emit_paired(compiler, &inst->U.P);
}
}
if (code->max_temp_idx >= compiler->Base.max_temp_regs)
rc_error(&compiler->Base, "Too many hardware temporaries used");
if (compiler->Base.Error)
return;
if (code->inst_end == -1 ||
(code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
int ip;
/* This may happen when dead-code elimination is disabled or
* when most of the fragment program logic is leading to a KIL */
if (code->inst_end >= compiler->Base.max_alu_insts-1) {
rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
return;
}
ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
/* Enable full flow control mode if we are using loops or have if
* statements nested at least four deep. */
if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
if (code->max_temp_idx < 1)
code->max_temp_idx = 1;
code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
}
}

View file

@ -0,0 +1,187 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_code.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "radeon_program.h"
void rc_constants_init(struct rc_constant_list * c)
{
memset(c, 0, sizeof(*c));
}
/**
 * Copy a constants structure, assuming that the destination structure
 * is not initialized.
 *
 * The destination receives its own heap copy of the source array, sized to
 * exactly src->Count entries. An empty source produces an empty destination
 * without allocating. If the allocation fails, the destination is also left
 * empty (Constants == NULL, Count == 0) rather than copying through a NULL
 * pointer.
 *
 * \param dst list to fill in; previous contents are overwritten, not freed
 * \param src list to copy from; not modified
 */
void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src)
{
	unsigned count = src->Count;
	struct rc_constant * copy = NULL;

	if (count) {
		copy = malloc(sizeof(struct rc_constant) * count);
		if (copy)
			memcpy(copy, src->Constants, sizeof(struct rc_constant) * count);
		else
			count = 0;
	}

	dst->Constants = copy;
	dst->Count = count;
	dst->_Reserved = count;
}
/**
 * Release the storage owned by a constant list and return the list to the
 * empty state, so it can be reused or destroyed again safely.
 */
void rc_constants_destroy(struct rc_constant_list * c)
{
	free(c->Constants);

	c->Constants = NULL;
	c->Count = 0;
	c->_Reserved = 0;
}
/**
 * Append a copy of *constant to the list and return its index.
 *
 * When the list is full its capacity is doubled (or set to 16 if it was
 * zero) by allocating a new array, copying the existing entries over and
 * freeing the old array.
 */
unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant)
{
	const unsigned slot = c->Count;

	if (slot >= c->_Reserved) {
		unsigned capacity = c->_Reserved * 2;
		struct rc_constant * grown;

		if (capacity == 0)
			capacity = 16;
		c->_Reserved = capacity;

		grown = malloc(sizeof(struct rc_constant) * capacity);
		memcpy(grown, c->Constants, sizeof(struct rc_constant) * slot);
		free(c->Constants);
		c->Constants = grown;
	}

	c->Constants[slot] = *constant;
	c->Count = slot + 1;

	return slot;
}
/**
* Add a state vector to the constant list, while trying to avoid duplicates.
*/
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
{
unsigned index;
struct rc_constant constant;
for(index = 0; index < c->Count; ++index) {
if (c->Constants[index].Type == RC_CONSTANT_STATE) {
if (c->Constants[index].u.State[0] == state0 &&
c->Constants[index].u.State[1] == state1)
return index;
}
}
memset(&constant, 0, sizeof(constant));
constant.Type = RC_CONSTANT_STATE;
constant.Size = 4;
constant.u.State[0] = state0;
constant.u.State[1] = state1;
return rc_constants_add(c, &constant);
}
/**
* Add an immediate vector to the constant list, while trying to avoid
* duplicates.
*/
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
{
unsigned index;
struct rc_constant constant;
for(index = 0; index < c->Count; ++index) {
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4))
return index;
}
}
memset(&constant, 0, sizeof(constant));
constant.Type = RC_CONSTANT_IMMEDIATE;
constant.Size = 4;
memcpy(constant.u.Immediate, data, sizeof(float) * 4);
return rc_constants_add(c, &constant);
}
/**
* Add an immediate scalar to the constant list, while trying to avoid
* duplicates.
*/
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
{
unsigned index;
int free_index = -1;
struct rc_constant constant;
for(index = 0; index < c->Count; ++index) {
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
unsigned comp;
for(comp = 0; comp < c->Constants[index].Size; ++comp) {
if (c->Constants[index].u.Immediate[comp] == data) {
*swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
return index;
}
}
if (c->Constants[index].Size < 4)
free_index = index;
}
}
if (free_index >= 0) {
unsigned comp = c->Constants[free_index].Size++;
c->Constants[free_index].u.Immediate[comp] = data;
*swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
return free_index;
}
memset(&constant, 0, sizeof(constant));
constant.Type = RC_CONSTANT_IMMEDIATE;
constant.Size = 1;
constant.u.Immediate[0] = data;
*swizzle = RC_SWIZZLE_XXXX;
return rc_constants_add(c, &constant);
}
/**
 * Print every immediate constant in the list to stderr, one line per
 * constant showing all four component slots. Constants of other types are
 * skipped.
 */
void rc_constants_print(struct rc_constant_list * c)
{
	unsigned int idx = 0;

	while (idx < c->Count) {
		if (c->Constants[idx].Type == RC_CONSTANT_IMMEDIATE) {
			float * v = c->Constants[idx].u.Immediate;

			fprintf(stderr, "CONST[%u] = { %10.4f %10.4f %10.4f %10.4f }\n",
				idx, v[0], v[1], v[2], v[3]);
		}
		idx++;
	}
}

View file

@ -0,0 +1,306 @@
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#ifndef RADEON_CODE_H
#define RADEON_CODE_H
#include <stdint.h>
/*
 * Size limits used by the fragment program code structures in this header.
 * NOTE(review): the values presumably mirror the hardware limits of the
 * chip family named in each prefix -- confirm against the register docs.
 */
#define R300_PFS_MAX_ALU_INST 64
#define R300_PFS_MAX_TEX_INST 32
#define R300_PFS_MAX_TEX_INDIRECT 4
#define R300_PFS_NUM_TEMP_REGS 32
#define R300_PFS_NUM_CONST_REGS 32
/* The R400 limits size alu.inst[] and tex.inst[] in
 * struct r300_fragment_program_code. */
#define R400_PFS_MAX_ALU_INST 512
#define R400_PFS_MAX_TEX_INST 512
/* Sizes inst[] in struct r500_fragment_program_code. */
#define R500_PFS_MAX_INST 512
#define R500_PFS_NUM_TEMP_REGS 128
#define R500_PFS_NUM_CONST_REGS 256
#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4
/* NOTE(review): STATE_INTERNAL_DRIVER is not defined in this header; it must
 * be visible wherever this macro is expanded. */
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
/**
 * Values of rc_constant::Type. That field is a 2-bit bitfield, so at most
 * four distinct types can be represented.
 */
enum {
/**
* External constants are constants whose meaning is unknown to this
* compiler. For example, a Mesa gl_program's constants are turned
* into external constants.
*/
RC_CONSTANT_EXTERNAL = 0,
/** Constant whose component values are stored in rc_constant::u.Immediate. */
RC_CONSTANT_IMMEDIATE,
/**
* Constant referring to state that is known by this compiler,
* see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
*/
RC_CONSTANT_STATE
};
/**
 * Identifiers of the state values known to this compiler. They are what
 * RC_CONSTANT_STATE constants refer to and are passed to
 * rc_constants_add_state().
 */
enum {
RC_STATE_SHADOW_AMBIENT = 0,
RC_STATE_R300_WINDOW_DIMENSION,
RC_STATE_R300_TEXRECT_FACTOR,
RC_STATE_R300_TEXSCALE_FACTOR,
RC_STATE_R300_VIEWPORT_SCALE,
RC_STATE_R300_VIEWPORT_OFFSET
};
/**
 * One entry of a constant list. Type tells which kind of constant this is.
 */
struct rc_constant {
unsigned Type:2; /**< RC_CONSTANT_xxx */
/* Number of components in use. The scalar-immediate allocator treats an
 * immediate with Size < 4 as having room for one more value. */
unsigned Size:3;
union {
/* NOTE(review): presumably the payload of RC_CONSTANT_EXTERNAL -- confirm. */
unsigned External;
/* Component values of an RC_CONSTANT_IMMEDIATE. */
float Immediate[4];
/* NOTE(review): presumably the two state words given to
 * rc_constants_add_state() -- confirm against its implementation. */
unsigned State[2];
} u;
};
/**
 * Array of constants referenced by a program.
 */
struct rc_constant_list {
struct rc_constant * Constants; /* the entries, indexed 0 .. Count-1 */
unsigned Count; /* number of valid entries in Constants */
/* NOTE(review): presumably the allocated capacity of Constants -- confirm
 * against rc_constants_add(). */
unsigned _Reserved;
};
/*
 * Constant list management.
 *
 * The rc_constants_add* functions return a value that callers use as the
 * constant's index. rc_constants_add_immediate_scalar() additionally stores
 * in *swizzle the swizzle that selects the component holding the value.
 * rc_constants_print() dumps the immediate constants to stderr.
 */
void rc_constants_init(struct rc_constant_list * c);
void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
void rc_constants_destroy(struct rc_constant_list * c);
unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2);
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
void rc_constants_print(struct rc_constant_list * c);
/**
 * Compare functions.
 *
 * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you
 * the correct GL compare function.
 *
 * The eight values (0..7) fit the 3-bit texture_compare_func field of
 * struct r300_fragment_program_external_state.
 */
typedef enum {
RC_COMPARE_FUNC_NEVER = 0,
RC_COMPARE_FUNC_LESS,
RC_COMPARE_FUNC_EQUAL,
RC_COMPARE_FUNC_LEQUAL,
RC_COMPARE_FUNC_GREATER,
RC_COMPARE_FUNC_NOTEQUAL,
RC_COMPARE_FUNC_GEQUAL,
RC_COMPARE_FUNC_ALWAYS
} rc_compare_func;
/**
 * Coordinate wrapping modes.
 *
 * These are not quite the same as their GL counterparts yet.
 *
 * RC_WRAP_NONE (0) means that no wrapping maths is applied; see the
 * wrap_mode field of struct r300_fragment_program_external_state.
 */
typedef enum {
RC_WRAP_NONE = 0,
RC_WRAP_REPEAT,
RC_WRAP_MIRRORED_REPEAT,
RC_WRAP_MIRRORED_CLAMP
} rc_wrap_mode;
/**
 * Stores state that influences the compilation of a fragment program.
 *
 * The unit[] array holds one set of sampler-related settings per entry.
 */
struct r300_fragment_program_external_state {
struct {
/**
* This field contains swizzle for some lowering passes
* (shadow comparison, unorm->snorm conversion)
*/
unsigned texture_swizzle:12;
/**
* If the sampler is used as a shadow sampler,
* this field specifies the compare function.
*
* Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
* \sa rc_compare_func
*/
unsigned texture_compare_func : 3;
/**
* No matter what the sampler type is,
* this field turns it into a shadow sampler.
*/
unsigned compare_mode_enabled : 1;
/**
* If the sampler will receive non-normalized coords,
* this field is set. The scaling factor is given by
* RC_STATE_R300_TEXRECT_FACTOR.
*/
unsigned non_normalized_coords : 1;
/**
* This field specifies wrapping modes for the sampler.
*
* If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
* will be performed on the coordinates.
* \sa rc_wrap_mode
*/
unsigned wrap_mode : 3;
/**
* The coords are scaled after applying the wrap mode emulation
* and right before texture fetch. The scaling factor is given by
* RC_STATE_R300_TEXSCALE_FACTOR.
*/
unsigned clamp_and_scale_before_fetch : 1;
/**
* Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM
* in the shader.
*/
unsigned convert_unorm_to_snorm:1;
} unit[16];
/* NOTE(review): presumably requests clamping of the fragment outputs --
 * confirm against the pass that reads this flag. */
unsigned frag_clamp:1;
};
/**
 * Instruction ranges and flags of one node of an R300 fragment program.
 * NOTE(review): the meaning of the flags bits is not visible in this
 * header -- see the code that fills this structure.
 */
struct r300_fragment_program_node {
int tex_offset; /**< first tex instruction */
int tex_end; /**< last tex instruction, relative to tex_offset */
int alu_offset; /**< first ALU instruction */
int alu_end; /**< last ALU instruction, relative to alu_offset */
int flags;
};
/**
 * Stores an R300 fragment program in its compiled-to-hardware form.
 *
 * Both instruction arrays are dimensioned with the R400 limits, so the
 * same structure can hold programs that use r390_mode.
 */
struct r300_fragment_program_code {
struct {
unsigned int length; /**< total # of texture instructions used */
uint32_t inst[R400_PFS_MAX_TEX_INST];
} tex;
struct {
unsigned int length; /**< total # of ALU instructions used */
/* One ALU instruction is made of separate RGB and alpha instruction and
 * address words plus an R400 extension word. */
struct {
uint32_t rgb_inst;
uint32_t rgb_addr;
uint32_t alpha_inst;
uint32_t alpha_addr;
uint32_t r400_ext_addr;
} inst[R400_PFS_MAX_ALU_INST];
} alu;
uint32_t config; /* US_CONFIG */
uint32_t pixsize; /* US_PIXSIZE */
uint32_t code_offset; /* US_CODE_OFFSET */
uint32_t r400_code_offset_ext; /* US_CODE_EXT */
uint32_t code_addr[4]; /* US_CODE_ADDR */
/* US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
 * for r400 cards */
unsigned int r390_mode:1;
};
/**
 * Stores an R500 fragment program in its compiled form. Every instruction
 * consists of six 32-bit words.
 */
struct r500_fragment_program_code {
struct {
uint32_t inst0;
uint32_t inst1;
uint32_t inst2;
uint32_t inst3;
uint32_t inst4;
uint32_t inst5;
} inst[R500_PFS_MAX_INST];
int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
int max_temp_idx; /* NOTE(review): presumably the highest temporary index used -- confirm */
uint32_t us_fc_ctrl;
uint32_t int_constants[32];
uint32_t int_constant_count; /* NOTE(review): presumably the number of valid int_constants entries -- confirm */
};
/**
 * Compiled fragment program for either chip family, together with the
 * constant list that belongs to it.
 */
struct rX00_fragment_program_code {
/* Only one of the two encodings is stored at a time.
 * NOTE(review): which member is valid presumably follows the compiler's
 * is_r500 setting -- confirm. */
union {
struct r300_fragment_program_code r300;
struct r500_fragment_program_code r500;
} code;
unsigned writes_depth:1;
struct rc_constant_list constants;
/* NOTE(review): presumably filled when unused constants are removed --
 * confirm against radeon_remove_constants.c. */
unsigned *constants_remap_table;
};
/*
 * Size limits used by struct r300_vertex_program_code.
 * Each *_DWORDS macro is the instruction count times four.
 */
#define R300_VS_MAX_ALU 256
#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4)
#define R500_VS_MAX_ALU 1024
#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4)
#define R300_VS_MAX_TEMPS 32
/* This is the max for all chipsets (r300-r500) */
#define R300_VS_MAX_FC_OPS 16
/* The r500 maximum depth is not just for loops, but any combination of loops
 * and subroutine jumps. */
#define R500_VS_MAX_FC_DEPTH 8
#define R300_VS_MAX_LOOP_DEPTH 1
/* Sizes of the inputs[] and outputs[] arrays of the vertex program code. */
#define VSF_MAX_INPUTS 32
#define VSF_MAX_OUTPUTS 32
/**
 * Stores a vertex program in its compiled form, together with its
 * constants and flow-control tables.
 */
struct r300_vertex_program_code {
int length; /* NOTE(review): presumably the number of used dwords in body -- confirm */
/* The instruction words, accessible as integers or as floats. The array
 * is dimensioned for the largest (R500) program size. */
union {
uint32_t d[R500_VS_MAX_ALU_DWORDS];
float f[R500_VS_MAX_ALU_DWORDS];
} body;
int pos_end;
int num_temporaries; /* Number of temp vars used by program */
int inputs[VSF_MAX_INPUTS];
int outputs[VSF_MAX_OUTPUTS];
struct rc_constant_list constants;
unsigned *constants_remap_table;
uint32_t InputsRead;
uint32_t OutputsWritten;
unsigned int num_fc_ops;
uint32_t fc_ops;
/* Flow-control addresses; the R500 layout uses two words per operation. */
union {
uint32_t r300[R300_VS_MAX_FC_OPS];
struct {
uint32_t lw;
uint32_t uw;
} r500[R300_VS_MAX_FC_OPS];
} fc_op_addrs;
int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
};
#endif /* RADEON_CODE_H */

View file

@ -0,0 +1,489 @@
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "radeon_compiler.h"
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_pair.h"
#include "radeon_compiler_util.h"
void rc_init(struct radeon_compiler * c)
{
memset(c, 0, sizeof(*c));
memory_pool_init(&c->Pool);
c->Program.Instructions.Prev = &c->Program.Instructions;
c->Program.Instructions.Next = &c->Program.Instructions;
c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
}
/**
 * Release everything owned by the compiler object: the constant list, the
 * memory pool and the recorded error message.
 *
 * ErrorMsg is reset to NULL afterwards so that a stale pointer can neither
 * be read nor freed a second time.
 */
void rc_destroy(struct radeon_compiler * c)
{
	rc_constants_destroy(&c->Program.Constants);
	memory_pool_destroy(&c->Pool);

	free(c->ErrorMsg);
	c->ErrorMsg = NULL;
}
/**
 * printf-style debug output to stderr.
 *
 * Nothing is printed unless RC_DBG_LOG is set in c->Debug.
 */
void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
{
	if (c->Debug & RC_DBG_LOG) {
		va_list args;

		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
}
/**
 * Flag a compilation error and record its printf-style message.
 *
 * c->Error is always set. Only the first message is kept in c->ErrorMsg
 * (heap-allocated, released by rc_destroy()). If the message cannot be
 * formatted or memory cannot be allocated, c->ErrorMsg is left NULL while
 * c->Error is still set.
 *
 * With RC_DBG_LOG set in c->Debug, every error is also printed to stderr.
 */
void rc_error(struct radeon_compiler * c, const char * fmt, ...)
{
	va_list ap;

	c->Error = 1;

	if (!c->ErrorMsg) {
		/* Only remember the first error */
		char buf[1024];
		int written;

		va_start(ap, fmt);
		written = vsnprintf(buf, sizeof(buf), fmt, ap);
		va_end(ap);

		if (written < 0) {
			/* Formatting failed: there is no usable text to store.
			 * (A negative value must not be compared as unsigned.) */
		} else if ((size_t)written < sizeof(buf)) {
			c->ErrorMsg = strdup(buf);
		} else {
			/* The message was truncated: format again into a buffer
			 * of exactly the required size. */
			size_t size = (size_t)written + 1;
			char * msg = malloc(size);

			if (msg) {
				va_start(ap, fmt);
				vsnprintf(msg, size, fmt, ap);
				va_end(ap);
				c->ErrorMsg = msg;
			}
		}
	}

	if (c->Debug & RC_DBG_LOG) {
		fprintf(stderr, "r300compiler error: ");
		va_start(ap, fmt);
		vfprintf(stderr, fmt, ap);
		va_end(ap);
	}
}
/**
 * Helper behind the rc_assert() macro: records an internal compiler error
 * naming the source location and the text of the failed assertion.
 *
 * \return Always 1, so that rc_assert() evaluates to true when the
 * assertion failed.
 */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
{
rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
return 1;
}
/**
 * Recompute c->Program.InputsRead and c->Program.OutputsWritten
 * based on which inputs and outputs are actually referenced
 * in program instructions.
 *
 * The bits are built with an unsigned 1 so that register index 31 does not
 * shift into the sign bit of an int.
 */
void rc_calculate_inputs_outputs(struct radeon_compiler * c)
{
	struct rc_instruction *inst;

	c->Program.InputsRead = 0;
	c->Program.OutputsWritten = 0;

	for (inst = c->Program.Instructions.Next;
	     inst != &c->Program.Instructions;
	     inst = inst->Next) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
		unsigned int i;

		for (i = 0; i < opcode->NumSrcRegs; ++i) {
			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
				c->Program.InputsRead |= 1u << inst->U.I.SrcReg[i].Index;
		}

		if (opcode->HasDstReg) {
			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
				c->Program.OutputsWritten |= 1u << inst->U.I.DstReg.Index;
		}
	}
}
/**
 * Rewrite the program such that everything that source the given input
 * register will source new_input instead.
 *
 * The swizzle of new_input is combined with the swizzle of each rewritten
 * source. Negate and Abs of new_input are applied only to sources that do
 * not already take an absolute value. c->Program.InputsRead is updated:
 * the bit of \p input is cleared and the bit of new_input.Index is set for
 * every rewritten source.
 *
 * The bits are built with an unsigned 1 so that index 31 does not shift
 * into the sign bit of an int.
 */
void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
{
	struct rc_instruction * inst;

	c->Program.InputsRead &= ~(1u << input);

	for (inst = c->Program.Instructions.Next;
	     inst != &c->Program.Instructions;
	     inst = inst->Next) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
		unsigned i;

		for (i = 0; i < opcode->NumSrcRegs; ++i) {
			if (inst->U.I.SrcReg[i].File != RC_FILE_INPUT ||
			    inst->U.I.SrcReg[i].Index != input)
				continue;

			inst->U.I.SrcReg[i].File = new_input.File;
			inst->U.I.SrcReg[i].Index = new_input.Index;
			inst->U.I.SrcReg[i].Swizzle =
				combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);

			/* An existing Abs would discard the new input's sign
			 * anyway, so Negate/Abs are only merged without it. */
			if (!inst->U.I.SrcReg[i].Abs) {
				inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
				inst->U.I.SrcReg[i].Abs = new_input.Abs;
			}

			c->Program.InputsRead |= 1u << new_input.Index;
		}
	}
}
/**
 * Rewrite the program such that everything that writes into the given
 * output register will instead write to new_output. The new_output
 * writemask is honoured.
 *
 * c->Program.OutputsWritten is updated accordingly. The bits are built
 * with an unsigned 1 so that index 31 does not shift into the sign bit of
 * an int.
 */
void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
{
	struct rc_instruction * inst;

	c->Program.OutputsWritten &= ~(1u << output);

	for (inst = c->Program.Instructions.Next;
	     inst != &c->Program.Instructions;
	     inst = inst->Next) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);

		if (!opcode->HasDstReg)
			continue;

		if (inst->U.I.DstReg.File == RC_FILE_OUTPUT &&
		    inst->U.I.DstReg.Index == output) {
			inst->U.I.DstReg.Index = new_output;
			inst->U.I.DstReg.WriteMask &= writemask;
			c->Program.OutputsWritten |= 1u << new_output;
		}
	}
}
/**
 * Rewrite the program such that a given output is duplicated.
 *
 * Every write to \p output is redirected to a fresh temporary. Two MOVs
 * are then appended at the end of the program, copying the temporary to
 * \p output and to \p dup_output. The bit of dup_output is added to
 * c->Program.OutputsWritten; it is built with an unsigned 1 so that index
 * 31 does not shift into the sign bit of an int.
 */
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
{
	unsigned tempreg = rc_find_free_temporary(c);
	struct rc_instruction * inst;

	for (inst = c->Program.Instructions.Next;
	     inst != &c->Program.Instructions;
	     inst = inst->Next) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);

		if (!opcode->HasDstReg)
			continue;

		if (inst->U.I.DstReg.File == RC_FILE_OUTPUT &&
		    inst->U.I.DstReg.Index == output) {
			inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
			inst->U.I.DstReg.Index = tempreg;
		}
	}

	/* Instructions.Prev is the last instruction, so both MOVs are appended. */
	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
	inst->U.I.Opcode = RC_OPCODE_MOV;
	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
	inst->U.I.DstReg.Index = output;
	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[0].Index = tempreg;
	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;

	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
	inst->U.I.Opcode = RC_OPCODE_MOV;
	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
	inst->U.I.DstReg.Index = dup_output;
	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[0].Index = tempreg;
	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;

	c->Program.OutputsWritten |= 1u << dup_output;
}
/**
 * Introduce standard code fragment to deal with fragment.position.
 *
 * Three instructions are inserted at the start of the program:
 *
 *   RCP tmp.w,   new_input.wwww
 *   MUL tmp.xyz, new_input, tmp.wwww
 *   MAD tmp.xyz, tmp.xyz0, scale.xyz0, offset.xyz0
 *
 * Every read of input \p wpos after them is redirected to tmp. With
 * \p full_vtransform, scale and offset are the viewport scale and offset
 * state constants; otherwise both refer to the single window dimension
 * state constant.
 *
 * c->Program.InputsRead is updated; the bits are built with an unsigned 1
 * so that index 31 does not shift into the sign bit of an int.
 */
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
				int full_vtransform)
{
	unsigned tempregi = rc_find_free_temporary(c);
	struct rc_instruction * inst_rcp;
	struct rc_instruction * inst_mul;
	struct rc_instruction * inst_mad;
	struct rc_instruction * inst;

	c->Program.InputsRead &= ~(1u << wpos);
	c->Program.InputsRead |= 1u << new_input;

	/* perspective divide */
	inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
	inst_rcp->U.I.DstReg.Index = tempregi;
	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
	inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
	inst_rcp->U.I.SrcReg[0].Index = new_input;
	inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;

	inst_mul = rc_insert_new_instruction(c, inst_rcp);
	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
	inst_mul->U.I.DstReg.Index = tempregi;
	inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
	inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
	inst_mul->U.I.SrcReg[0].Index = new_input;
	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
	inst_mul->U.I.SrcReg[1].Index = tempregi;
	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;

	/* viewport transformation */
	inst_mad = rc_insert_new_instruction(c, inst_mul);
	inst_mad->U.I.Opcode = RC_OPCODE_MAD;
	inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
	inst_mad->U.I.DstReg.Index = tempregi;
	inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
	inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	inst_mad->U.I.SrcReg[0].Index = tempregi;
	inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
	inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
	inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
	inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
	inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;

	if (full_vtransform) {
		inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
	} else {
		/* Scale and offset both come from the same state constant. */
		inst_mad->U.I.SrcReg[1].Index =
		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
	}

	/* Redirect every later read of wpos to the transformed temporary. */
	for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
		unsigned i;

		for (i = 0; i < opcode->NumSrcRegs; i++) {
			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
			    inst->U.I.SrcReg[i].Index == wpos) {
				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
				inst->U.I.SrcReg[i].Index = tempregi;
			}
		}
	}
}
/**
 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
 * Gallium and OpenGL define it the other way around.
 *
 * An "ADD tmp.x, 1, -FACE.x" is therefore inserted at the beginning of the
 * shader, and all reads of FACE in the rest of the shader are redirected
 * to the newly allocated temporary.
 */
void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
{
	unsigned flipped = rc_find_free_temporary(c);
	struct rc_instruction *flip;
	struct rc_instruction *cur;

	/* flipped.x = 1 - FACE.x */
	flip = rc_insert_new_instruction(c, &c->Program.Instructions);
	flip->U.I.Opcode = RC_OPCODE_ADD;

	flip->U.I.DstReg.File = RC_FILE_TEMPORARY;
	flip->U.I.DstReg.Index = flipped;
	flip->U.I.DstReg.WriteMask = RC_MASK_X;

	/* First operand: the constant 1, expressed through the swizzle. */
	flip->U.I.SrcReg[0].File = RC_FILE_NONE;
	flip->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;

	/* Second operand: the negated hardware FACE value. */
	flip->U.I.SrcReg[1].File = RC_FILE_INPUT;
	flip->U.I.SrcReg[1].Index = face;
	flip->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
	flip->U.I.SrcReg[1].Negate = RC_MASK_XYZW;

	for (cur = flip->Next; cur != &c->Program.Instructions; cur = cur->Next) {
		const struct rc_opcode_info * info = rc_get_opcode_info(cur->U.I.Opcode);
		unsigned n;

		for (n = 0; n < info->NumSrcRegs; n++) {
			if (cur->U.I.SrcReg[n].File != RC_FILE_INPUT)
				continue;
			if (cur->U.I.SrcReg[n].Index != face)
				continue;

			cur->U.I.SrcReg[n].File = RC_FILE_TEMPORARY;
			cur->U.I.SrcReg[n].Index = flipped;
		}
	}
}
/**
 * Register-read callback that tracks the highest temporary register index
 * seen so far. \p userdata points to the int holding the running maximum.
 */
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	int *highest = userdata;

	if (file != RC_FILE_TEMPORARY)
		return;

	if ((int)index > *highest)
		*highest = index;
}
/**
 * Collect instruction and register statistics of the current program
 * into \p s.
 *
 * RC_OPCODE_BEGIN_TEX instructions are not counted. num_temp_regs is
 * derived from the highest temporary index reported for register reads.
 */
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
{
	struct rc_instruction * inst;
	int highest_temp = -1;

	memset(s, 0, sizeof(*s));

	for (inst = c->Program.Instructions.Next;
	     inst != &c->Program.Instructions;
	     inst = inst->Next) {
		const struct rc_opcode_info * info;

		rc_for_all_reads_mask(inst, reg_count_callback, &highest_temp);

		if (inst->Type != RC_INSTRUCTION_NORMAL) {
			/* Pair instruction: RGB and alpha halves are counted
			 * separately. */
			if (inst->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
				s->num_presub_ops++;
			if (inst->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
				s->num_presub_ops++;
			/* Assuming alpha will never be a flow control or
			 * a tex instruction. */
			if (inst->U.P.Alpha.Opcode != RC_OPCODE_NOP)
				s->num_alpha_insts++;
			if (inst->U.P.RGB.Opcode != RC_OPCODE_NOP)
				s->num_rgb_insts++;
			info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
		} else {
			info = rc_get_opcode_info(inst->U.I.Opcode);
			if (info->Opcode == RC_OPCODE_BEGIN_TEX)
				continue;
			if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE)
				s->num_presub_ops++;
		}

		if (info->IsFlowControl)
			s->num_fc_insts++;
		if (info->HasTexture)
			s->num_tex_insts++;
		s->num_insts++;
	}

	s->num_temp_regs = highest_temp + 1;
}
static void print_stats(struct radeon_compiler * c)
{
struct rc_program_stats s;
if (c->initial_num_insts <= 5)
return;
rc_get_stats(c, &s);
switch (c->type) {
case RC_VERTEX_PROGRAM:
fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
"~%4u Instructions\n"
"~%4u Flow Control Instructions\n"
"~%4u Temporary Registers\n"
"~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
s.num_insts, s.num_fc_insts, s.num_temp_regs);
break;
case RC_FRAGMENT_PROGRAM:
fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
"~%4u Instructions\n"
"~%4u Vector Instructions (RGB)\n"
"~%4u Scalar Instructions (Alpha)\n"
"~%4u Flow Control Instructions\n"
"~%4u Texture Instructions\n"
"~%4u Presub Operations\n"
"~%4u Temporary Registers\n"
"~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
s.num_temp_regs);
break;
default:
assert(0);
}
}
/* Human-readable program type names for the debug output; indexed by
 * enum rc_program_type (c->type). */
static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
"Vertex Program",
"Fragment Program"
};
void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
{
for (unsigned i = 0; list[i].name; i++) {
if (list[i].predicate) {
list[i].run(c, list[i].user);
if (c->Error)
return;
if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
rc_print_program(&c->Program);
}
}
}
}
/* Executes a list of compiler passes given in the parameter 'list'. */
void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
{
struct rc_program_stats s;
rc_get_stats(c, &s);
c->initial_num_insts = s.num_insts;
if (c->Debug & RC_DBG_LOG) {
fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
rc_print_program(&c->Program);
}
rc_run_compiler_passes(c, list);
if (c->Debug & RC_DBG_STATS)
print_stats(c);
}
/**
 * Compiler pass that checks the finished program against the hardware
 * limits. Currently only the number of constants is verified; an error is
 * flagged through rc_error() when c->max_constants is exceeded.
 *
 * \param user Unused; present to match the pass entry point signature.
 */
void rc_validate_final_shader(struct radeon_compiler *c, void *user)
{
	(void)user;

	/* Check the number of constants. Both values are unsigned, hence %u. */
	if (c->Program.Constants.Count > c->max_constants) {
		rc_error(c, "Too many constants. Max: %u, Got: %u\n",
			 c->max_constants, c->Program.Constants.Count);
	}
}

View file

@ -0,0 +1,171 @@
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#ifndef RADEON_COMPILER_H
#define RADEON_COMPILER_H
#include "main/compiler.h"
#include "memory_pool.h"
#include "radeon_code.h"
#include "radeon_program.h"
#include "radeon_emulate_loops.h"
/* Flags for radeon_compiler::Debug. */
/* Enables rc_debug() output, error messages on stderr and program dumps. */
#define RC_DBG_LOG (1 << 0)
/* Makes rc_run_compiler() print program statistics after the passes. */
#define RC_DBG_STATS (1 << 1)
/* Only used through a pointer in this header (radeon_compiler::SwizzleCaps). */
struct rc_swizzle_caps;
/**
 * Kind of program being compiled. RC_NUM_PROGRAM_TYPES is not a program
 * type; it is the number of types and is used to size lookup tables.
 */
enum rc_program_type {
RC_VERTEX_PROGRAM,
RC_FRAGMENT_PROGRAM,
RC_NUM_PROGRAM_TYPES
};
/**
 * State of one compiler instance: the program being transformed, the error
 * and debug state, and the description of the target hardware.
 * Set up with rc_init() and released with rc_destroy().
 */
struct radeon_compiler {
struct memory_pool Pool;
struct rc_program Program;
enum rc_program_type type;
unsigned Debug:2; /* RC_DBG_xxx flags */
unsigned Error:1; /* set by rc_error() */
char * ErrorMsg; /* first message recorded by rc_error(); freed by rc_destroy() */
/* Hardware specification. */
unsigned is_r400:1;
unsigned is_r500:1;
unsigned has_half_swizzles:1;
unsigned has_presub:1;
unsigned disable_optimizations:1;
unsigned max_temp_regs;
unsigned max_constants; /* checked by rc_validate_final_shader() */
int max_alu_insts;
unsigned max_tex_insts;
/* Whether to remove unused constants and empty holes in constant space. */
unsigned remove_unused_constants:1;
/**
* Variables used internally, not to be touched by callers
* of the compiler
*/
/*@{*/
struct rc_swizzle_caps * SwizzleCaps;
/*@}*/
struct emulate_loop_state loop_state;
unsigned initial_num_insts; /* Number of instructions at start. */
};
/* Lifetime of a compiler object. */
void rc_init(struct radeon_compiler * c);
void rc_destroy(struct radeon_compiler * c);
/* printf-style output to stderr; only active with RC_DBG_LOG. */
void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
/* Sets c->Error and remembers the first printf-style message in c->ErrorMsg. */
void rc_error(struct radeon_compiler * c, const char * fmt, ...);
/* Implementation detail of rc_assert(); reports the failure and returns 1. */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);
/**
 * This macro acts like an if-statement that can be used to implement
 * non-aborting assertions in the compiler.
 *
 * It checks whether \p cond is true. If not, an internal compiler error is
 * flagged and the if-clause is run.
 *
 * The expression evaluates to non-zero exactly when the assertion failed;
 * \p cond is evaluated once.
 *
 * A typical use-case would be:
 *
 * if (rc_assert(c, condition-that-must-be-true))
 * return;
 */
#define rc_assert(c, cond) \
(!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
/*
 * Program rewriting helpers. They walk all instructions of c->Program and
 * keep c->Program.InputsRead / OutputsWritten up to date where they move
 * inputs or outputs around.
 */
void rc_calculate_inputs_outputs(struct radeon_compiler * c);
void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input);
void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
int full_vtransform);
void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face);
/**
 * Compiler object for fragment programs: the generic compiler state plus
 * the fragment-specific inputs and the destination for the generated code.
 */
struct r300_fragment_program_compiler {
struct radeon_compiler Base;
struct rX00_fragment_program_code *code;
/* Optional transformations and features. */
struct r300_fragment_program_external_state state;
unsigned enable_shadow_ambient;
/* Register corresponding to the depthbuffer. */
unsigned OutputDepth;
/* Registers corresponding to the four colorbuffers. */
unsigned OutputColor[4];
void * UserData;
/* Callback supplied by the user of the compiler. It is handed an
 * allocate(data, input, hwreg) function together with its data pointer.
 * NOTE(review): presumably it calls allocate() once per input to bind it
 * to a hardware register -- confirm against the callers. */
void (*AllocateHwInputs)(
struct r300_fragment_program_compiler * c,
void (*allocate)(void * data, unsigned input, unsigned hwreg),
void * mydata);
};
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
/**
 * Compiler object for vertex programs: the generic compiler state plus the
 * vertex-specific inputs and the destination for the generated code.
 */
struct r300_vertex_program_compiler {
struct radeon_compiler Base;
struct r300_vertex_program_code *code;
uint32_t RequiredOutputs;
void * UserData;
/* Callback supplied by the user of the compiler.
 * NOTE(review): presumably fills code->inputs[] / code->outputs[] --
 * confirm against the callers. */
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
int PredicateIndex;
unsigned int PredicateMask;
};
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
/* Has the signature of a compiler pass entry point (radeon_compiler_pass::run). */
void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user);
/**
 * Description of one compiler pass. Lists of passes handed to
 * rc_run_compiler_passes() are terminated by an entry whose name is NULL.
 */
struct radeon_compiler_pass {
const char *name; /* Name of the pass. */
int dump; /* Dump the program after the pass when RC_DBG_LOG is set? */
int predicate; /* Run this pass? */
void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */
void *user; /* Optional parameter which is passed to the run function. */
};
/**
 * Program statistics as gathered by rc_get_stats().
 */
struct rc_program_stats {
unsigned num_insts; /* all counted instructions */
unsigned num_fc_insts; /* flow control instructions */
unsigned num_tex_insts; /* texture instructions */
unsigned num_rgb_insts; /* pair instructions with a non-NOP RGB opcode */
unsigned num_alpha_insts; /* pair instructions with a non-NOP alpha opcode */
unsigned num_presub_ops; /* presubtract operations in use */
unsigned num_temp_regs; /* highest temporary index seen, plus one */
};
/* Fills *s with the statistics of the current program. */
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
/* Executes a list of compiler passes given in the parameter 'list'. */
void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list);
/* Like rc_run_compiler_passes(), with the debug dump before and the
 * optional statistics after the passes. */
void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list);
/* Pass entry point that checks the final program against hardware limits. */
void rc_validate_final_shader(struct radeon_compiler *c, void *user);
#endif /* RADEON_COMPILER_H */

View file

@ -0,0 +1,701 @@
/*
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* \file
*/
#include "radeon_compiler_util.h"
#include "radeon_compiler.h"
#include "radeon_dataflow.h"
/**
 * Compute a mask with one bit set for each value selected by any of the
 * four channels of \p swz. Bits outside RC_MASK_XYZW are dropped from the
 * result.
 */
unsigned int rc_swizzle_to_writemask(unsigned int swz)
{
	unsigned int selected = 0;
	unsigned int chan;

	for (chan = 0; chan < 4; chan++)
		selected |= 1 << GET_SWZ(swz, chan);

	return selected & RC_MASK_XYZW;
}
/**
 * Resolve one swizzle selector against \p swz.
 *
 * A selector with bit 2 set is returned unchanged; any other selector
 * picks the corresponding channel of \p swz.
 */
rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
{
	if (!(idx & 0x4))
		return GET_SWZ(swz, idx);

	return idx;
}
/**
 * Standardize the number of channels used by a swizzle: whatever
 * instruction a swizzle belongs to, all four of its channels should hold
 * a defined value.
 *
 * @param initial_value The swizzle to complete.
 * @param channels The number of channels in initial_value that have a
 * meaningful value.
 * @return initial_value with every channel from \p channels upwards set to
 * RC_SWIZZLE_UNUSED.
 */
unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels)
{
	unsigned int chan = channels;

	while (chan < 4) {
		SET_SWZ(initial_value, chan, RC_SWIZZLE_UNUSED);
		chan++;
	}

	return initial_value;
}
/**
 * Build a swizzle whose four channels are the given selectors resolved
 * against \p src (see get_swz()). Each channel occupies three bits.
 */
unsigned int combine_swizzles4(unsigned int src,
		rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
{
	unsigned int combined = 0;

	combined |= get_swz(src, swz_x) << 0;
	combined |= get_swz(src, swz_y) << 3;
	combined |= get_swz(src, swz_z) << 6;
	combined |= get_swz(src, swz_w) << 9;

	return combined;
}
/**
 * Apply swizzle \p swz on top of swizzle \p src: each channel of the
 * result is the selector found in \p swz, resolved against \p src.
 */
unsigned int combine_swizzles(unsigned int src, unsigned int swz)
{
	return combine_swizzles4(src,
			GET_SWZ(swz, RC_SWIZZLE_X),
			GET_SWZ(swz, RC_SWIZZLE_Y),
			GET_SWZ(swz, RC_SWIZZLE_Z),
			GET_SWZ(swz, RC_SWIZZLE_W));
}
/**
 * Translate a single-channel writemask into the matching swizzle value.
 *
 * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W
 * @return The corresponding RC_SWIZZLE_x, or RC_SWIZZLE_UNUSED for any
 * other mask.
 */
rc_swizzle rc_mask_to_swizzle(unsigned int mask)
{
	rc_swizzle result;

	switch (mask) {
	case RC_MASK_X:
		result = RC_SWIZZLE_X;
		break;
	case RC_MASK_Y:
		result = RC_SWIZZLE_Y;
		break;
	case RC_MASK_Z:
		result = RC_SWIZZLE_Z;
		break;
	case RC_MASK_W:
		result = RC_SWIZZLE_W;
		break;
	default:
		result = RC_SWIZZLE_UNUSED;
		break;
	}

	return result;
}
/**
 * Reorder mask bits according to swizzle: bit N of the result is the mask
 * bit of the component that channel N of the swizzle selects. Channels
 * selecting a value of 4 or above contribute nothing.
 */
unsigned swizzle_mask(unsigned swizzle, unsigned mask)
{
	unsigned reordered = 0;
	unsigned chan;

	for (chan = 0; chan < 4; ++chan) {
		unsigned sel = GET_SWZ(swizzle, chan);

		if (sel >= 4)
			continue;

		reordered |= GET_BIT(mask, sel) << chan;
	}

	return reordered;
}
/**
 * Tell whether the source swizzles of an instruction have to follow when
 * its writemask is rewritten.
 *
 * @return 0 for texture instructions and for DP2, DP3, DP4, DDX and DDY;
 * 1 for everything else.
 */
static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info)
{
	switch (info->Opcode) {
	case RC_OPCODE_DP2:
	case RC_OPCODE_DP3:
	case RC_OPCODE_DP4:
	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		return 0;
	default:
		break;
	}

	return info->HasTexture ? 0 : 1;
}
/**
 * Move the channels of old_swizzle to the positions given by
 * conversion_swizzle: the value of channel i ends up in the channel that
 * conversion_swizzle names for i. Channels that nothing is moved into are
 * RC_SWIZZLE_UNUSED.
 *
 * @return A swizzle that results from converting old_swizzle using
 * conversion_swizzle
 */
unsigned int rc_adjust_channels(
	unsigned int old_swizzle,
	unsigned int conversion_swizzle)
{
	unsigned int result = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
	unsigned int chan;

	for (chan = 0; chan < 4; chan++) {
		unsigned int target = get_swz(conversion_swizzle, chan);

		if (target != RC_SWIZZLE_UNUSED) {
			SET_SWZ(result, target, GET_SWZ(old_swizzle, chan));
		}
	}

	return result;
}
/**
 * Translate a writemask through a conversion swizzle: for every bit set
 * in old_mask, the bit of the channel that conversion_swizzle maps it to
 * is set in the result. Bits mapped to RC_SWIZZLE_UNUSED are dropped.
 */
static unsigned int rewrite_writemask(
	unsigned int old_mask,
	unsigned int conversion_swizzle)
{
	unsigned int converted = 0;
	unsigned int chan;

	for (chan = 0; chan < 4; chan++) {
		if (GET_BIT(old_mask, chan)
		    && GET_SWZ(conversion_swizzle, chan) != RC_SWIZZLE_UNUSED) {
			converted |= (1 << GET_SWZ(conversion_swizzle, chan));
		}
	}

	return converted;
}
/**
 * Rewrite the writemask of the pair sub-instruction \p sub and adjust the
 * swizzles of all its arguments to match.
 *
 * conversion_swizzle represents a mapping of the old writemask to the
 * new writemask. For a detailed description of how conversion swizzles
 * work see rc_rewrite_swizzle().
 *
 * For opcodes whose sources must not follow the writemask only the
 * writemask itself is changed.
 */
void rc_pair_rewrite_writemask(
	struct rc_pair_sub_instruction * sub,
	unsigned int conversion_swizzle)
{
	const struct rc_opcode_info * opcode = rc_get_opcode_info(sub->Opcode);
	unsigned int arg;

	sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle);

	if (srcs_need_rewrite(opcode)) {
		for (arg = 0; arg < opcode->NumSrcRegs; arg++) {
			sub->Arg[arg].Swizzle =
				rc_adjust_channels(sub->Arg[arg].Swizzle,
						   conversion_swizzle);
		}
	}
}
/**
 * Source-register callback for rc_for_all_reads_src().
 *
 * \p userdata points to an unsigned int whose value is handed to
 * rc_adjust_channels() as the conversion swizzle for the source's swizzle.
 * \p inst is not used.
 */
static void normal_rewrite_writemask_cb(
void * userdata,
struct rc_instruction * inst,
struct rc_src_register * src)
{
unsigned int * new_mask = (unsigned int *)userdata;
src->Swizzle = rc_adjust_channels(src->Swizzle, *new_mask);
}
/**
 * This function is the same as rc_pair_rewrite_writemask() except it
 * operates on normal instructions.
 *
 * The destination writemask is translated through conversion_swizzle. For
 * texture instructions the TexSwizzle is set up so that the fetched
 * channels land in their new positions. For all opcodes whose sources must
 * follow the writemask, every source swizzle is adjusted with
 * conversion_swizzle as well.
 *
 * The source callback receives the conversion swizzle itself, because
 * rc_adjust_channels() interprets its second argument as a swizzle. The
 * rewritten writemask is a plain bit mask and must not be passed there.
 */
void rc_normal_rewrite_writemask(
	struct rc_instruction * inst,
	unsigned int conversion_swizzle)
{
	struct rc_sub_instruction * sub = &inst->U.I;
	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);

	sub->DstReg.WriteMask =
		rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle);

	if (info->HasTexture) {
		unsigned int i;

		assert(sub->TexSwizzle == RC_SWIZZLE_XYZW);
		for (i = 0; i < 4; i++) {
			unsigned int swz = GET_SWZ(conversion_swizzle, i);

			if (swz > 3)
				continue;
			SET_SWZ(sub->TexSwizzle, swz, i);
		}
	}

	if (!srcs_need_rewrite(info)) {
		return;
	}

	rc_for_all_reads_src(inst, normal_rewrite_writemask_cb,
			     &conversion_swizzle);
}
/**
 * Replace each selector 'swz' found in swizzle by
 * GET_SWZ(conversion_swizzle, swz).
 *
 * Selectors greater than 3 are kept as they are, and so are selectors whose
 * entry in conversion_swizzle is RC_SWIZZLE_UNUSED.
 *
 * Examples:
 *  - to turn every X into Y, use the conversion swizzle Y___ (0xff9);
 *  - to turn every Y into X, use _X__ (0xfc7);
 *  - to exchange X and Y, use YX__ (0xfc1).
 *
 * @param swizzle The swizzle to change
 * @param conversion_swizzle Describes the conversion to perform on the swizzle
 * @return A converted swizzle
 */
unsigned int rc_rewrite_swizzle(
	unsigned int swizzle,
	unsigned int conversion_swizzle)
{
	unsigned int result = swizzle;
	unsigned int chan;

	for (chan = 0; chan < 4; chan++) {
		unsigned int selector = GET_SWZ(swizzle, chan);
		unsigned int replacement = selector;

		if (selector <= 3) {
			unsigned int mapped =
				GET_SWZ(conversion_swizzle, selector);
			if (mapped != RC_SWIZZLE_UNUSED) {
				replacement = mapped;
			}
		}
		SET_SWZ(result, chan, replacement);
	}
	return result;
}
/**
* Left multiplication of a register with a swizzle
*/
struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
{
struct rc_src_register tmp = srcreg;
int i;
tmp.Swizzle = 0;
tmp.Negate = 0;
for(i = 0; i < 4; ++i) {
rc_swizzle swz = GET_SWZ(swizzle, i);
if (swz < 4) {
tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
} else {
tmp.Swizzle |= swz << (i*3);
}
}
return tmp;
}
/**
 * Zero every byte of reg, then set its swizzle to RC_SWIZZLE_XYZW.
 */
void reset_srcreg(struct rc_src_register* reg)
{
	memset(reg, 0, sizeof(*reg));
	reg->Swizzle = RC_SWIZZLE_XYZW;
}
/**
 * Compute which of the channels written to a destination are read by a
 * source.
 *
 * @return RC_MASK_NONE when source and destination name different registers
 * (file or index differ); otherwise dst_mask restricted to the channels
 * that rc_swizzle_to_writemask() reports for src_swz.
 */
unsigned int rc_src_reads_dst_mask(
	rc_register_file src_file,
	unsigned int src_idx,
	unsigned int src_swz,
	rc_register_file dst_file,
	unsigned int dst_idx,
	unsigned int dst_mask)
{
	if (src_file == dst_file && src_idx == dst_idx) {
		return dst_mask & rc_swizzle_to_writemask(src_swz);
	}
	return RC_MASK_NONE;
}
/**
 * Classify the four selectors of a swizzle.
 *
 * @return A bit mask specifying whether this swizzle will select from an RGB
 * source (any X, Y or Z selector), an Alpha source (any W selector), or
 * both.  RC_SOURCE_NONE if no channel selects X, Y, Z or W.
 */
unsigned int rc_source_type_swz(unsigned int swizzle)
{
	unsigned int type = RC_SOURCE_NONE;
	unsigned int chan;

	for (chan = 0; chan < 4; chan++) {
		switch (GET_SWZ(swizzle, chan)) {
		case RC_SWIZZLE_X:
		case RC_SWIZZLE_Y:
		case RC_SWIZZLE_Z:
			type |= RC_SOURCE_RGB;
			break;
		case RC_SWIZZLE_W:
			type |= RC_SOURCE_ALPHA;
			break;
		default:
			break;
		}
	}
	return type;
}
/**
 * Classify a channel mask.
 *
 * @return RC_SOURCE_RGB if mask contains any of X, Y or Z, RC_SOURCE_ALPHA
 * if it contains W, both bits or'd together if it contains both, and
 * RC_SOURCE_NONE otherwise.
 */
unsigned int rc_source_type_mask(unsigned int mask)
{
	unsigned int type = RC_SOURCE_NONE;

	if ((mask & RC_MASK_W) != 0) {
		type |= RC_SOURCE_ALPHA;
	}
	if ((mask & RC_MASK_XYZ) != 0) {
		type |= RC_SOURCE_RGB;
	}
	return type;
}
/* One register read recorded by rc_inst_can_use_presub() while it counts
 * how many source selects an instruction would need. */
struct src_select {
/* Register file of the register being read. */
rc_register_file File;
/* Index of the register within File. */
int Index;
/* RC_SOURCE_* bits; filled from rc_source_type_swz() of the reading
 * swizzle. */
unsigned int SrcType;
};
/* Working state of rc_inst_can_use_presub(). */
struct can_use_presub_data {
/* Recorded register reads.  can_use_presub_data_add_select() appends here
 * without checking the capacity. */
struct src_select Selects[5];
/* Number of valid entries in Selects. */
unsigned int SelectCount;
/* The source register that the presubtract result would replace. */
const struct rc_src_register * ReplaceReg;
/* Set to 1 by can_use_presub_read_cb() once ReplaceReg has been skipped,
 * so that it is skipped only once. */
unsigned int ReplaceRemoved;
};
/**
 * Append one register read to data->Selects and bump data->SelectCount.
 * No bounds check is performed; the caller must leave room in the array.
 */
static void can_use_presub_data_add_select(
	struct can_use_presub_data * data,
	rc_register_file file,
	unsigned int index,
	unsigned int src_type)
{
	struct src_select * slot = &data->Selects[data->SelectCount];

	slot->File = file;
	slot->Index = index;
	slot->SrcType = src_type;
	data->SelectCount++;
}
/**
 * rc_for_all_reads_src() callback used by rc_inst_can_use_presub().
 *
 * Records every source read by inst in can_use_presub_data->Selects, except
 * for the first occurrence of can_use_presub_data->ReplaceReg (the register
 * that is going to be replaced) and sources whose file is RC_FILE_NONE.
 */
static void can_use_presub_read_cb(
	void * userdata,
	struct rc_instruction * inst,
	struct rc_src_register * src)
{
	struct can_use_presub_data * data = userdata;

	/* The replaced register will no longer need a source select, so
	 * leave it out, but only once. */
	if (!data->ReplaceRemoved && src == data->ReplaceReg) {
		data->ReplaceRemoved = 1;
		return;
	}

	if (src->File != RC_FILE_NONE) {
		can_use_presub_data_add_select(data, src->File, src->Index,
					rc_source_type_swz(src->Swizzle));
	}
}
/**
 * Check whether inst has enough free source selects to read the result of
 * a presubtract operation in place of replace_reg.
 *
 * @param inst The (normal) instruction that would read the presubtract result
 * @param presub_op The presubtract operation; RC_PRESUB_NONE always succeeds
 * @param presub_writemask Not used by this function
 * @param replace_reg The source register of inst that would be replaced
 * @param presub_src0 First source of the presubtract operation
 * @param presub_src1 Second source; only read when
 * rc_presubtract_src_reg_count(presub_op) is greater than one
 * @return 1 if the presubtract operation can be used, 0 otherwise
 */
unsigned int rc_inst_can_use_presub(
	struct rc_instruction * inst,
	rc_presubtract_op presub_op,
	unsigned int presub_writemask,
	const struct rc_src_register * replace_reg,
	const struct rc_src_register * presub_src0,
	const struct rc_src_register * presub_src1)
{
	struct can_use_presub_data data;
	const struct rc_opcode_info * opcode_info =
		rc_get_opcode_info(inst->U.I.Opcode);
	unsigned int presub_src_count;
	unsigned int type0;
	unsigned int cur;
	int rgb_selects = 0;
	int alpha_selects = 0;

	if (presub_op == RC_PRESUB_NONE) {
		return 1;
	}

	if (opcode_info->HasTexture) {
		return 0;
	}

	/* An instruction could only carry two presubtract values if both
	 * operations were identical and read the same registers.
	 * XXX For now an instruction is limited to a single presubtract
	 * value. */
	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
		return 0;
	}

	/* Collect the sources inst keeps reading (replace_reg excluded). */
	memset(&data, 0, sizeof(data));
	data.ReplaceReg = replace_reg;
	rc_for_all_reads_src(inst, can_use_presub_read_cb, &data);

	presub_src_count = rc_presubtract_src_reg_count(presub_op);

	/* Add the sources of the presubtract operation itself. */
	type0 = rc_source_type_swz(presub_src0->Swizzle);
	can_use_presub_data_add_select(&data,
				presub_src0->File,
				presub_src0->Index,
				type0);

	if (presub_src_count > 1) {
		const unsigned int type1 =
			rc_source_type_swz(presub_src1->Swizzle);
		const unsigned int shared_types = type0 & type1;

		can_use_presub_data_add_select(&data,
					presub_src1->File,
					presub_src1->Index,
					type1);

		/* Two presubtract sources always occupy two source selects,
		 * even when they read the same register.  The duplicate
		 * elimination below would count them once, so compensate
		 * here. */
		if (presub_src0->File == presub_src1->File
		    && presub_src0->Index == presub_src1->Index) {
			if (shared_types & RC_SOURCE_RGB) {
				rgb_selects++;
			}
			if (shared_types & RC_SOURCE_ALPHA) {
				alpha_selects++;
			}
		}
	}

	/* Count the RGB and Alpha source selects.  When the same register
	 * shows up again later in the list, the earlier entry does not need
	 * a select of its own for the types the later entry covers. */
	for (cur = 0; cur < data.SelectCount; cur++) {
		unsigned int later;
		unsigned int remaining = data.Selects[cur].SrcType;

		for (later = cur + 1; later < data.SelectCount; later++) {
			if (data.Selects[cur].File == data.Selects[later].File
			    && data.Selects[cur].Index == data.Selects[later].Index) {
				remaining &= ~data.Selects[later].SrcType;
			}
		}
		if (remaining & RC_SOURCE_RGB) {
			rgb_selects++;
		}
		if (remaining & RC_SOURCE_ALPHA) {
			alpha_selects++;
		}
	}

	return (rgb_selects <= 3 && alpha_selects <= 3) ? 1 : 0;
}
/* State shared between rc_get_max_index() and max_callback(). */
struct max_data {
/* Highest register index seen so far; only meaningful if HasFileType. */
unsigned int Max;
/* Non-zero once at least one register of File has been seen. */
unsigned int HasFileType;
/* The register file being searched for. */
rc_register_file File;
};
/**
 * Read/write mask callback for rc_get_max_index(): remembers index in
 * max_data->Max if the access is to max_data->File and index is either the
 * first one seen or greater than the current maximum.
 */
static void max_callback(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct max_data * data = (struct max_data *)userdata;

	if (file != data->File) {
		return;
	}
	if (data->HasFileType && index <= data->Max) {
		return;
	}
	data->Max = index;
	data->HasFileType = 1;
}
/**
 * Scan every instruction of the program for reads and writes of the given
 * register file.
 *
 * @return The maximum index of the specified register file used by the
 * program, or -1 if the program never accesses that file.
 */
int rc_get_max_index(
	struct radeon_compiler * c,
	rc_register_file file)
{
	struct max_data data;
	struct rc_instruction * cur = c->Program.Instructions.Next;

	data.File = file;
	data.HasFileType = 0;
	data.Max = 0;

	/* The instruction list is circular; stop when back at its head. */
	while (cur != &c->Program.Instructions) {
		rc_for_all_reads_mask(cur, max_callback, &data);
		rc_for_all_writes_mask(cur, max_callback, &data);
		cur = cur->Next;
	}

	if (data.HasFileType) {
		return data.Max;
	}
	return -1;
}
/**
 * Collect the channels that sub reads from one of its source slots.
 *
 * Only arguments that use the given source slot and whose swizzle has
 * exactly the source type src_type (as computed by rc_source_type_swz())
 * contribute.
 *
 * @return The union of rc_swizzle_to_writemask() over the matching arguments.
 */
static unsigned int get_source_readmask(
	struct rc_pair_sub_instruction * sub,
	unsigned int source,
	unsigned int src_type)
{
	const struct rc_opcode_info * opcode_info =
		rc_get_opcode_info(sub->Opcode);
	unsigned int mask = 0;
	unsigned int arg;

	for (arg = 0; arg < opcode_info->NumSrcRegs; arg++) {
		if (sub->Arg[arg].Source == source
		    && rc_source_type_swz(sub->Arg[arg].Swizzle) == src_type) {
			mask |= rc_swizzle_to_writemask(sub->Arg[arg].Swizzle);
		}
	}
	return mask;
}
/**
 * This function attempts to remove a source from a pair instruction.
 *
 * The removal is refused if the RGB or Alpha sub-instruction still reads
 * components of the source that are not part of new_readmask.  On success
 * the RGB and/or Alpha source slot (depending on src_type) is zeroed.
 *
 * @param inst The pair instruction to modify
 * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd
 * @param source The index of the source to remove
 * @param new_readmask A mask representing the components that are read by
 * the source that is intended to replace the one being removed.  Pass zero
 * to remove a source without replacing it; that only succeeds if no
 * matching argument reads from the source.
 * @return 1 if the source was successfully removed, 0 if it was not
 */
unsigned int rc_pair_remove_src(
	struct rc_instruction * inst,
	unsigned int src_type,
	unsigned int source,
	unsigned int new_readmask)
{
	unsigned int still_read;

	still_read = get_source_readmask(&inst->U.P.RGB, source, src_type);
	still_read |= get_source_readmask(&inst->U.P.Alpha, source, src_type);

	/* Every component still being read must be provided by the
	 * replacement. */
	if ((still_read & new_readmask) != still_read) {
		return 0;
	}

	if (src_type & RC_SOURCE_RGB) {
		memset(&inst->U.P.RGB.Src[source], 0,
			sizeof(struct rc_pair_instruction_source));
	}
	if (src_type & RC_SOURCE_ALPHA) {
		memset(&inst->U.P.Alpha.Src[source], 0,
			sizeof(struct rc_pair_instruction_source));
	}
	return 1;
}
/**
 * For pair instructions the opcode of the RGB sub-instruction is examined.
 *
 * @return RC_OPCODE_NOP if inst is not a flow control instruction.
 * @return The opcode of inst if it is a flow control instruction.
 */
rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst)
{
	const struct rc_opcode_info * opcode_info;

	if (inst->Type != RC_INSTRUCTION_NORMAL) {
		opcode_info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
		/* A flow control instruction is not expected to carry an
		 * alpha instruction. */
		assert(!opcode_info->IsFlowControl ||
				inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);
	} else {
		opcode_info = rc_get_opcode_info(inst->U.I.Opcode);
	}

	if (!opcode_info->IsFlowControl) {
		return RC_OPCODE_NOP;
	}
	return opcode_info->Opcode;
}
/**
 * Walks backwards (via Prev) from endloop, skipping complete nested loops.
 *
 * @return The BGNLOOP instruction that starts the loop ended by endloop, or
 * NULL if the walk comes back around to endloop without finding one.
 */
struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop)
{
	unsigned int nested = 0;
	struct rc_instruction * cur = endloop->Prev;

	while (cur != endloop) {
		switch (rc_get_flow_control_inst(cur)) {
		case RC_OPCODE_ENDLOOP:
			/* Entering a nested loop from its end. */
			nested++;
			break;
		case RC_OPCODE_BGNLOOP:
			if (nested == 0) {
				return cur;
			}
			nested--;
			break;
		default:
			break;
		}
		cur = cur->Prev;
	}
	return NULL;
}
/**
 * Walks forwards (via Next) from bgnloop, skipping complete nested loops.
 *
 * @return The ENDLOOP instruction that ends the loop started by bgnloop, or
 * NULL if the walk comes back around to bgnloop without finding one.
 */
struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop)
{
	unsigned int nested = 0;
	struct rc_instruction * cur = bgnloop->Next;

	while (cur != bgnloop) {
		switch (rc_get_flow_control_inst(cur)) {
		case RC_OPCODE_BGNLOOP:
			/* Entering a nested loop. */
			nested++;
			break;
		case RC_OPCODE_ENDLOOP:
			if (nested == 0) {
				return cur;
			}
			nested--;
			break;
		default:
			break;
		}
		cur = cur->Next;
	}
	return NULL;
}
/**
 * Pairs the channels enabled in old_mask with the channels enabled in
 * new_mask, both in ascending order: the first enabled old channel maps to
 * the first enabled new channel, and so on.  Old channels that are not
 * enabled, or for which new_mask has no channel left, stay at the initial
 * value produced by rc_init_swizzle(RC_SWIZZLE_UNUSED, 0).
 *
 * @return A conversion swizzle for converting from old_mask->new_mask
 */
unsigned int rc_make_conversion_swizzle(
	unsigned int old_mask,
	unsigned int new_mask)
{
	unsigned int result = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
	unsigned int next_new = 0;
	unsigned int old_chan;

	for (old_chan = 0; old_chan < 4; old_chan++) {
		if (!GET_BIT(old_mask, old_chan)) {
			continue;
		}

		/* Advance to the next channel enabled in new_mask. */
		while (next_new < 4 && !GET_BIT(new_mask, next_new)) {
			next_new++;
		}

		if (next_new < 4) {
			SET_SWZ(result, old_chan, next_new);
			/* This new channel is now taken. */
			next_new++;
		}
	}
	return result;
}

View file

@ -0,0 +1,89 @@
#include "radeon_program_constants.h"

#ifndef RADEON_PROGRAM_UTIL_H
#define RADEON_PROGRAM_UTIL_H

#include "radeon_opcodes.h"

struct radeon_compiler;
struct rc_instruction;
struct rc_pair_instruction;
struct rc_pair_sub_instruction;
struct rc_src_register;

/* Swizzle construction and inspection helpers. */
unsigned int rc_swizzle_to_writemask(unsigned int swz);

rc_swizzle get_swz(unsigned int swz, rc_swizzle idx);

unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels);

unsigned int combine_swizzles4(unsigned int src,
			       rc_swizzle swz_x, rc_swizzle swz_y,
			       rc_swizzle swz_z, rc_swizzle swz_w);

unsigned int combine_swizzles(unsigned int src, unsigned int swz);

rc_swizzle rc_mask_to_swizzle(unsigned int mask);

unsigned swizzle_mask(unsigned swizzle, unsigned mask);

/* Moves the selector of each channel of old_swizzle to the channel named
 * by conversion_swizzle. */
unsigned int rc_adjust_channels(
	unsigned int old_swizzle,
	unsigned int conversion_swizzle);

/* Rewrites the writemask of a pair sub-instruction and adjusts the
 * swizzles of its sources according to conversion_swizzle. */
void rc_pair_rewrite_writemask(
	struct rc_pair_sub_instruction * sub,
	unsigned int conversion_swizzle);

/* Same as rc_pair_rewrite_writemask(), but for normal instructions. */
void rc_normal_rewrite_writemask(
	struct rc_instruction * inst,
	unsigned int conversion_swizzle);

/* Replaces each selector swz of swizzle by GET_SWZ(conversion_swizzle, swz).
 * The second parameter is a conversion swizzle, not a writemask. */
unsigned int rc_rewrite_swizzle(
	unsigned int swizzle,
	unsigned int conversion_swizzle);

/* Left multiplication of a register with a swizzle. */
struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);

/* Zeroes reg and sets its swizzle to RC_SWIZZLE_XYZW. */
void reset_srcreg(struct rc_src_register* reg);

/* Returns the channels of dst_mask read by the source, or RC_MASK_NONE if
 * source and destination are different registers. */
unsigned int rc_src_reads_dst_mask(
	rc_register_file src_file,
	unsigned int src_idx,
	unsigned int src_swz,
	rc_register_file dst_file,
	unsigned int dst_idx,
	unsigned int dst_mask);

/* Return RC_SOURCE_RGB / RC_SOURCE_ALPHA bits for a swizzle or a mask. */
unsigned int rc_source_type_swz(unsigned int swizzle);

unsigned int rc_source_type_mask(unsigned int mask);

/* Returns 1 if inst can read the result of the given presubtract
 * operation in place of replace_reg, 0 otherwise. */
unsigned int rc_inst_can_use_presub(
	struct rc_instruction * inst,
	rc_presubtract_op presub_op,
	unsigned int presub_writemask,
	const struct rc_src_register * replace_reg,
	const struct rc_src_register * presub_src0,
	const struct rc_src_register * presub_src1);

/* Returns the highest index of the given register file used by the
 * program, or -1 if the file is not used at all. */
int rc_get_max_index(
	struct radeon_compiler * c,
	rc_register_file file);

/* Returns 1 if the source was removed from the pair instruction, 0 if not. */
unsigned int rc_pair_remove_src(
	struct rc_instruction * inst,
	unsigned int src_type,
	unsigned int source,
	unsigned int new_readmask);

/* Returns the opcode of inst if it is a flow control instruction,
 * RC_OPCODE_NOP otherwise. */
rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst);

/* Find the BGNLOOP matching an ENDLOOP and vice versa; NULL if not found. */
struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop);
struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop);

/* Builds a conversion swizzle for converting from old_mask to new_mask. */
unsigned int rc_make_conversion_swizzle(
	unsigned int old_mask,
	unsigned int new_mask);

#endif /* RADEON_PROGRAM_UTIL_H */

View file

@ -0,0 +1,892 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_dataflow.h"
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_program.h"
/* Bundles a mask callback with its user data so that both can travel
 * through the single userdata pointer of rc_for_all_reads_src(). */
struct read_write_mask_data {
/* Opaque pointer handed back to Cb on every call. */
void * UserData;
/* Callback that receives (file, index, mask) of each register read. */
rc_read_write_mask_fn Cb;
};
/**
 * rc_for_all_reads_src() callback that converts a source register into a
 * (file, index, mask) report for the wrapped rc_read_write_mask_fn.
 *
 * Nothing is reported when the swizzle selects none of X, Y, Z, W.  A
 * source with RelAddr set additionally reports a read of channel X of
 * address register 0.
 */
static void reads_normal_callback(
	void * userdata,
	struct rc_instruction * fullinst,
	struct rc_src_register * src)
{
	struct read_write_mask_data * data = userdata;
	unsigned int mask = 0;
	unsigned int chan;

	for (chan = 0; chan < 4; chan++) {
		mask |= 1 << GET_SWZ(src->Swizzle, chan);
	}
	/* Keep only the bits that correspond to real channels. */
	mask &= RC_MASK_XYZW;

	if (!mask) {
		return;
	}

	data->Cb(data->UserData, fullinst, src->File, src->Index, mask);

	if (src->RelAddr) {
		data->Cb(data->UserData, fullinst, RC_FILE_ADDRESS, 0,
			RC_MASK_X);
	}
}
static void pair_get_src_refmasks(unsigned int * refmasks,
struct rc_pair_instruction * inst,
unsigned int swz, unsigned int src)
{
if (swz >= 4)
return;
if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) {
if(src == RC_PAIR_PRESUB_SRC) {
unsigned int i;
int srcp_regs =
rc_presubtract_src_reg_count(
inst->RGB.Src[src].Index);
for(i = 0; i < srcp_regs; i++) {
refmasks[i] |= 1 << swz;
}
}
else {
refmasks[src] |= 1 << swz;
}
}
if (swz == RC_SWIZZLE_W) {
if (src == RC_PAIR_PRESUB_SRC) {
unsigned int i;
int srcp_regs = rc_presubtract_src_reg_count(
inst->Alpha.Src[src].Index);
for(i = 0; i < srcp_regs; i++) {
refmasks[i] |= 1 << swz;
}
}
else {
refmasks[src] |= 1 << swz;
}
}
}
/**
 * Report the register reads of a pair instruction to cb.
 *
 * The first three channels of every RGB and Alpha argument swizzle are
 * folded into one read mask per source slot.  Each used RGB source is then
 * reported with the XYZ part of its mask, and each used Alpha source whose
 * mask contains W is reported with RC_MASK_W.
 */
static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
	struct rc_pair_instruction * pair = &fullinst->U.P;
	unsigned int refmasks[3] = { 0, 0, 0 };
	unsigned int arg;
	unsigned int chan;
	unsigned int src;

	for (arg = 0; arg < 3; ++arg) {
		for (chan = 0; chan < 3; ++chan) {
			unsigned int rgb_sel =
				GET_SWZ(pair->RGB.Arg[arg].Swizzle, chan);
			unsigned int alpha_sel =
				GET_SWZ(pair->Alpha.Arg[arg].Swizzle, chan);

			pair_get_src_refmasks(refmasks, pair, rgb_sel,
						pair->RGB.Arg[arg].Source);
			pair_get_src_refmasks(refmasks, pair, alpha_sel,
						pair->Alpha.Arg[arg].Source);
		}
	}

	for (src = 0; src < 3; ++src) {
		const unsigned int rgb_mask = refmasks[src] & RC_MASK_XYZ;

		if (pair->RGB.Src[src].Used && rgb_mask) {
			cb(userdata, fullinst, pair->RGB.Src[src].File,
				pair->RGB.Src[src].Index, rgb_mask);
		}
		if (pair->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) {
			cb(userdata, fullinst, pair->Alpha.Src[src].File,
				pair->Alpha.Src[src].Index, RC_MASK_W);
		}
	}
}
/**
 * Call cb for every (argument, source) combination read by one
 * sub-instruction of a pair instruction.
 *
 * Arguments whose swizzle selects neither RGB nor Alpha are skipped.  An
 * argument that reads the presubtract result triggers one call per source
 * register of the presubtract operation; any other argument triggers one
 * call with the source returned by rc_pair_get_src(), if there is one.
 */
static void pair_sub_for_all_args(
	struct rc_instruction * fullinst,
	struct rc_pair_sub_instruction * sub,
	rc_pair_read_arg_fn cb,
	void * userdata)
{
	const struct rc_opcode_info * opcode_info =
		rc_get_opcode_info(sub->Opcode);
	int arg_idx;

	for (arg_idx = 0; arg_idx < opcode_info->NumSrcRegs; arg_idx++) {
		struct rc_pair_instruction_source * srcs;
		unsigned int presub_op;
		unsigned int presub_src_count;
		unsigned int j;
		unsigned int src_type =
			rc_source_type_swz(sub->Arg[arg_idx].Swizzle);

		if (src_type == RC_SOURCE_NONE)
			continue;

		if (sub->Arg[arg_idx].Source != RC_PAIR_PRESUB_SRC) {
			struct rc_pair_instruction_source * src =
				rc_pair_get_src(&fullinst->U.P,
						&sub->Arg[arg_idx]);
			if (src) {
				cb(userdata, fullinst, &sub->Arg[arg_idx], src);
			}
			continue;
		}

		/* Presubtract: pick the RGB source array if the argument
		 * reads any RGB channel, the Alpha one otherwise. */
		if (src_type & RC_SOURCE_RGB) {
			srcs = fullinst->U.P.RGB.Src;
		} else {
			srcs = fullinst->U.P.Alpha.Src;
		}
		/* The Index of the presubtract slot holds the operation. */
		presub_op = srcs[RC_PAIR_PRESUB_SRC].Index;
		presub_src_count = rc_presubtract_src_reg_count(presub_op);

		for (j = 0; j < presub_src_count; j++) {
			cb(userdata, fullinst, &sub->Arg[arg_idx], &srcs[j]);
		}
	}
}
/**
 * Calls the callback function (cb) for each source used by the instruction.
 *
 * Sources whose file is RC_FILE_NONE are skipped.  A source that reads the
 * presubtract result (RC_FILE_PRESUB) is expanded into one call per source
 * register of the instruction's presubtract operation.
 *
 * This function only works with normal instructions; for any other
 * instruction type it asserts and returns without calling cb.
 *
 * @param inst The instruction to inspect
 * @param cb Callback invoked as cb(userdata, inst, src)
 * @param userdata Opaque pointer passed through to cb
 */
void rc_for_all_reads_src(
	struct rc_instruction * inst,
	rc_read_src_fn cb,
	void * userdata)
{
	const struct rc_opcode_info * opcode;

	/* Check the type before touching inst->U.I: for a pair instruction
	 * that union member does not hold a valid opcode, so it must not be
	 * looked up. */
	if (inst->Type != RC_INSTRUCTION_NORMAL) {
		assert(0);
		return;
	}

	opcode = rc_get_opcode_info(inst->U.I.Opcode);

	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {

		if (inst->U.I.SrcReg[src].File == RC_FILE_NONE)
			continue;

		if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) {
			unsigned int i;
			unsigned int srcp_regs = rc_presubtract_src_reg_count(
						inst->U.I.PreSub.Opcode);
			for( i = 0; i < srcp_regs; i++) {
				cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]);
			}
		} else {
			cb(userdata, inst, &inst->U.I.SrcReg[src]);
		}
	}
}
/**
 * Calls the callback function (cb) for each argument of the RGB
 * sub-instruction and then for each argument of the Alpha sub-instruction.
 *
 * This function only works with pair instructions; for any other
 * instruction type it asserts and returns without calling cb.
 */
void rc_pair_for_all_reads_arg(
	struct rc_instruction * inst,
	rc_pair_read_arg_fn cb,
	void * userdata)
{
	if (inst->Type == RC_INSTRUCTION_PAIR) {
		pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata);
		pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata);
		return;
	}

	assert(0);
}
/**
 * Calls a callback function for all register reads.
 *
 * The callback may be invoked more than once for the same register when the
 * instruction references it several times, so callers must treat the
 * reports conservatively.  The writemask of the instruction is not taken
 * into account.
 */
void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
{
	struct read_write_mask_data wrapped;

	if (inst->Type != RC_INSTRUCTION_NORMAL) {
		reads_pair(inst, cb, userdata);
		return;
	}

	wrapped.Cb = cb;
	wrapped.UserData = userdata;
	rc_for_all_reads_src(inst, reads_normal_callback, &wrapped);
}
/**
 * Report the register writes of a normal instruction: its destination
 * register (if the opcode has one and the writemask is non-empty) and, if
 * WriteALUResult is set, channel X of the special ALU result register.
 */
static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
	struct rc_sub_instruction * sub = &fullinst->U.I;
	const struct rc_opcode_info * opcode_info =
		rc_get_opcode_info(sub->Opcode);

	if (opcode_info->HasDstReg && sub->DstReg.WriteMask) {
		cb(userdata, fullinst, sub->DstReg.File, sub->DstReg.Index,
			sub->DstReg.WriteMask);
	}

	if (sub->WriteALUResult) {
		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT,
			RC_MASK_X);
	}
}
/**
 * Report the register writes of a pair instruction.
 *
 * Both destinations are always reported as RC_FILE_TEMPORARY: the RGB one
 * with its writemask, the Alpha one with RC_MASK_W.  If WriteALUResult is
 * set, channel X of the special ALU result register is reported as well.
 */
static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
	struct rc_pair_instruction * pair = &fullinst->U.P;

	if (pair->RGB.WriteMask) {
		cb(userdata, fullinst, RC_FILE_TEMPORARY, pair->RGB.DestIndex,
			pair->RGB.WriteMask);
	}

	if (pair->Alpha.WriteMask) {
		cb(userdata, fullinst, RC_FILE_TEMPORARY, pair->Alpha.DestIndex,
			RC_MASK_W);
	}

	if (pair->WriteALUResult) {
		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT,
			RC_MASK_X);
	}
}
/**
 * Calls a callback function for all register writes in the instruction,
 * reporting writemasks to the callback function.
 *
 * \warning Does not report output registers for paired instructions!
 */
void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
{
	if (inst->Type != RC_INSTRUCTION_NORMAL) {
		writes_pair(inst, cb, userdata);
		return;
	}
	writes_normal(inst, cb, userdata);
}
/* Bundles a per-channel callback with its user data for mask_to_chan_cb(). */
struct mask_to_chan_data {
/* Opaque pointer handed back to Fn on every call. */
void * UserData;
/* Callback invoked once for every channel set in a reported mask. */
rc_read_write_chan_fn Fn;
};
/**
 * Mask callback that fans a (file, index, mask) report out into one call of
 * the wrapped per-channel callback for every channel set in mask.
 */
static void mask_to_chan_cb(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct mask_to_chan_data * wrapped = data;
	unsigned int chan;

	for (chan = 0; chan < 4; ++chan) {
		if (!GET_BIT(mask, chan)) {
			continue;
		}
		wrapped->Fn(wrapped->UserData, inst, file, index, chan);
	}
}
/**
 * Calls a callback function for all sourced register channels.
 *
 * The same channel may be reported more than once, and the writemask of the
 * instruction is not taken into account, so the result is conservative.
 */
void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
{
	struct mask_to_chan_data wrapped;

	wrapped.Fn = cb;
	wrapped.UserData = userdata;
	rc_for_all_reads_mask(inst, &mask_to_chan_cb, &wrapped);
}
/**
 * Calls a callback function for all written register channels.
 *
 * \warning Does not report output registers for paired instructions!
 */
void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
{
	struct mask_to_chan_data wrapped;

	wrapped.Fn = cb;
	wrapped.UserData = userdata;
	rc_for_all_writes_mask(inst, &mask_to_chan_cb, &wrapped);
}
/**
 * Let cb remap every register referenced by a normal instruction: the
 * destination (if the opcode has one), each source register and, when a
 * source reads the presubtract result, the presubtract source registers.
 * Whatever file/index cb leaves behind is written back to the instruction.
 */
static void remap_normal_instruction(struct rc_instruction * fullinst,
		rc_remap_register_fn cb, void * userdata)
{
	struct rc_sub_instruction * sub = &fullinst->U.I;
	const struct rc_opcode_info * opcode_info =
		rc_get_opcode_info(sub->Opcode);
	unsigned int presub_done = 0;
	unsigned int src;

	if (opcode_info->HasDstReg) {
		rc_register_file file = sub->DstReg.File;
		unsigned int index = sub->DstReg.Index;

		cb(userdata, fullinst, &file, &index);

		sub->DstReg.File = file;
		sub->DstReg.Index = index;
	}

	for (src = 0; src < opcode_info->NumSrcRegs; ++src) {
		rc_register_file file = sub->SrcReg[src].File;
		unsigned int index = sub->SrcReg[src].Index;
		unsigned int presub_src_count;
		unsigned int i;

		if (file != RC_FILE_PRESUB) {
			cb(userdata, fullinst, &file, &index);
			sub->SrcReg[src].File = file;
			sub->SrcReg[src].Index = index;
			continue;
		}

		presub_src_count =
			rc_presubtract_src_reg_count(sub->PreSub.Opcode);

		/* Several sources may read the presubtract result, but its
		 * source registers must be remapped only once. */
		if (presub_done)
			continue;

		for (i = 0; i < presub_src_count; i++) {
			file = sub->PreSub.SrcReg[i].File;
			index = sub->PreSub.SrcReg[i].Index;
			cb(userdata, fullinst, &file, &index);
			sub->PreSub.SrcReg[i].File = file;
			sub->PreSub.SrcReg[i].Index = index;
		}
		presub_done = 1;
	}
}
/**
 * Let cb remap every register referenced by a pair instruction.
 *
 * Destinations with a non-empty writemask are presented to cb as
 * RC_FILE_TEMPORARY registers and only the resulting index is stored back.
 * For every used RGB and Alpha source both file and index are stored back.
 */
static void remap_pair_instruction(struct rc_instruction * fullinst,
		rc_remap_register_fn cb, void * userdata)
{
	struct rc_pair_instruction * pair = &fullinst->U.P;
	rc_register_file file;
	unsigned int index;
	unsigned int src;

	if (pair->RGB.WriteMask) {
		file = RC_FILE_TEMPORARY;
		index = pair->RGB.DestIndex;
		cb(userdata, fullinst, &file, &index);
		pair->RGB.DestIndex = index;
	}

	if (pair->Alpha.WriteMask) {
		file = RC_FILE_TEMPORARY;
		index = pair->Alpha.DestIndex;
		cb(userdata, fullinst, &file, &index);
		pair->Alpha.DestIndex = index;
	}

	for (src = 0; src < 3; ++src) {
		if (pair->RGB.Src[src].Used) {
			file = pair->RGB.Src[src].File;
			index = pair->RGB.Src[src].Index;
			cb(userdata, fullinst, &file, &index);
			pair->RGB.Src[src].File = file;
			pair->RGB.Src[src].Index = index;
		}

		if (pair->Alpha.Src[src].Used) {
			file = pair->Alpha.Src[src].File;
			index = pair->Alpha.Src[src].Index;
			cb(userdata, fullinst, &file, &index);
			pair->Alpha.Src[src].File = file;
			pair->Alpha.Src[src].Index = index;
		}
	}
}
/**
 * Remap all register accesses according to the given function.
 *
 * The function \p cb is called for each register referenced by \p inst
 * (both read and written) with pointers to the register's file and index;
 * whatever values it leaves there are stored back into the instruction.
 */
void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata)
{
	if (inst->Type != RC_INSTRUCTION_NORMAL) {
		remap_pair_instruction(inst, cb, userdata);
		return;
	}
	remap_normal_instruction(inst, cb, userdata);
}
/* Per-branch-level bookkeeping used while searching for the readers of a
 * write (see push_branch_mask() and pop_branch_mask()). */
struct branch_write_mask {
/* Snapshot of AliveWriteMask taken when this level was pushed. */
unsigned int IfWriteMask:4;
/* AliveWriteMask as it was when the ELSE of this level was reached. */
unsigned int ElseWriteMask:4;
/* Set once an ELSE has been seen for this level. */
unsigned int HasElse:1;
};
/* Holds either a normal-instruction or a pair-instruction read callback.
 * NOTE(review): not referenced in the visible part of this file. */
union get_readers_read_cb {
/* Callback for sources of normal instructions. */
rc_read_src_fn I;
/* Callback for arguments of pair instructions. */
rc_pair_read_arg_fn P;
};
/* State threaded through the callbacks that collect the readers of a
 * single register write. */
struct get_readers_callback_data {
/* Compiler context; provides the memory pool and the instruction list. */
struct radeon_compiler * C;
/* Result object: collected readers plus the abort flags and masks. */
struct rc_reader_data * ReaderData;
/* Optional user callback for each reading source of a normal instruction. */
rc_read_src_fn ReadNormalCB;
/* Optional user callback for each reading argument of a pair instruction. */
rc_pair_read_arg_fn ReadPairCB;
/* Optional user callback for each register write that is visited. */
rc_read_write_mask_fn WriteCB;
/* File, index and mask of the register write whose readers are searched. */
rc_register_file DstFile;
unsigned int DstIndex;
unsigned int DstMask;
/* Channels of DstMask that have not been overwritten so far. */
unsigned int AliveWriteMask;
/* For convenience, this is indexed starting at 1 */
struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1];
};
/**
 * Append a reader entry to data->Readers, growing the array from pool if
 * necessary, and fill in its instruction and mask.
 *
 * @return The newly appended entry, so callers can fill in the
 * type-specific part.
 */
static struct rc_reader * add_reader(
	struct memory_pool * pool,
	struct rc_reader_data * data,
	struct rc_instruction * inst,
	unsigned int mask)
{
	struct rc_reader * reader;

	memory_pool_array_reserve(pool, struct rc_reader, data->Readers,
			data->ReaderCount, data->ReadersReserved, 1);

	reader = &data->Readers[data->ReaderCount];
	data->ReaderCount++;

	reader->Inst = inst;
	reader->WriteMask = mask;
	return reader;
}
/**
 * Record a normal instruction as a reader, remembering which of its source
 * registers performs the read.
 */
static void add_reader_normal(
	struct memory_pool * pool,
	struct rc_reader_data * data,
	struct rc_instruction * inst,
	unsigned int mask,
	struct rc_src_register * src)
{
	struct rc_reader * reader = add_reader(pool, data, inst, mask);

	reader->U.I.Src = src;
}
/**
 * Record a pair instruction as a reader, remembering the argument and the
 * source slot that perform the read.
 */
static void add_reader_pair(
	struct memory_pool * pool,
	struct rc_reader_data * data,
	struct rc_instruction * inst,
	unsigned int mask,
	struct rc_pair_instruction_arg * arg,
	struct rc_pair_instruction_source * src)
{
	struct rc_reader * reader = add_reader(pool, data, inst, mask);

	reader->U.P.Src = src;
	reader->U.P.Arg = arg;
}
/**
 * Common part of the read callbacks: decides whether a source reads the
 * register write being tracked and updates the abort state.
 *
 * @param cb_data Tracking state of the current search
 * @param has_rel_addr Non-zero if the source uses relative addressing;
 * this aborts the search and reports no shared channels
 * @param file Register file of the source
 * @param index Register index of the source
 * @param swizzle Swizzle of the source
 * @return The channels of the still-alive write that the source reads, or
 * RC_MASK_NONE if it reads none of them.
 */
static unsigned int get_readers_read_callback(
	struct get_readers_callback_data * cb_data,
	unsigned int has_rel_addr,
	rc_register_file file,
	unsigned int index,
	unsigned int swizzle)
{
	struct rc_reader_data * reader_data = cb_data->ReaderData;
	unsigned int shared_mask;
	unsigned int read_mask;

	if (has_rel_addr) {
		reader_data->Abort = 1;
		return RC_MASK_NONE;
	}

	shared_mask = rc_src_reads_dst_mask(file, index, swizzle,
		cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask);

	if (shared_mask == RC_MASK_NONE)
		return shared_mask;

	/* From here on the source is known to read the register written by
	 * cb_data->ReaderData->Writer. */
	read_mask = rc_swizzle_to_writemask(swizzle);

	if (reader_data->AbortOnRead & read_mask) {
		reader_data->Abort = 1;
		return shared_mask;
	}

	/* Inside a loop, remember the channels read here so that a later
	 * write to them aborts the search. */
	if (reader_data->LoopDepth > 0) {
		reader_data->AbortOnWrite |=
				(read_mask & cb_data->AliveWriteMask);
	}

	/* The source also reads channels that the tracked write does not
	 * (or no longer) provide.
	 * XXX The behavior in this case should be configurable. */
	if ((read_mask & ~cb_data->AliveWriteMask) != 0) {
		reader_data->Abort = 1;
	}

	return shared_mask;
}
/**
 * Pair-instruction read callback: determines whether the given argument of
 * inst reads the tracked write and, if so, notifies the user callback and
 * records the reader.
 */
static void get_readers_pair_read_callback(
	void * userdata,
	struct rc_instruction * inst,
	struct rc_pair_instruction_arg * arg,
	struct rc_pair_instruction_source * src)
{
	struct get_readers_callback_data * d = userdata;
	unsigned int shared_mask = get_readers_read_callback(d,
				0 /*Pair Instructions don't use RelAddr*/,
				src->File, src->Index, arg->Swizzle);

	if (shared_mask == RC_MASK_NONE)
		return;

	/* Give the user callback its say before the abort state is
	 * evaluated. */
	if (d->ReadPairCB)
		d->ReadPairCB(d->ReaderData, inst, arg, src);

	if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
		return;

	add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src);
}
/**
 * This function is used by rc_get_readers_normal() to determine whether inst
 * is a reader of userdata->ReaderData->Writer.  If it is, the user callback
 * is notified and the reader is recorded.
 */
static void get_readers_normal_read_callback(
	void * userdata,
	struct rc_instruction * inst,
	struct rc_src_register * src)
{
	struct get_readers_callback_data * d = userdata;
	unsigned int shared_mask = get_readers_read_callback(d,
			src->RelAddr, src->File, src->Index, src->Swizzle);

	if (shared_mask == RC_MASK_NONE)
		return;

	/* The user callback could potentially clear d->ReaderData->Abort,
	 * so it has to run before the abort state is evaluated. */
	if (d->ReadNormalCB)
		d->ReadNormalCB(d->ReaderData, inst, src);

	if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
		return;

	add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src);
}
/**
 * This function is used by rc_get_readers_normal() to determine when
 * userdata->ReaderData->Writer is dead (i.e. all components of its
 * destination register have been overwritten by other instructions).
 *
 * A write to the tracked register removes the overwritten channels from
 * AliveWriteMask and AbortOnRead, and aborts the search if any of them is
 * flagged in AbortOnWrite.  The user's write callback, if any, is invoked
 * for every write regardless of the register.
 */
static void get_readers_write_callback(
	void *userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct get_readers_callback_data * d = userdata;

	if (file == d->DstFile && index == d->DstIndex) {
		const unsigned int overwritten = mask & d->DstMask;

		d->ReaderData->AbortOnRead &= ~overwritten;
		d->AliveWriteMask &= ~overwritten;
		if (d->ReaderData->AbortOnWrite & overwritten) {
			d->ReaderData->Abort = 1;
		}
	}

	if (d->WriteCB)
		d->WriteCB(d->ReaderData, inst, file, index, mask);
}
/**
 * Enter a new branch level: increments *branch_depth and stores the current
 * AliveWriteMask as the IfWriteMask of the new level.  If the new depth
 * exceeds R500_PFS_MAX_BRANCH_DEPTH_FULL, the search is aborted instead and
 * nothing is stored (the depth stays incremented).
 */
static void push_branch_mask(
	struct get_readers_callback_data * d,
	unsigned int * branch_depth)
{
	const unsigned int depth = ++(*branch_depth);

	if (depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) {
		d->ReaderData->Abort = 1;
		return;
	}

	d->BranchMasks[depth].IfWriteMask = d->AliveWriteMask;
}
/**
 * Leave the current branch level: merges the write masks recorded for the
 * level back into AliveWriteMask and AbortOnRead, clears the level's entry
 * and decrements *branch_depth.
 *
 * Channels that were alive when the level was entered but were overwritten
 * inside it are added to AbortOnRead.
 */
static void pop_branch_mask(
	struct get_readers_callback_data * d,
	unsigned int * branch_depth)
{
	struct branch_write_mask * level = &d->BranchMasks[*branch_depth];
	const unsigned int if_mask = level->IfWriteMask;
	const unsigned int else_mask = level->ElseWriteMask;

	if (!level->HasElse) {
		d->ReaderData->AbortOnRead |= if_mask & ~d->AliveWriteMask;
		d->AliveWriteMask = if_mask;
	} else {
		/* Abort on read for components that were written in the IF
		 * block. */
		d->ReaderData->AbortOnRead |= if_mask & ~else_mask;
		/* Abort on read for components that were written in the ELSE
		 * block. */
		d->ReaderData->AbortOnRead |= else_mask & ~d->AliveWriteMask;

		d->AliveWriteMask = if_mask
			^ ((if_mask ^ else_mask)
			& (if_mask ^ d->AliveWriteMask));
	}

	memset(level, 0, sizeof(*level));
	(*branch_depth)--;
}
/**
 * Scan the instructions following @p writer for reads and writes of one
 * register written by it, keeping track of control flow.
 *
 * The signature matches rc_read_write_mask_fn so that the function can be
 * passed to rc_for_all_writes_mask().
 *
 * @param userdata  Pointer to a struct get_readers_callback_data.
 * @param writer    Instruction whose destination is analysed.
 * @param dst_file  Register file of the destination.
 * @param dst_index Register index of the destination.
 * @param dst_mask  Components written; nothing is scanned when this is 0.
 */
static void get_readers_for_single_write(
void * userdata,
struct rc_instruction * writer,
rc_register_file dst_file,
unsigned int dst_index,
unsigned int dst_mask)
{
struct rc_instruction * tmp;
unsigned int branch_depth = 0;
struct rc_instruction * endloop = NULL;
unsigned int abort_on_read_at_endloop = 0;
struct get_readers_callback_data * d = userdata;
/* Reset the per-write state; all components start out alive. */
d->ReaderData->Writer = writer;
d->ReaderData->AbortOnRead = 0;
d->ReaderData->AbortOnWrite = 0;
d->ReaderData->LoopDepth = 0;
d->ReaderData->InElse = 0;
d->DstFile = dst_file;
d->DstIndex = dst_index;
d->DstMask = dst_mask;
d->AliveWriteMask = dst_mask;
memset(d->BranchMasks, 0, sizeof(d->BranchMasks));
if (!dst_mask)
return;
for(tmp = writer->Next; tmp != &d->C->Program.Instructions;
tmp = tmp->Next){
rc_opcode opcode = rc_get_flow_control_inst(tmp);
switch(opcode) {
case RC_OPCODE_BGNLOOP:
/* Loops are tracked like branches for liveness purposes. */
d->ReaderData->LoopDepth++;
push_branch_mask(d, &branch_depth);
break;
case RC_OPCODE_ENDLOOP:
if (d->ReaderData->LoopDepth > 0) {
d->ReaderData->LoopDepth--;
/* Leaving the outermost loop entered after the writer. */
if (d->ReaderData->LoopDepth == 0) {
d->ReaderData->AbortOnWrite = 0;
}
pop_branch_mask(d, &branch_depth);
} else {
/* Here we have reached an ENDLOOP without
 * seeing its BGNLOOP. This means that
 * the writer was written inside of a loop,
 * so it could have readers that are above it
 * (i.e. they have a lower IP). To find these
 * readers we jump to the BGNLOOP instruction
 * and check each instruction until we get
 * back to the writer.
 */
endloop = tmp;
tmp = rc_match_endloop(tmp);
if (!tmp) {
rc_error(d->C, "Failed to match endloop.\n");
d->ReaderData->Abort = 1;
return;
}
/* Remember AbortOnRead so it can be restored once the scan is
 * back at the writer; until then every alive component aborts
 * on read. The continue advances to the instruction after the
 * BGNLOOP. */
abort_on_read_at_endloop = d->ReaderData->AbortOnRead;
d->ReaderData->AbortOnRead |= d->AliveWriteMask;
continue;
}
break;
case RC_OPCODE_IF:
push_branch_mask(d, &branch_depth);
break;
case RC_OPCODE_ELSE:
if (branch_depth == 0) {
/* ELSE without an IF seen after the writer: instructions are
 * skipped (see the InElse check below) until the ENDIF. */
d->ReaderData->InElse = 1;
} else {
/* Save the alive mask reached at the end of the IF block and
 * restart the ELSE block from the mask saved at the IF. */
unsigned int temp_mask = d->AliveWriteMask;
d->AliveWriteMask =
d->BranchMasks[branch_depth].IfWriteMask;
d->BranchMasks[branch_depth].ElseWriteMask =
temp_mask;
d->BranchMasks[branch_depth].HasElse = 1;
}
break;
case RC_OPCODE_ENDIF:
if (branch_depth == 0) {
/* ENDIF of a branch that was entered before the writer. */
d->ReaderData->AbortOnRead = d->AliveWriteMask;
d->ReaderData->InElse = 0;
}
else {
pop_branch_mask(d, &branch_depth);
}
break;
default:
break;
}
/* Neither reads nor writes are processed while InElse is set. */
if (d->ReaderData->InElse)
continue;
if (tmp->Type == RC_INSTRUCTION_NORMAL) {
rc_for_all_reads_src(tmp,
get_readers_normal_read_callback, d);
} else {
rc_pair_for_all_reads_arg(tmp,
get_readers_pair_read_callback, d);
}
/* This can happen when we jump from an ENDLOOP to BGNLOOP */
if (tmp == writer) {
/* Resume after the ENDLOOP with the saved AbortOnRead; the
 * writer's own writes are not fed to the write callback. */
tmp = endloop;
endloop = NULL;
d->ReaderData->AbortOnRead = abort_on_read_at_endloop;
continue;
}
rc_for_all_writes_mask(tmp, get_readers_write_callback, d);
if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
return;
/* Outside of any branch, with every component overwritten, there
 * can be no further readers. */
if (branch_depth == 0 && !d->AliveWriteMask)
return;
}
}
/**
 * Prepare the scan state and the caller-visible result structure for a new
 * rc_get_readers()/rc_get_readers_sub() run.
 *
 * The reader list in @p reader_data is reset to empty and its Abort flag is
 * cleared. @p d is wired up to the compiler, the result structure and the
 * three user callbacks (any of which may be NULL as far as this function is
 * concerned).
 */
static void init_get_readers_callback_data(
	struct get_readers_callback_data * d,
	struct rc_reader_data * reader_data,
	struct radeon_compiler * c,
	rc_read_src_fn read_normal_cb,
	rc_pair_read_arg_fn read_pair_cb,
	rc_read_write_mask_fn write_cb)
{
	/* Scan state. */
	d->C = c;
	d->ReaderData = reader_data;
	d->WriteCB = write_cb;
	d->ReadPairCB = read_pair_cb;
	d->ReadNormalCB = read_normal_cb;

	/* Result structure: no readers collected yet, nothing aborted. */
	reader_data->Readers = NULL;
	reader_data->ReadersReserved = 0;
	reader_data->ReaderCount = 0;
	reader_data->Abort = 0;
}
/**
* This function will create a list of readers via the rc_reader_data struct.
* This function will abort (set the flag data->Abort) and return if it
* encounters an instruction that reads from @param writer and also a different
* instruction. Here are some examples:
*
* writer = instruction 0;
* 0 MOV TEMP[0].xy, TEMP[1].xy
* 1 MOV TEMP[0].zw, TEMP[2].xy
* 2 MOV TEMP[3], TEMP[0]
* The Abort flag will be set on instruction 2, because it reads values written
* by instructions 0 and 1.
*
* writer = instruction 1;
* 0 IF TEMP[0].x
* 1 MOV TEMP[1], TEMP[2]
* 2 ELSE
* 3 MOV TEMP[1], TEMP[2]
* 4 ENDIF
* 5 MOV TEMP[3], TEMP[1]
* The Abort flag will be set on instruction 5, because it could read from the
* value written by either instruction 1 or 3, depending on the jump decision
* made at instruction 0.
*
* writer = instruction 0;
* 0 MOV TEMP[0], TEMP[1]
* 2 BGNLOOP
* 3 ADD TEMP[0], TEMP[0], none.1
* 4 ENDLOOP
* The Abort flag will be set on instruction 3, because in the first iteration
* of the loop it reads the value written by instruction 0 and in all other
* iterations it reads the value written by instruction 3.
*
* @param read_cb This function will be called for every instruction that
* has been determined to be a reader of writer.
* @param write_cb This function will be called for every instruction after
* writer.
*/
void rc_get_readers(
	struct radeon_compiler * c,
	struct rc_instruction * writer,
	struct rc_reader_data * data,
	rc_read_src_fn read_normal_cb,
	rc_pair_read_arg_fn read_pair_cb,
	rc_read_write_mask_fn write_cb)
{
	struct get_readers_callback_data scan;

	init_get_readers_callback_data(
		&scan, data, c, read_normal_cb, read_pair_cb, write_cb);

	/* Run the reader scan once for every register written by writer. */
	rc_for_all_writes_mask(writer, get_readers_for_single_write, &scan);
}
/**
 * Variant of rc_get_readers() for one half of a pair instruction: only the
 * temporary register written by @p sub_writer is analysed.
 *
 * @p data is always reset; the scan itself is skipped when the
 * sub-instruction has an empty write mask.
 */
void rc_get_readers_sub(
	struct radeon_compiler * c,
	struct rc_instruction * writer,
	struct rc_pair_sub_instruction * sub_writer,
	struct rc_reader_data * data,
	rc_read_src_fn read_normal_cb,
	rc_pair_read_arg_fn read_pair_cb,
	rc_read_write_mask_fn write_cb)
{
	struct get_readers_callback_data scan;

	init_get_readers_callback_data(
		&scan, data, c, read_normal_cb, read_pair_cb, write_cb);

	/* A sub-instruction that writes nothing has no readers. */
	if (!sub_writer->WriteMask)
		return;

	get_readers_for_single_write(&scan, writer, RC_FILE_TEMPORARY,
		sub_writer->DestIndex, sub_writer->WriteMask);
}

View file

@ -0,0 +1,134 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef RADEON_DATAFLOW_H
#define RADEON_DATAFLOW_H
#include "radeon_program_constants.h"
struct radeon_compiler;
struct rc_instruction;
struct rc_swizzle_caps;
struct rc_src_register;
struct rc_pair_instruction_arg;
struct rc_pair_instruction_source;
struct rc_pair_sub_instruction;
struct rc_compiler;
/**
 * Help analyze and modify the register accesses of instructions.
 *
 * Each iterator below takes an instruction, a callback and an opaque
 * userdata pointer that is handed back to the callback as its first argument.
 */
/*@{*/
/* Callback receiving a register (file, index) and a channel value.
 * NOTE(review): presumably invoked once per accessed channel - confirm
 * against the implementation. */
typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int chan);
void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
/* Callback receiving a register (file, index) and a component mask. */
typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask);
void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
/* Callback receiving a pointer to a source register of the instruction. */
typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst,
struct rc_src_register * src);
void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb,
void * userdata);
/* Callback for pair instructions, receiving an argument and a source. */
typedef void (*rc_pair_read_arg_fn)(void * userdata,
struct rc_instruction * inst, struct rc_pair_instruction_arg * arg,
struct rc_pair_instruction_source * src);
void rc_pair_for_all_reads_arg(struct rc_instruction * inst,
rc_pair_read_arg_fn cb, void * userdata);
/* Callback that may rewrite a register reference in place through the
 * pfile / pindex pointers. */
typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file * pfile, unsigned int * pindex);
void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
/*@}*/
/**
 * One entry of the reader list stored in struct rc_reader_data.
 */
struct rc_reader {
/* Instruction this entry refers to. */
struct rc_instruction * Inst;
/* Component mask associated with this entry.
 * NOTE(review): presumably the components of the writer that Inst reads -
 * confirm where the entry is filled in. */
unsigned int WriteMask;
/* Source reference; which member is valid presumably depends on whether
 * Inst is a normal (I) or a pair (P) instruction - TODO confirm. */
union {
struct {
struct rc_src_register * Src;
} I;
struct {
struct rc_pair_instruction_arg * Arg;
struct rc_pair_instruction_source * Src;
} P;
} U;
};
/**
 * Input/output structure of rc_get_readers() and rc_get_readers_sub().
 *
 * Abort, ReaderCount, ReadersReserved and Readers are reset at the start of
 * every call; ExitOnAbort and CbData are left untouched by that reset.
 */
struct rc_reader_data {
/* Set to 1 when the scan hits a situation it cannot handle. */
unsigned int Abort;
/* Component mask maintained during the scan.
 * NOTE(review): presumably a read of any of these components raises Abort;
 * the read callbacks that consume it are not visible here. */
unsigned int AbortOnRead;
/* Component mask: a later write to the tracked register that overlaps
 * these components raises Abort. */
unsigned int AbortOnWrite;
/* Number of loops entered (BGNLOOP seen) and not yet left since the
 * writer. */
unsigned int LoopDepth;
/* Non-zero while the scan is skipping an ELSE block whose IF was not seen
 * after the writer. */
unsigned int InElse;
/* Instruction whose write is currently being analysed. */
struct rc_instruction * Writer;
/* Number of valid entries in Readers. */
unsigned int ReaderCount;
/* Bookkeeping for the Readers array; reset to 0 together with it.
 * Presumably the allocated capacity - TODO confirm. */
unsigned int ReadersReserved;
/* Collected readers. */
struct rc_reader * Readers;
/* If this flag is enabled, rc_get_readers will exit as soon as possible
 * after the Abort flag is set. */
unsigned int ExitOnAbort;
/* NOTE(review): presumably opaque data for the caller's callbacks - not
 * used by the scan code visible here. */
void * CbData;
};
/**
 * Collect the readers of every register written by writer into data.
 * The three callbacks are stored for the duration of the scan; write_cb is
 * only invoked when it is non-NULL.
 */
void rc_get_readers(
struct radeon_compiler * c,
struct rc_instruction * writer,
struct rc_reader_data * data,
rc_read_src_fn read_normal_cb,
rc_pair_read_arg_fn read_pair_cb,
rc_read_write_mask_fn write_cb);
/**
 * Like rc_get_readers(), but only for the temporary register written by
 * sub_writer. Nothing is scanned when sub_writer has an empty write mask.
 */
void rc_get_readers_sub(
struct radeon_compiler * c,
struct rc_instruction * writer,
struct rc_pair_sub_instruction * sub_writer,
struct rc_reader_data * data,
rc_read_src_fn read_normal_cb,
rc_pair_read_arg_fn read_pair_cb,
rc_read_write_mask_fn write_cb);
/**
 * Compiler passes based on dataflow analysis.
 */
/*@{*/
/* Callback type expected by rc_dataflow_deadcode() in its user argument.
 * The pass calls it with the compiler as userdata, an opaque data pointer
 * and a mark_fn; the callback is expected to call mark_fn(data, index, mask)
 * for every output register component that must be kept. */
typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data,
void (*mark_fn)(void * data, unsigned int index, unsigned int mask));
/* Dead code elimination; user must be an rc_dataflow_mark_outputs_fn. */
void rc_dataflow_deadcode(struct radeon_compiler * c, void *user);
/* Rewrites source swizzles that the hardware does not support natively;
 * user is not used by this pass. */
void rc_dataflow_swizzles(struct radeon_compiler * c, void *user);
/*@}*/
void rc_optimize(struct radeon_compiler * c, void *user);
#endif /* RADEON_DATAFLOW_H */

View file

@ -0,0 +1,359 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_dataflow.h"
#include "radeon_compiler.h"
/**
 * Liveness snapshot used by the backward deadcode walk: for every register,
 * a mask of the components that are read later in the program and therefore
 * still needed.
 */
struct updatemask_state {
/* Needed components of each output register. */
unsigned char Output[RC_REGISTER_MAX_INDEX];
/* Needed components of each temporary register. */
unsigned char Temporary[RC_REGISTER_MAX_INDEX];
/* Needed components of the address register. */
unsigned char Address;
/* Needed components of each special register (e.g. the ALU result). */
unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
};
/**
 * Per-instruction result of the deadcode analysis, indexed by instruction IP.
 */
struct instruction_state {
/* Destination components that turned out to be needed. */
unsigned char WriteMask:4;
/* Set when the instruction's ALU result write is needed. */
unsigned char WriteALUResult:1;
/* Per source operand: channels already accounted for as read. */
unsigned char SrcReg[3];
};
/**
 * Loop stack entry: the liveness snapshots taken at each BRK encountered
 * while this entry was on top of the loop stack.
 */
struct loopinfo {
/* Array of snapshots, one per BRK. */
struct updatemask_state * Breaks;
/* Number of valid entries in Breaks. */
unsigned int BreakCount;
/* Capacity bookkeeping used by memory_pool_array_reserve(). */
unsigned int BreaksReserved;
};
/**
 * Branch stack entry, pushed when the backward walk reaches an ENDIF.
 */
struct branchinfo {
/* Set once the ELSE of this branch has been seen. */
unsigned int HaveElse:1;
/* Liveness state as it was at the ENDIF. */
struct updatemask_state StoreEndif;
/* Liveness state as it was when the ELSE was reached (walking backwards,
 * i.e. the state at the start of the ELSE block). */
struct updatemask_state StoreElse;
};
/**
 * Working state of the deadcode pass.
 */
struct deadcode_state {
/* Compiler whose program is analysed; also provides the memory pool. */
struct radeon_compiler * C;
/* Per-instruction results, indexed by instruction IP. */
struct instruction_state * Instructions;
/* Current liveness state of the backward walk. */
struct updatemask_state R;
/* Stack of open branches (ENDIF seen, IF not yet reached). */
struct branchinfo * BranchStack;
unsigned int BranchStackSize;
unsigned int BranchStackReserved;
/* Stack of loops, one entry pushed per ENDLOOP. */
struct loopinfo * LoopStack;
unsigned int LoopStackSize;
unsigned int LoopStackReserved;
};
/**
 * Store the element-wise union of two liveness states in @p dst.
 *
 * Every element is computed independently, so @p dst may be the same object
 * as @p a or @p b.
 */
static void or_updatemasks(
	struct updatemask_state * dst,
	struct updatemask_state * a,
	struct updatemask_state * b)
{
	unsigned int reg;

	dst->Address = a->Address | b->Address;

	for (reg = 0; reg < RC_NUM_SPECIAL_REGISTERS; ++reg)
		dst->Special[reg] = a->Special[reg] | b->Special[reg];

	for (reg = 0; reg < RC_REGISTER_MAX_INDEX; ++reg) {
		dst->Temporary[reg] = a->Temporary[reg] | b->Temporary[reg];
		dst->Output[reg] = a->Output[reg] | b->Output[reg];
	}
}
/**
 * Record the current liveness state as a BRK snapshot of the innermost loop
 * (the top of the loop stack).
 *
 * A BRK that is not enclosed by any loop is reported through rc_error()
 * instead of indexing the loop stack at LoopStackSize - 1, which would wrap
 * around for an empty stack.
 */
static void push_break(struct deadcode_state *s)
{
	struct loopinfo * loop;

	if (!s->LoopStackSize) {
		rc_error(s->C, "%s: BRK outside of a loop\n", __FUNCTION__);
		return;
	}

	loop = &s->LoopStack[s->LoopStackSize - 1];
	memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
		loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);

	memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
}
/**
 * Push a new, empty entry (no recorded breaks) onto the loop stack.
 */
static void push_loop(struct deadcode_state * s)
{
	struct loopinfo * fresh;

	memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
		s->LoopStackSize, s->LoopStackReserved, 1);

	/* Take the slot address only after the reserve call. */
	fresh = &s->LoopStack[s->LoopStackSize];
	memset(fresh, 0, sizeof(*fresh));
	s->LoopStackSize++;
}
/**
 * Push a new entry onto the branch stack, remembering the current liveness
 * state as the state at the ENDIF. The entry starts out without an ELSE.
 */
static void push_branch(struct deadcode_state * s)
{
	struct branchinfo * top;

	memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
		s->BranchStackSize, s->BranchStackReserved, 1);

	top = &s->BranchStack[s->BranchStackSize];
	s->BranchStackSize++;

	memcpy(&top->StoreEndif, &s->R, sizeof(s->R));
	top->HaveElse = 0;
}
/**
 * Look up the "needed components" byte of a register in the current
 * liveness state.
 *
 * @return Pointer into s->R, or 0 when the file is not tracked or the index
 *         is out of range (the latter is also reported via rc_error()).
 */
static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
{
	switch (file) {
	case RC_FILE_OUTPUT:
	case RC_FILE_TEMPORARY:
		if (index >= RC_REGISTER_MAX_INDEX) {
			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
			return 0;
		}
		return (file == RC_FILE_OUTPUT)
			? &s->R.Output[index]
			: &s->R.Temporary[index];

	case RC_FILE_ADDRESS:
		/* There is a single address register; index is ignored. */
		return &s->R.Address;

	case RC_FILE_SPECIAL:
		if (index >= RC_NUM_SPECIAL_REGISTERS) {
			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
			return 0;
		}
		return &s->R.Special[index];

	default:
		/* Other register files are not tracked. */
		return 0;
	}
}
/**
 * Flag the given components of a register as needed. Registers that
 * get_used_ptr() does not track are silently ignored.
 */
static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
{
	unsigned char * needed = get_used_ptr(s, file, index);

	if (!needed)
		return;
	*needed |= mask;
}
/**
 * Process one instruction of the backward walk: work out which of its
 * results are needed, stop tracking the components it provides, and mark the
 * source components required to compute them as needed.
 */
static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
{
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
struct instruction_state * insts = &s->Instructions[inst->IP];
unsigned int usedmask = 0;
unsigned int srcmasks[3];
if (opcode->HasDstReg) {
unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
if (pused) {
/* Needed components that this instruction writes; they are satisfied
 * here and therefore no longer needed from earlier instructions. */
usedmask = *pused & inst->U.I.DstReg.WriteMask;
*pused &= ~usedmask;
}
}
insts->WriteMask |= usedmask;
if (inst->U.I.WriteALUResult) {
unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
if (pused && *pused) {
/* The ALU result is derived from the X or W channel, so that channel
 * counts as used when computing the source masks below (it is not added
 * to insts->WriteMask). */
if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
usedmask |= RC_MASK_X;
else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
usedmask |= RC_MASK_W;
*pused = 0;
insts->WriteALUResult = 1;
}
}
rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
unsigned int refmask = 0;
/* Only source channels not already accounted for are processed. */
unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
insts->SrcReg[src] |= newsrcmask;
/* Translate source channels into register components via the swizzle. */
for(unsigned int chan = 0; chan < 4; ++chan) {
if (GET_BIT(newsrcmask, chan))
refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
}
/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
refmask &= RC_MASK_XYZW;
if (!refmask)
continue;
mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
/* Relative addressing additionally reads the address register. */
if (inst->U.I.SrcReg[src].RelAddr)
mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
}
}
/**
 * mark_fn handed to the rc_dataflow_mark_outputs_fn callback: flags the
 * given components of an output register as needed.
 *
 * @param data Pointer to the struct deadcode_state of the running pass.
 */
static void mark_output_use(void * data, unsigned int index, unsigned int mask)
{
	mark_used((struct deadcode_state *)data, RC_FILE_OUTPUT, index, mask);
}
/**
 * Dead code elimination pass.
 *
 * A backward walk over the program determines, per instruction, which
 * destination components and source channels are actually needed. A forward
 * walk then shrinks write masks, removes instructions whose results are
 * never needed and marks unneeded source swizzle channels as unused.
 *
 * @param c    Compiler whose program is processed.
 * @param user Must be an rc_dataflow_mark_outputs_fn; it is called once to
 *             mark the output components that have to be kept.
 */
void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
{
struct deadcode_state s;
unsigned int nr_instructions;
rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
unsigned int ip;
memset(&s, 0, sizeof(s));
s.C = c;
nr_instructions = rc_recompute_ips(c);
s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
/* Seed the liveness state with the outputs the caller needs. */
dce(c, &s, &mark_output_use);
/* Backward pass. */
for(struct rc_instruction * inst = c->Program.Instructions.Prev;
inst != &c->Program.Instructions;
inst = inst->Prev) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
switch(opcode->Opcode){
/* Mark all sources in the loop body as used before doing
 * normal deadcode analysis. This is probably not optimal.
 */
case RC_OPCODE_ENDLOOP:
{
/* endloops counts the loops still open while walking back to the
 * BGNLOOP that matches this ENDLOOP. */
int endloops = 1;
struct rc_instruction *ptr;
for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
opcode = rc_get_opcode_info(ptr->U.I.Opcode);
if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
endloops--;
continue;
}
if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
endloops++;
continue;
}
if(opcode->HasDstReg){
int src = 0;
unsigned int srcmasks[3];
rc_compute_sources_for_writemask(ptr,
ptr->U.I.DstReg.WriteMask, srcmasks);
for(src=0; src < opcode->NumSrcRegs; src++){
mark_used(&s,
ptr->U.I.SrcReg[src].File,
ptr->U.I.SrcReg[src].Index,
srcmasks[src]);
}
}
}
push_loop(&s);
break;
}
case RC_OPCODE_BRK:
/* Snapshot the liveness state for the enclosing loop. */
push_break(&s);
break;
case RC_OPCODE_BGNLOOP:
{
/* Merge every BRK snapshot of the top loop entry into the current
 * state.
 * NOTE(review): LoopStackSize is not checked before indexing and the
 * loop stack is never popped anywhere in this file - confirm this is
 * intended for nested loops. */
unsigned int i;
struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
for(i = 0; i < loop->BreakCount; i++) {
or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
}
break;
}
case RC_OPCODE_CONT:
break;
case RC_OPCODE_ENDIF:
push_branch(&s);
break;
default:
if (opcode->IsFlowControl && s.BranchStackSize) {
struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
if (opcode->Opcode == RC_OPCODE_IF) {
/* At the IF, join the state of the IF block (current state) with
 * the other path: the ELSE block's state, or the state at the ENDIF
 * when there was no ELSE. */
or_updatemasks(&s.R,
&s.R,
branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
s.BranchStackSize--;
} else if (opcode->Opcode == RC_OPCODE_ELSE) {
if (branch->HaveElse) {
rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
} else {
/* Keep the ELSE block's state and restart the IF block from
 * the state at the ENDIF. */
memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
branch->HaveElse = 1;
}
} else {
rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
}
}
}
update_instruction(&s, inst);
}
/* Forward pass: apply the results. ip tracks the IP assigned by
 * rc_recompute_ips() and keeps counting across removed instructions. */
ip = 0;
for(struct rc_instruction * inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions;
inst = inst->Next, ++ip) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
int dead = 1;
unsigned int srcmasks[3];
unsigned int usemask;
if (!opcode->HasDstReg) {
/* Instructions without a destination are never removed. */
dead = 0;
} else {
inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
if (s.Instructions[ip].WriteMask)
dead = 0;
if (s.Instructions[ip].WriteALUResult)
dead = 0;
else
inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
}
if (dead) {
/* Step back first so the loop increment moves on to the instruction
 * following the removed one. */
struct rc_instruction * todelete = inst;
inst = inst->Prev;
rc_remove_instruction(todelete);
continue;
}
usemask = s.Instructions[ip].WriteMask;
if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
usemask |= RC_MASK_X;
else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
usemask |= RC_MASK_W;
rc_compute_sources_for_writemask(inst, usemask, srcmasks);
/* Source channels that do not contribute to a needed result are
 * marked as unused. */
for(unsigned int src = 0; src < 3; ++src) {
for(unsigned int chan = 0; chan < 4; ++chan) {
if (!GET_BIT(srcmasks[src], chan))
SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
}
}
}
rc_calculate_inputs_outputs(c);
}

View file

@ -0,0 +1,103 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_dataflow.h"
#include "radeon_compiler.h"
#include "radeon_swizzle.h"
/**
 * Replace a source operand whose swizzle is not natively supported by a
 * freshly allocated temporary that is filled by one MOV per split phase.
 *
 * @param c    Compiler; c->SwizzleCaps->Split() decides how the used
 *             channels are distributed over the phases.
 * @param inst Instruction whose operand is rewritten; the MOVs are inserted
 *             directly before it.
 * @param src  Index of the source operand in inst->U.I.SrcReg.
 */
static void rewrite_source(struct radeon_compiler * c,
		struct rc_instruction * inst, unsigned src)
{
	struct rc_swizzle_split split;
	unsigned int tempreg = rc_find_free_temporary(c);
	unsigned int usemask;

	/* Channels of the operand that are actually read. */
	usemask = 0;
	for(unsigned int chan = 0; chan < 4; ++chan) {
		if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
			usemask |= 1 << chan;
	}

	c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);

	for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
		struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
		unsigned int masked_negate;

		mov->U.I.Opcode = RC_OPCODE_MOV;
		mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		mov->U.I.DstReg.Index = tempreg;
		mov->U.I.DstReg.WriteMask = split.Phase[phase];
		mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
		mov->U.I.PreSub = inst->U.I.PreSub;

		/* Channels that do not belong to this phase are not read. */
		for(unsigned int chan = 0; chan < 4; ++chan) {
			if (!GET_BIT(split.Phase[phase], chan))
				SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
		}

		/* Normalize the negate mask when the phase's channels are
		 * negated uniformly; a mixed mask is left as copied. */
		masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
		if (masked_negate == 0)
			mov->U.I.SrcReg[0].Negate = 0;
		else if (masked_negate == split.Phase[phase])
			mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
	}

	/* The original operand now reads the temporary with an identity
	 * swizzle on the used channels and no modifiers. */
	inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[src].Index = tempreg;
	inst->U.I.SrcReg[src].Swizzle = 0;
	inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
	inst->U.I.SrcReg[src].Abs = 0;
	for(unsigned int chan = 0; chan < 4; ++chan) {
		SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
				GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
	}
}
/**
 * Compiler pass: make sure every source operand uses a swizzle the target
 * supports natively, rewriting the ones that do not via rewrite_source().
 *
 * @param c    Compiler whose program is processed.
 * @param user Unused.
 */
void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
{
	struct rc_instruction * cur = c->Program.Instructions.Next;

	while (cur != &c->Program.Instructions) {
		const struct rc_opcode_info * info = rc_get_opcode_info(cur->U.I.Opcode);
		unsigned int operand = 0;

		while (operand < info->NumSrcRegs) {
			if (!c->SwizzleCaps->IsNative(cur->U.I.Opcode, cur->U.I.SrcReg[operand]))
				rewrite_source(c, cur, operand);
			++operand;
		}

		cur = cur->Next;
	}
}

View file

@ -0,0 +1,342 @@
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "radeon_emulate_branches.h"
#include <stdio.h>
#include "radeon_compiler.h"
#include "radeon_dataflow.h"
/* Set to non-zero to get debug output from this pass on stderr. */
#define VERBOSE 0
/* printf-style debug message; only prints when VERBOSE is non-zero. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
/**
 * Proxy assignment for one temporary register.
 */
struct proxy_info {
/* Set once a proxy temporary has been allocated for the register. */
unsigned int Proxied:1;
/* Index of the proxy temporary; only meaningful when Proxied is set. */
unsigned int Index:RC_REGISTER_INDEX_BITS;
};
/**
 * Proxy table of one branch block, indexed by original temporary index.
 */
struct register_proxies {
struct proxy_info Temporary[RC_REGISTER_MAX_INDEX];
};
/**
 * One open branch on the emulation stack.
 */
struct branch_info {
/* The IF instruction that opened the branch. */
struct rc_instruction * If;
/* The ELSE instruction of the branch, or NULL if none was seen. */
struct rc_instruction * Else;
};
/**
 * Working state of the branch emulation pass.
 */
struct emulate_branch_state {
/* Compiler whose program is rewritten; also provides the memory pool. */
struct radeon_compiler * C;
/* Stack of currently open branches; BranchCount entries are valid. */
struct branch_info * Branches;
unsigned int BranchCount;
/* Capacity bookkeeping used by memory_pool_array_reserve(). */
unsigned int BranchReserved;
};
/**
 * Open a new branch for an IF instruction.
 *
 * The branch is pushed on the stack and the IF's condition is copied into a
 * fresh temporary by a MOV placed directly before the IF; the IF is then
 * changed to read that copy.
 */
static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
{
	struct branch_info * entry;
	struct rc_instruction * copy;

	memory_pool_array_reserve(&s->C->Pool, struct branch_info,
			s->Branches, s->BranchCount, s->BranchReserved, 1);

	DBG("%s\n", __FUNCTION__);

	entry = &s->Branches[s->BranchCount];
	s->BranchCount++;
	memset(entry, 0, sizeof(*entry));
	entry->If = inst;

	/* The decision value is needed again at ENDIF time and the register
	 * holding it might be overwritten in both branches, so keep a
	 * private copy of it. */
	copy = rc_insert_new_instruction(s->C, inst->Prev);
	copy->U.I.Opcode = RC_OPCODE_MOV;
	copy->U.I.DstReg.File = RC_FILE_TEMPORARY;
	copy->U.I.DstReg.Index = rc_find_free_temporary(s->C);
	copy->U.I.DstReg.WriteMask = RC_MASK_X;
	copy->U.I.SrcReg[0] = inst->U.I.SrcReg[0];

	/* Point the IF at the copy, without any source modifiers. */
	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[0].Index = copy->U.I.DstReg.Index;
	inst->U.I.SrcReg[0].Negate = 0;
	inst->U.I.SrcReg[0].Abs = 0;
	inst->U.I.SrcReg[0].Swizzle = 0;
}
/**
 * Record the ELSE instruction of the innermost open branch.
 *
 * An ELSE without an open branch is reported through rc_error(); the message
 * is newline-terminated like the other rc_error() messages of the compiler.
 */
static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
{
	struct branch_info * branch;

	if (!s->BranchCount) {
		rc_error(s->C, "Encountered ELSE outside of branches\n");
		return;
	}

	DBG("%s\n", __FUNCTION__);

	branch = &s->Branches[s->BranchCount - 1];
	branch->Else = inst;
}
/**
 * Userdata bundle passed to the scan_write() and remap_proxy_function()
 * callbacks.
 */
struct state_and_proxies {
/* State of the running pass (gives access to the compiler). */
struct emulate_branch_state * S;
/* Proxy table of the block currently being processed. */
struct register_proxies * Proxies;
};
/**
 * Look up the proxy table entry of a register.
 *
 * @return The entry for temporary registers, 0 for every other register
 *         file. An index outside the table is reported through rc_error()
 *         and also yields 0 instead of reading past the end of the table;
 *         callers already treat 0 as "no proxy".
 */
static struct proxy_info * get_proxy_info(struct state_and_proxies * sap,
		rc_register_file file, unsigned int index)
{
	if (file != RC_FILE_TEMPORARY)
		return 0;

	if (index >= RC_REGISTER_MAX_INDEX) {
		rc_error(sap->S->C, "%s: index %i is out of bounds for file %i\n",
				__FUNCTION__, index, file);
		return 0;
	}

	return &sap->Proxies->Temporary[index];
}
/**
 * Write callback: allocate a proxy temporary the first time a temporary
 * register is written inside the block being processed.
 *
 * @param userdata Pointer to a struct state_and_proxies.
 */
static void scan_write(void * userdata, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int comp)
{
	struct state_and_proxies * sap = userdata;
	struct proxy_info * entry = get_proxy_info(sap, file, index);

	/* Not a temporary, or a proxy was already assigned. */
	if (!entry || entry->Proxied)
		return;

	entry->Proxied = 1;
	entry->Index = rc_find_free_temporary(sap->S->C);
}
/**
 * Remap callback: redirect a register reference to its proxy temporary, if
 * one has been assigned.
 *
 * @param userdata Pointer to a struct state_and_proxies.
 */
static void remap_proxy_function(void * userdata, struct rc_instruction * inst,
		rc_register_file * pfile, unsigned int * pindex)
{
	struct state_and_proxies * sap = userdata;
	struct proxy_info * entry = get_proxy_info(sap, *pfile, *pindex);

	if (!entry || !entry->Proxied)
		return;

	*pfile = RC_FILE_TEMPORARY;
	*pindex = entry->Index;
}
/**
* Redirect all writes in the instruction range [begin, end) to proxy
* temporary registers.
*/
static void allocate_and_insert_proxies(struct emulate_branch_state * s,
		struct register_proxies * proxies,
		struct rc_instruction * begin,
		struct rc_instruction * end)
{
	struct state_and_proxies sap;
	struct rc_instruction * cur;
	unsigned int reg;

	sap.Proxies = proxies;
	sap.S = s;

	/* Assign proxies to everything written in the range and rewrite the
	 * instructions to use them. */
	for (cur = begin; cur != end; cur = cur->Next) {
		rc_for_all_writes_mask(cur, scan_write, &sap);
		rc_remap_registers(cur, remap_proxy_function, &sap);
	}

	/* Initialize every proxy from its original register in front of the
	 * range. begin->Prev is re-read on each iteration, so each new MOV
	 * is placed after the previously inserted one. */
	for (reg = 0; reg < RC_REGISTER_MAX_INDEX; ++reg) {
		struct rc_instruction * init_mov;

		if (!proxies->Temporary[reg].Proxied)
			continue;

		init_mov = rc_insert_new_instruction(s->C, begin->Prev);
		init_mov->U.I.Opcode = RC_OPCODE_MOV;
		init_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
		init_mov->U.I.DstReg.Index = proxies->Temporary[reg].Index;
		init_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
		init_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
		init_mov->U.I.SrcReg[0].Index = reg;
	}
}
/**
 * Insert, after the ENDIF, a CMP that writes the final value of one register
 * by choosing between the results of the IF and the ELSE block.
 *
 * The condition operand is the IF's source with Abs set and all components
 * negated. A block that did not write the register contributes the original
 * register itself.
 */
static void inject_cmp(struct emulate_branch_state * s,
		struct rc_instruction * inst_if,
		struct rc_instruction * inst_endif,
		rc_register_file file, unsigned int index,
		struct proxy_info ifproxy,
		struct proxy_info elseproxy)
{
	const unsigned int if_value = ifproxy.Proxied ? ifproxy.Index : index;
	const unsigned int else_value = elseproxy.Proxied ? elseproxy.Index : index;
	struct rc_instruction * cmp = rc_insert_new_instruction(s->C, inst_endif);

	cmp->U.I.Opcode = RC_OPCODE_CMP;

	cmp->U.I.DstReg.File = file;
	cmp->U.I.DstReg.Index = index;
	cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW;

	/* Operand 0: the branch condition. */
	cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
	cmp->U.I.SrcReg[0].Abs = 1;
	cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW;

	/* Operand 1: value produced by the IF block. */
	cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
	cmp->U.I.SrcReg[1].Index = if_value;

	/* Operand 2: value produced by the ELSE block. */
	cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
	cmp->U.I.SrcReg[2].Index = else_value;
}
/**
 * Close the innermost open branch at its ENDIF.
 *
 * Writes in the IF block and (if present) the ELSE block are redirected to
 * proxy temporaries, one CMP per affected register is inserted after the
 * ENDIF to select the final value, and the IF/ELSE/ENDIF instructions are
 * removed from the program.
 *
 * An ENDIF without an open branch is reported through rc_error(); the
 * message is newline-terminated like the other rc_error() messages of the
 * compiler.
 */
static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
{
	struct branch_info * branch;
	struct register_proxies IfProxies;
	struct register_proxies ElseProxies;

	if (!s->BranchCount) {
		rc_error(s->C, "Encountered ENDIF outside of branches\n");
		return;
	}

	DBG("%s\n", __FUNCTION__);

	branch = &s->Branches[s->BranchCount - 1];

	memset(&IfProxies, 0, sizeof(IfProxies));
	memset(&ElseProxies, 0, sizeof(ElseProxies));

	/* The IF block ends at the ELSE when there is one, otherwise at the
	 * ENDIF. */
	allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst);
	if (branch->Else)
		allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst);

	/* Insert the CMP instructions at the end. */
	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
		if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) {
			inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index,
					IfProxies.Temporary[index], ElseProxies.Temporary[index]);
		}
	}

	/* Remove all traces of the branch instructions */
	rc_remove_instruction(branch->If);
	if (branch->Else)
		rc_remove_instruction(branch->Else);
	rc_remove_instruction(inst);

	s->BranchCount--;

	if (VERBOSE) {
		DBG("Program after ENDIF handling:\n");
		rc_print_program(&s->C->Program);
	}
}
/**
 * Userdata for remap_output_function(): which output register is replaced
 * by which temporary.
 */
struct remap_output_data {
/* Index of the output register to redirect. */
unsigned int Output:RC_REGISTER_INDEX_BITS;
/* Index of the temporary that takes its place. */
unsigned int Temporary:RC_REGISTER_INDEX_BITS;
};
/**
 * Remap callback: redirect references to one specific output register to
 * its replacement temporary; everything else is left alone.
 *
 * @param userdata Pointer to a struct remap_output_data.
 */
static void remap_output_function(void * userdata, struct rc_instruction * inst,
		rc_register_file * pfile, unsigned int * pindex)
{
	struct remap_output_data * remap = userdata;

	if (*pfile != RC_FILE_OUTPUT)
		return;
	if (*pindex != remap->Output)
		return;

	*pfile = RC_FILE_TEMPORARY;
	*pindex = remap->Temporary;
}
/**
* Output registers cannot be read from and so cannot be dealt with like
* temporary registers.
*
* We do the simplest thing: If an output register is written within
* a branch, then *all* writes to this register are proxied to a
* temporary register, and a final MOV is appended to the end of
* the program.
*/
static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
{
	struct remap_output_data remap;
	struct rc_instruction * cur;
	struct rc_instruction * final_mov;

	/* Only writes inside an open branch need fixing. */
	if (!s->BranchCount)
		return;

	if (!rc_get_opcode_info(inst->U.I.Opcode)->HasDstReg)
		return;

	if (inst->U.I.DstReg.File != RC_FILE_OUTPUT)
		return;

	remap.Output = inst->U.I.DstReg.Index;
	remap.Temporary = rc_find_free_temporary(s->C);

	/* Redirect every reference to the output register, in the whole
	 * program, to the temporary. */
	for (cur = s->C->Program.Instructions.Next;
	     cur != &s->C->Program.Instructions;
	     cur = cur->Next) {
		rc_remap_registers(cur, &remap_output_function, &remap);
	}

	/* Copy the temporary to the real output after the last instruction. */
	final_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
	final_mov->U.I.Opcode = RC_OPCODE_MOV;
	final_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
	final_mov->U.I.DstReg.Index = remap.Output;
	final_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	final_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	final_mov->U.I.SrcReg[0].Index = remap.Temporary;
}
/**
 * Remove branch instructions; instead, both sides of a branch are executed
 * on different register sets and CMP instructions placed where the ENDIF
 * used to be select between their results.
 *
 * \param c    compiler whose program is rewritten in place
 * \param user unused
 */
void rc_emulate_branches(struct radeon_compiler *c, void *user)
{
	struct emulate_branch_state s;
	struct rc_instruction * inst;
	struct rc_instruction * next;

	memset(&s, 0, sizeof(s));
	s.C = c;

	/* The successor is fetched before the instruction is handled because
	 * the handlers may remove the current instruction. */
	for (inst = c->Program.Instructions.Next;
	     inst != &c->Program.Instructions;
	     inst = next) {
		next = inst->Next;

		if (inst->Type != RC_INSTRUCTION_NORMAL) {
			rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__);
			continue;
		}

		switch (inst->U.I.Opcode) {
		case RC_OPCODE_IF:
			handle_if(&s, inst);
			break;
		case RC_OPCODE_ELSE:
			handle_else(&s, inst);
			break;
		case RC_OPCODE_ENDIF:
			handle_endif(&s, inst);
			break;
		default:
			fix_output_writes(&s, inst);
			break;
		}
	}
}

View file

@ -0,0 +1,30 @@
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#ifndef RADEON_EMULATE_BRANCHES_H
#define RADEON_EMULATE_BRANCHES_H
/* Forward declaration; only a pointer to the compiler is needed here. */
struct radeon_compiler;
/**
 * Compiler pass that removes IF/ELSE/ENDIF instructions from c's program,
 * executing both sides of each branch and selecting between the results.
 * The user argument is not used by the implementation.
 */
void rc_emulate_branches(struct radeon_compiler *c, void *user);
#endif /* RADEON_EMULATE_BRANCHES_H */

View file

@ -0,0 +1,522 @@
/*
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* \file
*/
#include "radeon_emulate_loops.h"
#include "radeon_compiler.h"
#include "radeon_dataflow.h"
#define VERBOSE 0
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
/**
 * State passed to update_const_value() while scanning the instructions
 * before a loop for the initial value of the loop counter.
 */
struct const_value {
/* Compiler that owns the program and its constant table. */
struct radeon_compiler * C;
/* Source register (the loop counter) whose value is being tracked. */
struct rc_src_register * Src;
/* Value recorded from a MOV of an immediate into Src. */
float Value;
/* Non-zero once Value has been recorded. */
int HasValue;
};
/**
 * State passed to get_incr_amount() while scanning a loop body for the
 * instructions that modify the loop counter.
 */
struct count_inst {
/* Compiler that owns the program and its constant table. */
struct radeon_compiler * C;
/* Index of the temporary register that holds the counter. */
int Index;
/* Swizzle with which the loop condition reads the counter. */
rc_swizzle Swz;
/* Sum of the immediate amounts added to / subtracted from the counter. */
float Amount;
/* Set to 1 when a write to the counter could not be analyzed. */
int Unknown;
};
/**
 * Look up the immediate value read by one channel of a constant source.
 *
 * \param c    compiler whose constant table is consulted
 * \param src  source register; src->Index selects the constant
 * \param chan channel of src's swizzle and negate mask to resolve
 * \return the immediate selected by the swizzle, negated when the negate
 *         bit for chan is set; 0.0f (after reporting through rc_error) when
 *         the swizzle does not select a component or the index is outside
 *         the constant table
 */
static float get_constant_value(struct radeon_compiler * c,
				struct rc_src_register * src,
				int chan)
{
	float sign;
	int component = GET_SWZ(src->Swizzle, chan);

	if (component >= 4 || src->Index >= c->Program.Constants.Count) {
		rc_error(c, "get_constant_value: Can't find a value.\n");
		return 0.0f;
	}

	sign = GET_BIT(src->Negate, chan) ? -1.0f : 1.0f;

	return sign *
		c->Program.Constants.Constants[src->Index].u.Immediate[component];
}
/**
 * \return non-zero when src reads from the constant file and the constant
 *         it refers to is of type RC_CONSTANT_IMMEDIATE, zero otherwise.
 *
 * NOTE(review): src->Index is not range-checked against the constant table
 * here -- confirm callers only pass valid indices.
 */
static int src_reg_is_immediate(struct rc_src_register * src,
				struct radeon_compiler * c)
{
	if (src->File != RC_FILE_CONSTANT)
		return 0;

	return c->Program.Constants.Constants[src->Index].Type == RC_CONSTANT_IMMEDIATE;
}
/**
 * Estimate how many copies of a loop body fit into the ALU instruction
 * budget c->max_alu_insts.
 *
 * The instruction pointers are refreshed with rc_recompute_ips() first; its
 * return value is used as the current program size.
 *
 * NOTE(review): there is no guard for an empty loop body (division by zero)
 * or for a program that already exceeds c->max_alu_insts -- confirm that
 * callers cannot reach either case.
 */
static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
						struct loop_info * loop)
{
	unsigned int program_size = rc_recompute_ips(c);
	unsigned int body_size = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;

	/* One iteration of the loop is already part of the program, hence
	 * the leading 1. */
	return 1 + ((c->max_alu_insts - program_size) / body_size);
}
/**
 * Replace a loop by straight-line code: the BGNLOOP and ENDLOOP
 * instructions are removed and the body is duplicated so that it appears
 * `iterations` times in total (the original body counts as the first copy).
 *
 * \param c          compiler used to allocate the copied instructions
 * \param loop       loop to unroll; BeginLoop and EndLoop must be set
 * \param iterations total number of body copies wanted
 */
static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
			unsigned int iterations)
{
	struct rc_instruction * body_first = loop->BeginLoop->Next;
	struct rc_instruction * body_last = loop->EndLoop->Prev;
	struct rc_instruction * tail = body_last;
	unsigned int pass;

	rc_remove_instruction(loop->BeginLoop);
	rc_remove_instruction(loop->EndLoop);

	for (pass = 1; pass < iterations; ++pass) {
		struct rc_instruction * src = body_first;

		/* body_last->Next must be re-read on every step: after the
		 * first insertion it is the first copied instruction, which
		 * is what terminates the walk over the original body. */
		while (src != body_last->Next) {
			struct rc_instruction * copy = rc_alloc_instruction(c);

			memcpy(copy, src, sizeof(struct rc_instruction));
			rc_insert_instruction(tail, copy);
			tail = copy;
			src = src->Next;
		}
	}
}
/**
 * Write callback for rc_for_all_writes_mask(): tracks the constant value
 * held by the register described in a struct const_value.
 *
 * Writes that do not touch the tracked register/channel are ignored.  A MOV
 * from an immediate constant records the new value.  Any other write to the
 * tracked channel makes the value unknown, so HasValue is cleared; otherwise
 * an earlier, now stale, constant would still be reported as the current
 * value.
 *
 * \param data  struct const_value describing the tracked register
 * \param inst  instruction performing the write
 * \param file  register file written
 * \param index register index written
 * \param mask  write mask of the write
 */
static void update_const_value(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct const_value * value = data;

	if (value->Src->File != file ||
	    value->Src->Index != index ||
	    !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)) {
		return;
	}

	if (inst->U.I.Opcode == RC_OPCODE_MOV &&
	    src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)) {
		value->HasValue = 1;
		value->Value =
			get_constant_value(value->C, &inst->U.I.SrcReg[0], 0);
		return;
	}

	/* The tracked channel was overwritten by something that is not a
	 * known constant: forget any value recorded earlier. */
	value->HasValue = 0;
}
/**
 * Write callback for rc_for_all_writes_mask(): accumulates by how much the
 * loop counter changes in one pass through the loop body.
 *
 * Only writes to the counter temporary whose write mask is exactly the
 * counter channel are considered.  Supported forms are
 *   ADD counter, counter, imm   (either operand order)
 *   SUB counter, counter, imm
 * Everything else that writes the counter sets count_inst->Unknown so that
 * the caller gives up on unrolling.
 *
 * \param data  struct count_inst describing the counter
 * \param inst  instruction performing the write
 * \param file  register file written
 * \param index register index written
 * \param mask  write mask of the write
 */
static void get_incr_amount(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct count_inst * count_inst = data;
	int amnt_src_index;
	const struct rc_opcode_info * opcode;
	float amount;

	if (file != RC_FILE_TEMPORARY ||
	    count_inst->Index != index ||
	    (1 << GET_SWZ(count_inst->Swz,0) != mask)) {
		return;
	}

	/* Find which source is the counter; the other one is the amount. */
	opcode = rc_get_opcode_info(inst->U.I.Opcode);
	if (opcode->NumSrcRegs != 2) {
		count_inst->Unknown = 1;
		return;
	}
	if (inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
	    inst->U.I.SrcReg[0].Index == count_inst->Index &&
	    inst->U.I.SrcReg[0].Swizzle == count_inst->Swz) {
		amnt_src_index = 1;
	} else if (inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
		   inst->U.I.SrcReg[1].Index == count_inst->Index &&
		   inst->U.I.SrcReg[1].Swizzle == count_inst->Swz) {
		amnt_src_index = 0;
	} else {
		count_inst->Unknown = 1;
		return;
	}

	/* The amount has to be a compile-time constant. */
	if (src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index],
				 count_inst->C)) {
		amount = get_constant_value(count_inst->C,
				&inst->U.I.SrcReg[amnt_src_index], 0);
	} else {
		count_inst->Unknown = 1;
		return;
	}

	switch (inst->U.I.Opcode) {
	case RC_OPCODE_ADD:
		count_inst->Amount += amount;
		break;
	case RC_OPCODE_SUB:
		if (amnt_src_index == 0) {
			/* counter = imm - counter is not a constant step, so
			 * the per-iteration change cannot be expressed as an
			 * amount. */
			count_inst->Unknown = 1;
			return;
		}
		count_inst->Amount -= amount;
		break;
	default:
		count_inst->Unknown = 1;
		return;
	}
}
/**
 * Try to replace a loop by a fixed number of copies of its body.
 *
 * This only succeeds when one operand of the loop condition is an immediate
 * (the limit), the initial value of the other operand (the counter) is
 * known, and the counter changes by a constant non-zero amount in each
 * iteration.
 *
 * If c->max_alu_insts is not positive (e.g. -1), all eligible loops are
 * unrolled regardless of how many iterations they have; otherwise loops
 * whose unrolled form would not fit the instruction budget are left alone.
 *
 * \param c    compiler
 * \param loop fully populated loop description (see build_loop_info)
 * \return 1 if the loop was unrolled (loop->EndLoop is then NULL),
 *         0 if the loop was left in place
 */
static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop)
{
	int end_loops;
	int iterations;
	struct count_inst count_inst;
	float limit_value;
	struct rc_src_register * counter;
	struct rc_src_register * limit;
	struct const_value counter_value;
	struct rc_instruction * inst;

	/* Find the counter and the upper limit */
	if (src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)) {
		limit = &loop->Cond->U.I.SrcReg[0];
		counter = &loop->Cond->U.I.SrcReg[1];
	} else if (src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)) {
		limit = &loop->Cond->U.I.SrcReg[1];
		counter = &loop->Cond->U.I.SrcReg[0];
	} else {
		DBG("No constant limit.\n");
		return 0;
	}

	/* Find the initial value of the counter */
	counter_value.Src = counter;
	counter_value.Value = 0.0f;
	counter_value.HasValue = 0;
	counter_value.C = c;
	for (inst = c->Program.Instructions.Next; inst != loop->BeginLoop;
	     inst = inst->Next) {
		rc_for_all_writes_mask(inst, update_const_value, &counter_value);
	}
	if (!counter_value.HasValue) {
		DBG("Initial counter value cannot be determined.\n");
		return 0;
	}
	DBG("Initial counter value is %f\n", counter_value.Value);

	/* Determine how the counter is modified each loop */
	count_inst.C = c;
	count_inst.Index = counter->Index;
	count_inst.Swz = counter->Swizzle;
	count_inst.Amount = 0.0f;
	count_inst.Unknown = 0;
	end_loops = 1;
	for (inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next) {
		switch (inst->U.I.Opcode) {
		/* XXX In the future we might want to try to unroll nested
		 * loops here.*/
		case RC_OPCODE_BGNLOOP:
			end_loops++;
			break;
		case RC_OPCODE_ENDLOOP:
			loop->EndLoop = inst;
			end_loops--;
			break;
		case RC_OPCODE_BRK:
			/* Don't unroll a loop that has a BRK instruction
			 * other than the one used when testing the main
			 * conditional of the loop.  BRKs that belong to a
			 * nested loop (end_loops > 1) do not count. */
			if (inst != loop->Brk && end_loops == 1) {
				return 0;
			}
			break;
		/* XXX Check if the counter is modified within an if statement.
		 */
		case RC_OPCODE_IF:
			break;
		default:
			rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
			if (count_inst.Unknown) {
				return 0;
			}
			break;
		}
	}

	/* Infinite loop */
	if (count_inst.Amount == 0.0f) {
		return 0;
	}
	DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);

	/* Calculate the number of iterations of this loop. Keeping this
	 * simple, since we only support increment and decrement loops.
	 */
	limit_value = get_constant_value(c, limit, 0);
	DBG("Limit is %f.\n", limit_value);

	/* The iteration calculations are opposite of what you would expect.
	 * In a normal loop, if the condition is met, then the loop continues,
	 * but with our loops, if the condition is met, the loop is exited. */
	switch (loop->Cond->U.I.Opcode) {
	case RC_OPCODE_SGE:
	case RC_OPCODE_SLE:
		iterations = (int) ceilf((limit_value - counter_value.Value) /
							count_inst.Amount);
		break;
	case RC_OPCODE_SGT:
	case RC_OPCODE_SLT:
		iterations = (int) floorf((limit_value - counter_value.Value) /
							count_inst.Amount) + 1;
		break;
	default:
		return 0;
	}

	/* Unrolling always leaves at least one copy of the body in the
	 * program, so a loop that would run zero times cannot be expressed.
	 * A negative count would additionally be converted to a huge unsigned
	 * value below.  Leave such loops untouched. */
	if (iterations <= 0) {
		DBG("Loop has a non-positive iteration count.\n");
		return 0;
	}

	if (c->max_alu_insts > 0
	    && (unsigned int)iterations > loop_max_possible_iterations(c, loop)) {
		return 0;
	}
	DBG("Loop will have %d iterations.\n", iterations);

	/* Prepare loop for unrolling */
	rc_remove_instruction(loop->Cond);
	rc_remove_instruction(loop->If);
	rc_remove_instruction(loop->Brk);
	rc_remove_instruction(loop->EndIf);

	unroll_loop(c, loop, iterations);
	/* Mark the loop as handled so that later passes skip it. */
	loop->EndLoop = NULL;
	return 1;
}
/**
 * Fill in a loop_info for the loop that starts at inst by walking forward
 * to the matching ENDLOOP.  Nested loops are skipped as a whole.  The first
 * BRK that is directly enclosed by an IF/ENDIF pair is taken as the loop's
 * exit test, and the instruction before that IF must be one of the
 * comparison opcodes SLT/SGE/SGT/SLE/SEQ/SNE.
 *
 * @param c compiler, used for error reporting and to detect the list end
 * @param loop structure to fill in; it is zeroed first
 * @param inst A pointer to a BGNLOOP instruction.
 * @return 1 if all of the members of loop were set.
 * @return 0 if there was an error and some members of loop are still NULL.
 */
static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
struct rc_instruction * inst)
{
struct rc_instruction * ptr;
if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);
return 0;
}
memset(loop, 0, sizeof(struct loop_info));
loop->BeginLoop = inst;
/* Walk the body until the matching ENDLOOP has been recorded. */
for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) {
/* Reaching the list head means the loop was never closed. */
if (ptr == &c->Program.Instructions) {
rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n",
__FUNCTION__);
return 0;
}
switch(ptr->U.I.Opcode){
case RC_OPCODE_BGNLOOP:
{
/* Nested loop, skip ahead to the end. */
unsigned int loop_depth = 1;
for(ptr = ptr->Next; ptr != &c->Program.Instructions;
ptr = ptr->Next){
if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
loop_depth++;
} else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
if (!--loop_depth) {
break;
}
}
}
if (ptr == &c->Program.Instructions) {
rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n",
__FUNCTION__);
return 0;
}
break;
}
case RC_OPCODE_BRK:
/* Only the first BRK of the exact form IF; BRK; ENDIF is used;
 * any other BRK is ignored here. */
if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF
|| ptr->Prev->U.I.Opcode != RC_OPCODE_IF
|| loop->Brk){
continue;
}
loop->Brk = ptr;
loop->If = ptr->Prev;
loop->EndIf = ptr->Next;
/* The instruction feeding the IF has to be a comparison. */
switch(loop->If->Prev->U.I.Opcode){
case RC_OPCODE_SLT:
case RC_OPCODE_SGE:
case RC_OPCODE_SGT:
case RC_OPCODE_SLE:
case RC_OPCODE_SEQ:
case RC_OPCODE_SNE:
break;
default:
return 0;
}
loop->Cond = loop->If->Prev;
break;
case RC_OPCODE_ENDLOOP:
/* Setting EndLoop terminates the enclosing for loop. */
loop->EndLoop = ptr;
break;
}
}
/* Succeed only if every member has been found. */
if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf
&& loop->Cond && loop->EndLoop) {
return 1;
}
return 0;
}
/**
 * Record a loop in s->Loops and get it ready for emulation.
 *
 * The loop is unrolled right away when try_unroll_loop() manages to do so.
 * Otherwise it is rewritten so that its body sits inside the IF that used
 * to guard the BRK, with the condition inverted:
 *
 * BGNLOOP;                             -> BGNLOOP;
 * <Additional conditional code>        -> <Additional conditional code>
 * SGE/SLT temp[0], temp[1], temp[2];   -> SLT/SGE temp[0], temp[1], temp[2];
 * IF temp[0];                          -> IF temp[0];
 * BRK;                                 ->
 * ENDIF;                               -> <Loop Body>
 * <Loop Body>                          -> ENDIF;
 * ENDLOOP;                             -> ENDLOOP
 *
 * @param s    loop emulation state that receives the new loop_info entry
 * @param inst A pointer to a BGNLOOP instruction.
 * @return 1 for success, 0 for failure
 */
static int transform_loop(struct emulate_loop_state * s,
			  struct rc_instruction * inst)
{
	struct loop_info * loop;
	unsigned int inverse;

	memory_pool_array_reserve(&s->C->Pool, struct loop_info,
			s->Loops, s->LoopCount, s->LoopReserved, 1);

	loop = &s->Loops[s->LoopCount++];

	if (!build_loop_info(s->C, loop, inst)) {
		rc_error(s->C, "Failed to build loop info\n");
		return 0;
	}

	if (try_unroll_loop(s->C, loop))
		return 1;

	/* Pick the comparison that is true exactly when the original one
	 * is false. */
	switch (loop->Cond->U.I.Opcode) {
	case RC_OPCODE_SGE: inverse = RC_OPCODE_SLT; break;
	case RC_OPCODE_SLT: inverse = RC_OPCODE_SGE; break;
	case RC_OPCODE_SLE: inverse = RC_OPCODE_SGT; break;
	case RC_OPCODE_SGT: inverse = RC_OPCODE_SLE; break;
	case RC_OPCODE_SEQ: inverse = RC_OPCODE_SNE; break;
	case RC_OPCODE_SNE: inverse = RC_OPCODE_SEQ; break;
	default:
		rc_error(s->C, "loop->Cond is not a conditional.\n");
		return 0;
	}
	loop->Cond->U.I.Opcode = inverse;

	/* Drop the BRK and move the ENDIF to just before the ENDLOOP so that
	 * the IF now encloses the loop body. */
	rc_remove_instruction(loop->Brk);
	rc_remove_instruction(loop->EndIf);
	rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
	return 1;
}
/**
 * Compiler pass: reset c->loop_state and run transform_loop() on every
 * BGNLOOP instruction of the program.  The pass stops at the first loop
 * that cannot be transformed.
 *
 * \param c    compiler whose program is processed
 * \param user unused
 */
void rc_transform_loops(struct radeon_compiler *c, void *user)
{
	struct emulate_loop_state * s = &c->loop_state;
	struct rc_instruction * inst;

	memset(s, 0, sizeof(struct emulate_loop_state));
	s->C = c;

	for (inst = s->C->Program.Instructions.Next;
	     inst != &s->C->Program.Instructions;
	     inst = inst->Next) {
		if (inst->Type != RC_INSTRUCTION_NORMAL ||
		    inst->U.I.Opcode != RC_OPCODE_BGNLOOP)
			continue;

		if (!transform_loop(s, inst))
			return;
	}
}
void rc_unroll_loops(struct radeon_compiler *c, void *user)
{
struct rc_instruction * inst;
struct loop_info loop;
for(inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
if (build_loop_info(c, &loop, inst)) {
try_unroll_loop(c, &loop);
}
}
}
}
void rc_emulate_loops(struct radeon_compiler *c, void *user)
{
struct emulate_loop_state * s = &c->loop_state;
int i;
/* Iterate backwards of the list of loops so that loops that nested
* loops are unrolled first.
*/
for( i = s->LoopCount - 1; i >= 0; i-- ){
unsigned int iterations;
if(!s->Loops[i].EndLoop){
continue;
}
iterations = loop_max_possible_iterations(s->C, &s->Loops[i]);
unroll_loop(s->C, &s->Loops[i], iterations);
}
}

View file

@ -0,0 +1,32 @@
#ifndef RADEON_EMULATE_LOOPS_H
#define RADEON_EMULATE_LOOPS_H
#define MAX_ITERATIONS 8
struct radeon_compiler;
/**
 * The instructions that make up the skeleton of one loop, as located by
 * build_loop_info().
 */
struct loop_info {
/* The BGNLOOP instruction that opens the loop. */
struct rc_instruction * BeginLoop;
/* Comparison instruction (SLT/SGE/SGT/SLE/SEQ/SNE) directly before If. */
struct rc_instruction * Cond;
/* The IF directly before Brk. */
struct rc_instruction * If;
/* The BRK that exits the loop when the condition holds. */
struct rc_instruction * Brk;
/* The ENDIF directly after Brk. */
struct rc_instruction * EndIf;
/* The matching ENDLOOP; set to NULL once the loop has been unrolled. */
struct rc_instruction * EndLoop;
};
/**
 * Loops collected by rc_transform_loops() for later use by
 * rc_emulate_loops().
 */
struct emulate_loop_state {
/* Compiler that owns the program and the memory pool. */
struct radeon_compiler * C;
/* Array of recorded loops, allocated from the compiler's memory pool. */
struct loop_info * Loops;
/* Number of entries of Loops that are in use. */
unsigned int LoopCount;
/* Bookkeeping for memory_pool_array_reserve(); presumably the allocated
 * capacity of Loops -- confirm against memory_pool.h. */
unsigned int LoopReserved;
};
/* Record every loop in c->loop_state; each loop is either unrolled at once
 * or rewritten into the IF form that rc_emulate_loops() expects.  The user
 * argument is unused. */
void rc_transform_loops(struct radeon_compiler *c, void *user);
/* Fully unroll the loops whose iteration count can be computed; other loops
 * are left untouched.  The user argument is unused. */
void rc_unroll_loops(struct radeon_compiler * c, void *user);
/* Unroll the loops recorded by rc_transform_loops() as often as the
 * instruction budget permits.  The user argument is unused. */
void rc_emulate_loops(struct radeon_compiler * c, void *user);
#endif /* RADEON_EMULATE_LOOPS_H */

View file

@ -0,0 +1,90 @@
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_list.h"
#include <stdlib.h>
#include <stdio.h>
#include "memory_pool.h"
/**
 * Allocate an unlinked list node from a memory pool.
 *
 * \param pool pool the node is allocated from
 * \param item payload pointer stored in the node
 * \return the new node, with Prev and Next set to NULL
 */
struct rc_list * rc_list(struct memory_pool * pool, void * item)
{
	struct rc_list * node = memory_pool_malloc(pool, sizeof(struct rc_list));

	node->Prev = NULL;
	node->Next = NULL;
	node->Item = item;
	return node;
}
/**
 * Append a node to the end of a list.
 *
 * \param list      address of the head pointer; an empty list (*list ==
 *                  NULL) simply gets new_value as its head
 * \param new_value node to append
 */
void rc_list_add(struct rc_list ** list, struct rc_list * new_value)
{
	struct rc_list * tail = *list;

	if (tail == NULL) {
		*list = new_value;
		return;
	}

	/* Walk to the last node. */
	while (tail->Next)
		tail = tail->Next;

	tail->Next = new_value;
	new_value->Prev = tail;
}
/**
 * Unlink a node from a list.  The node's own Prev/Next pointers are left
 * untouched.
 *
 * \param list     address of the head pointer; updated when the head itself
 *                 is removed
 * \param rm_value node to unlink; must be a member of *list
 */
void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value)
{
	if (*list == rm_value) {
		*list = rm_value->Next;
		/* The new head must not keep pointing back at the node that
		 * was just removed. */
		if (*list) {
			(*list)->Prev = NULL;
		}
		return;
	}

	rm_value->Prev->Next = rm_value->Next;
	if (rm_value->Next) {
		rm_value->Next->Prev = rm_value->Prev;
	}
}
/**
 * \param list head of the list, may be NULL
 * \return the number of nodes reachable from list through Next
 */
unsigned int rc_list_count(struct rc_list * list)
{
	unsigned int n;

	for (n = 0; list; list = list->Next)
		n++;

	return n;
}
/**
 * Debug helper: print the Item pointer of every node to stderr, each
 * followed by "->", and finish the line with a newline.
 *
 * \param list head of the list, may be NULL
 */
void rc_list_print(struct rc_list * list)
{
	struct rc_list * node;

	for (node = list; node; node = node->Next)
		fprintf(stderr, "%p->", node->Item);

	fprintf(stderr, "\n");
}

View file

@ -0,0 +1,46 @@
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef RADEON_LIST_H
#define RADEON_LIST_H
struct memory_pool;
/**
 * Node of a doubly linked list that carries an opaque payload pointer.
 */
struct rc_list {
/* Payload; the list code only stores and prints this pointer. */
void * Item;
/* Previous node; NULL in a node freshly created by rc_list(). */
struct rc_list * Prev;
/* Next node, or NULL for the last node. */
struct rc_list * Next;
};
/* Allocate an unlinked node holding item from pool. */
struct rc_list * rc_list(struct memory_pool * pool, void * item);
/* Append new_value to the end of *list (which may be empty). */
void rc_list_add(struct rc_list ** list, struct rc_list * new_value);
/* Unlink rm_value from *list, updating *list if it was the head. */
void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value);
/* Number of nodes reachable from list. */
unsigned int rc_list_count(struct rc_list * list);
/* Print the Item pointers of the list to stderr. */
void rc_list_print(struct rc_list * list);
#endif /* RADEON_LIST_H */

View file

@ -0,0 +1,546 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_opcodes.h"
#include "radeon_program.h"
#include "radeon_program_constants.h"
/**
 * Static description of the opcodes known to the compiler, one entry per
 * opcode.  Fields that an entry does not mention are zero-initialized.
 *
 * NOTE(review): the entries appear in the same order as the rc_opcode enum
 * and are presumably looked up by opcode value -- keep both in sync.
 */
struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
{
.Opcode = RC_OPCODE_NOP,
.Name = "NOP"
},
{
.Opcode = RC_OPCODE_ILLEGAL_OPCODE,
.Name = "ILLEGAL OPCODE"
},
{
.Opcode = RC_OPCODE_ABS,
.Name = "ABS",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_ADD,
.Name = "ADD",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_ARL,
.Name = "ARL",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_CEIL,
.Name = "CEIL",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_CLAMP,
.Name = "CLAMP",
.NumSrcRegs = 3,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_CMP,
.Name = "CMP",
.NumSrcRegs = 3,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_CND,
.Name = "CND",
.NumSrcRegs = 3,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_COS,
.Name = "COS",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_DDX,
.Name = "DDX",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_DDY,
.Name = "DDY",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_DP2,
.Name = "DP2",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_DP3,
.Name = "DP3",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_DP4,
.Name = "DP4",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_DPH,
.Name = "DPH",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_DST,
.Name = "DST",
.NumSrcRegs = 2,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_EX2,
.Name = "EX2",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_EXP,
.Name = "EXP",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_FLR,
.Name = "FLR",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_FRC,
.Name = "FRC",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_KIL,
.Name = "KIL",
.NumSrcRegs = 1
},
{
.Opcode = RC_OPCODE_LG2,
.Name = "LG2",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_LIT,
.Name = "LIT",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_LOG,
.Name = "LOG",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_LRP,
.Name = "LRP",
.NumSrcRegs = 3,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MAD,
.Name = "MAD",
.NumSrcRegs = 3,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MAX,
.Name = "MAX",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MIN,
.Name = "MIN",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MOV,
.Name = "MOV",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_MUL,
.Name = "MUL",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_POW,
.Name = "POW",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_RCP,
.Name = "RCP",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_RSQ,
.Name = "RSQ",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_SCS,
.Name = "SCS",
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_SEQ,
.Name = "SEQ",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SFL,
.Name = "SFL",
.NumSrcRegs = 0,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SGE,
.Name = "SGE",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SGT,
.Name = "SGT",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SIN,
.Name = "SIN",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsStandardScalar = 1
},
{
.Opcode = RC_OPCODE_SLE,
.Name = "SLE",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SLT,
.Name = "SLT",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SNE,
.Name = "SNE",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SSG,
.Name = "SSG",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SUB,
.Name = "SUB",
.NumSrcRegs = 2,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_SWZ,
.Name = "SWZ",
.NumSrcRegs = 1,
.HasDstReg = 1,
.IsComponentwise = 1
},
{
.Opcode = RC_OPCODE_XPD,
.Name = "XPD",
.NumSrcRegs = 2,
.HasDstReg = 1
},
/* Texture instructions (HasTexture set). */
{
.Opcode = RC_OPCODE_TEX,
.Name = "TEX",
.HasTexture = 1,
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_TXB,
.Name = "TXB",
.HasTexture = 1,
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_TXD,
.Name = "TXD",
.HasTexture = 1,
.NumSrcRegs = 3,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_TXL,
.Name = "TXL",
.HasTexture = 1,
.NumSrcRegs = 1,
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_TXP,
.Name = "TXP",
.HasTexture = 1,
.NumSrcRegs = 1,
.HasDstReg = 1
},
/* Flow control instructions (IsFlowControl set). */
{
.Opcode = RC_OPCODE_IF,
.Name = "IF",
.IsFlowControl = 1,
.NumSrcRegs = 1
},
{
.Opcode = RC_OPCODE_ELSE,
.Name = "ELSE",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
{
.Opcode = RC_OPCODE_ENDIF,
.Name = "ENDIF",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
{
.Opcode = RC_OPCODE_BGNLOOP,
.Name = "BGNLOOP",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
{
.Opcode = RC_OPCODE_BRK,
.Name = "BRK",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
{
.Opcode = RC_OPCODE_ENDLOOP,
.Name = "ENDLOOP",
.IsFlowControl = 1,
.NumSrcRegs = 0,
},
{
.Opcode = RC_OPCODE_CONT,
.Name = "CONT",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
/* Remaining opcodes, none of which take source registers here. */
{
.Opcode = RC_OPCODE_REPL_ALPHA,
.Name = "REPL_ALPHA",
.HasDstReg = 1
},
{
.Opcode = RC_OPCODE_BEGIN_TEX,
.Name = "BEGIN_TEX"
},
{
.Opcode = RC_OPCODE_KILP,
.Name = "KILP",
}
};
/**
 * Compute which components of each source register an instruction reads in
 * order to produce the destination components selected by writemask.
 *
 * \param inst      instruction to analyze
 * \param writemask destination components of interest
 * \param srcmasks  output array of three masks, one per source register;
 *                  all three entries are overwritten
 */
void rc_compute_sources_for_writemask(
		const struct rc_instruction *inst,
		unsigned int writemask,
		unsigned int *srcmasks)
{
	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
	unsigned int i;

	for (i = 0; i < 3; ++i)
		srcmasks[i] = 0;

	/* KIL and IF read their source regardless of the write mask. */
	if (info->Opcode == RC_OPCODE_KIL)
		srcmasks[0] |= RC_MASK_XYZW;
	else if (info->Opcode == RC_OPCODE_IF)
		srcmasks[0] |= RC_MASK_X;

	if (!writemask)
		return;

	/* Component-wise opcodes read exactly the components they write. */
	if (info->IsComponentwise) {
		for (i = 0; i < info->NumSrcRegs; ++i)
			srcmasks[i] |= writemask;
		return;
	}

	/* Standard scalar opcodes read only X of every source. */
	if (info->IsStandardScalar) {
		for (i = 0; i < info->NumSrcRegs; ++i)
			srcmasks[i] |= RC_MASK_X;
		return;
	}

	/* Everything else needs opcode-specific knowledge. */
	switch (info->Opcode) {
	case RC_OPCODE_ARL:
		srcmasks[0] |= RC_MASK_X;
		break;
	case RC_OPCODE_DP2:
		srcmasks[0] |= RC_MASK_XY;
		srcmasks[1] |= RC_MASK_XY;
		break;
	case RC_OPCODE_DP3:
	case RC_OPCODE_XPD:
		srcmasks[0] |= RC_MASK_XYZ;
		srcmasks[1] |= RC_MASK_XYZ;
		break;
	case RC_OPCODE_DP4:
		srcmasks[0] |= RC_MASK_XYZW;
		srcmasks[1] |= RC_MASK_XYZW;
		break;
	case RC_OPCODE_DPH:
		srcmasks[0] |= RC_MASK_XYZ;
		srcmasks[1] |= RC_MASK_XYZW;
		break;
	case RC_OPCODE_TXB:
	case RC_OPCODE_TXP:
	case RC_OPCODE_TXL:
		/* These read W in addition to the coordinates. */
		srcmasks[0] |= RC_MASK_W;
		/* Fall through */
	case RC_OPCODE_TEX:
		/* Number of coordinates depends on the texture target. */
		switch (inst->U.I.TexSrcTarget) {
		case RC_TEXTURE_1D:
			srcmasks[0] |= RC_MASK_X;
			break;
		case RC_TEXTURE_2D:
		case RC_TEXTURE_RECT:
		case RC_TEXTURE_1D_ARRAY:
			srcmasks[0] |= RC_MASK_XY;
			break;
		case RC_TEXTURE_3D:
		case RC_TEXTURE_CUBE:
		case RC_TEXTURE_2D_ARRAY:
			srcmasks[0] |= RC_MASK_XYZ;
			break;
		}
		break;
	case RC_OPCODE_TXD:
		/* Sources 1 and 2 are read with the same components as the
		 * coordinates; array targets read one extra component from
		 * source 0 only. */
		switch (inst->U.I.TexSrcTarget) {
		case RC_TEXTURE_1D_ARRAY:
			srcmasks[0] |= RC_MASK_Y;
			/* Fall through. */
		case RC_TEXTURE_1D:
			srcmasks[0] |= RC_MASK_X;
			srcmasks[1] |= RC_MASK_X;
			srcmasks[2] |= RC_MASK_X;
			break;
		case RC_TEXTURE_2D_ARRAY:
			srcmasks[0] |= RC_MASK_Z;
			/* Fall through. */
		case RC_TEXTURE_2D:
		case RC_TEXTURE_RECT:
			srcmasks[0] |= RC_MASK_XY;
			srcmasks[1] |= RC_MASK_XY;
			srcmasks[2] |= RC_MASK_XY;
			break;
		case RC_TEXTURE_3D:
		case RC_TEXTURE_CUBE:
			srcmasks[0] |= RC_MASK_XYZ;
			srcmasks[1] |= RC_MASK_XYZ;
			srcmasks[2] |= RC_MASK_XYZ;
			break;
		}
		break;
	case RC_OPCODE_DST:
		srcmasks[0] |= RC_MASK_Y | RC_MASK_Z;
		srcmasks[1] |= RC_MASK_Y | RC_MASK_W;
		break;
	case RC_OPCODE_EXP:
	case RC_OPCODE_LOG:
		srcmasks[0] |= RC_MASK_XY;
		break;
	case RC_OPCODE_LIT:
		srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W;
		break;
	default:
		break;
	}
}

View file

@ -0,0 +1,263 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef RADEON_OPCODES_H
#define RADEON_OPCODES_H
#include <assert.h>
/**
* Opcodes understood by the Radeon compiler.
*
* The enumerators are numbered consecutively starting at RC_OPCODE_NOP = 0,
* and MAX_RC_OPCODE (the last enumerator) is used as the size of the
* rc_opcodes[] information table. In the per-opcode comments below, ".c"
* stands for "each component" of a vec4 operation.
*/
typedef enum {
RC_OPCODE_NOP = 0,
RC_OPCODE_ILLEGAL_OPCODE,
/** vec4 instruction: dst.c = abs(src0.c); */
RC_OPCODE_ABS,
/** vec4 instruction: dst.c = src0.c + src1.c; */
RC_OPCODE_ADD,
/** special instruction: load address register
* dst.x = floor(src.x), where dst must be an address register */
RC_OPCODE_ARL,
/** vec4 instruction: dst.c = ceil(src0.c) */
RC_OPCODE_CEIL,
/** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */
RC_OPCODE_CLAMP,
/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
RC_OPCODE_CMP,
/** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */
RC_OPCODE_CND,
/** scalar instruction: dst = cos(src0.x) */
RC_OPCODE_COS,
/** special instruction: take vec4 partial derivative in X direction
* dst.c = d src0.c / dx */
RC_OPCODE_DDX,
/** special instruction: take vec4 partial derivative in Y direction
* dst.c = d src0.c / dy */
RC_OPCODE_DDY,
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */
RC_OPCODE_DP2,
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
RC_OPCODE_DP3,
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */
RC_OPCODE_DP4,
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */
RC_OPCODE_DPH,
/** special instruction, see ARB_fragment_program */
RC_OPCODE_DST,
/** scalar instruction: dst = 2**src0.x */
RC_OPCODE_EX2,
/** special instruction, see ARB_vertex_program */
RC_OPCODE_EXP,
/** vec4 instruction: dst.c = floor(src0.c) */
RC_OPCODE_FLR,
/** vec4 instruction: dst.c = src0.c - floor(src0.c) */
RC_OPCODE_FRC,
/** special instruction: stop execution if any component of src0 is negative */
RC_OPCODE_KIL,
/** scalar instruction: dst = log_2(src0.x) */
RC_OPCODE_LG2,
/** special instruction, see ARB_vertex_program */
RC_OPCODE_LIT,
/** special instruction, see ARB_vertex_program */
RC_OPCODE_LOG,
/** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */
RC_OPCODE_LRP,
/** vec4 instruction: dst.c = src0.c*src1.c + src2.c */
RC_OPCODE_MAD,
/** vec4 instruction: dst.c = max(src0.c, src1.c) */
RC_OPCODE_MAX,
/** vec4 instruction: dst.c = min(src0.c, src1.c) */
RC_OPCODE_MIN,
/** vec4 instruction: dst.c = src0.c */
RC_OPCODE_MOV,
/** vec4 instruction: dst.c = src0.c*src1.c */
RC_OPCODE_MUL,
/** scalar instruction: dst = src0.x ** src1.x */
RC_OPCODE_POW,
/** scalar instruction: dst = 1 / src0.x */
RC_OPCODE_RCP,
/** scalar instruction: dst = 1 / sqrt(src0.x) */
RC_OPCODE_RSQ,
/** special instruction, see ARB_fragment_program */
RC_OPCODE_SCS,
/** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SEQ,
/** vec4 instruction: dst.c = 0.0 */
RC_OPCODE_SFL,
/** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SGE,
/** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SGT,
/** scalar instruction: dst = sin(src0.x) */
RC_OPCODE_SIN,
/** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SLE,
/** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SLT,
/** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SNE,
/** vec4 instruction: dst.c = (src0.c < 0) ? -1 : ((src0.c > 0) ? 1 : 0) */
RC_OPCODE_SSG,
/** vec4 instruction: dst.c = src0.c - src1.c */
RC_OPCODE_SUB,
/** vec4 instruction: dst.c = src0.c */
RC_OPCODE_SWZ,
/** special instruction, see ARB_fragment_program */
RC_OPCODE_XPD,
/* Texture opcodes. Their semantics are not documented in this header;
* NOTE(review): the names presumably denote plain (TEX), biased (TXB),
* explicit-derivative (TXD), explicit-LOD (TXL) and projective (TXP)
* lookups -- confirm against the code that emits them. */
RC_OPCODE_TEX,
RC_OPCODE_TXB,
RC_OPCODE_TXD,
RC_OPCODE_TXL,
RC_OPCODE_TXP,
/** branch instruction:
* If src0.x != 0.0, continue with the next instruction;
* otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF.
*/
RC_OPCODE_IF,
/** branch instruction: jump to matching RC_OPCODE_ENDIF */
RC_OPCODE_ELSE,
/** branch instruction: has no effect */
RC_OPCODE_ENDIF,
/* Loop control opcodes. Their exact behaviour is not documented in this
* header; NOTE(review): presumably begin-loop / break / end-loop /
* continue -- confirm against the loop emulation code. */
RC_OPCODE_BGNLOOP,
RC_OPCODE_BRK,
RC_OPCODE_ENDLOOP,
RC_OPCODE_CONT,
/** special instruction, used in R300-R500 fragment program pair instructions
* indicates that the result of the alpha operation shall be replicated
* across all other channels */
RC_OPCODE_REPL_ALPHA,
/** special instruction, used in R300-R500 fragment programs
* to indicate the start of a block of texture instructions that
* can run simultaneously. */
RC_OPCODE_BEGIN_TEX,
/** Stop execution of the shader (GLSL discard) */
RC_OPCODE_KILP,
/** Not an opcode: number of opcodes, used to size rc_opcodes[]. */
MAX_RC_OPCODE
} rc_opcode;
/**
* Static description of one opcode; one entry per rc_opcode value is
* stored in the rc_opcodes[] table and looked up by rc_get_opcode_info().
*/
struct rc_opcode_info {
/** The opcode this entry describes; rc_get_opcode_info() asserts that it
* equals the entry's index in rc_opcodes[]. */
rc_opcode Opcode;
/** Name of the opcode as a C string.
* NOTE(review): presumably the mnemonic used when printing programs. */
const char * Name;
/** true if the instruction reads from a texture.
*
* \note This is false for the KIL instruction, even though KIL is
* a texture instruction from a hardware point of view. */
unsigned int HasTexture:1;
/** Number of source registers the instruction reads (2-bit field, 0..3). */
unsigned int NumSrcRegs:2;
/** true if the instruction has a destination register. */
unsigned int HasDstReg:1;
/** true if this instruction affects control flow */
unsigned int IsFlowControl:1;
/** true if this is a vector instruction that operates on components in parallel
* without any cross-component interaction */
unsigned int IsComponentwise:1;
/** true if this instruction sources only its operands X components
* to compute one result which is smeared across all output channels */
unsigned int IsStandardScalar:1;
};
extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE];

/**
 * Look up the static description of an opcode.
 *
 * Asserts that the opcode lies inside the table and that the table entry
 * found at that position really describes the requested opcode.
 *
 * \param opcode  opcode to look up
 * \return pointer to the matching entry of rc_opcodes[]
 */
static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode)
{
	const struct rc_opcode_info * entry;

	assert((unsigned int)opcode < MAX_RC_OPCODE);
	entry = &rc_opcodes[opcode];
	assert(entry->Opcode == opcode);

	return entry;
}
/* Forward declaration; only a pointer to the instruction is needed here. */
struct rc_instruction;
/**
* Compute per-source component masks for an instruction.
*
* \param inst       instruction to inspect
* \param writemask  destination write mask under consideration
* \param srcmasks   output array of RC_MASK_* bit masks, one per source.
*                   The definition ORs bits into srcmasks[0..2], so the
*                   array presumably needs at least three entries and the
*                   caller's initial contents matter -- TODO confirm
*                   against the full definition.
*/
void rc_compute_sources_for_writemask(
const struct rc_instruction *inst,
unsigned int writemask,
unsigned int *srcmasks);
#endif /* RADEON_OPCODES_H */

View file

@ -0,0 +1,700 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_dataflow.h"
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_swizzle.h"
/**
* Context passed from is_src_clobbered_scan_write() to
* src_clobbered_reads_cb(): it describes one register write and the reader
* bookkeeping that must be flagged when that write hits a register the
* writer instruction reads.
*/
struct src_clobbered_reads_cb_data {
/** Register file of the write being examined. */
rc_register_file File;
/** Register index of the write being examined. */
unsigned int Index;
/** Component mask of the write being examined. */
unsigned int Mask;
/** Reader data whose AbortOnRead field is set on a clobber. */
struct rc_reader_data * ReaderData;
};
/**
* Callback used by presub_helper() to rewrite one reader of an ADD
* instruction. It is invoked as fn(inst_add, reader_inst, src_index), where
* src_index is the index of the reader's source register that reads the
* ADD's result.
*/
typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
struct rc_instruction *,
unsigned int);
static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
{
struct rc_src_register combine;
combine.File = inner.File;
combine.Index = inner.Index;
combine.RelAddr = inner.RelAddr;
if (outer.Abs) {
combine.Abs = 1;
combine.Negate = outer.Negate;
} else {
combine.Abs = inner.Abs;
combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
combine.Negate ^= outer.Negate;
}
combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
return combine;
}
/**
 * Reader callback for copy_propagate(): sets reader_data->Abort when the
 * MOV held in reader_data->Writer cannot be propagated into the source
 * \p src of instruction \p inst.
 */
static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
						struct rc_src_register * src)
{
	struct rc_reader_data * rd = data;
	struct rc_instruction * mov = rd->Writer;
	const rc_register_file src_file = src->File;
	int is_tex_like;

	/* The reader must be able to take over the MOV's presubtract
	 * operation. */
	if (!rc_inst_can_use_presub(inst,
				mov->U.I.PreSub.Opcode,
				rc_swizzle_to_writemask(src->Swizzle),
				src,
				&mov->U.I.PreSub.SrcReg[0],
				&mov->U.I.PreSub.SrcReg[1])) {
		rd->Abort = 1;
		return;
	}

	/* XXX This could probably be handled better. */
	if (src_file == RC_FILE_ADDRESS) {
		rd->Abort = 1;
		return;
	}

	/* These instructions cannot read from the constants file.
	 * see radeonTransformTEX()
	 */
	switch (inst->U.I.Opcode) {
	case RC_OPCODE_TEX:
	case RC_OPCODE_TXB:
	case RC_OPCODE_TXP:
	case RC_OPCODE_TXD:
	case RC_OPCODE_TXL:
	case RC_OPCODE_KIL:
		is_tex_like = 1;
		break;
	default:
		is_tex_like = 0;
		break;
	}

	if (is_tex_like &&
	    mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
	    mov->U.I.SrcReg[0].File != RC_FILE_INPUT) {
		rd->Abort = 1;
	}
}
/**
 * Source-read callback run over the writer instruction's sources: when the
 * register write described by \p data touches a register that \p src reads
 * (or the write targets the address file while \p src is relatively
 * addressed), any further read is marked as an abort condition.
 */
static void src_clobbered_reads_cb(
	void * data,
	struct rc_instruction * inst,
	struct rc_src_register * src)
{
	struct src_clobbered_reads_cb_data * cb = data;

	/* Same register, and at least one read component is overwritten. */
	const int reads_written_reg =
		src->File == cb->File
		&& src->Index == cb->Index
		&& (rc_swizzle_to_writemask(src->Swizzle) & cb->Mask);

	/* The address register feeding a relative access is overwritten. */
	const int reads_written_addr =
		src->RelAddr && cb->File == RC_FILE_ADDRESS;

	if (reads_written_reg || reads_written_addr) {
		cb->ReaderData->AbortOnRead = RC_MASK_XYZW;
	}
}
/**
 * Write callback for rc_get_readers(): checks every source of the writer
 * instruction stored in the reader data against the register write
 * (\p file, \p index, \p mask) and lets src_clobbered_reads_cb() flag the
 * reader data if one of those sources is clobbered.
 */
static void is_src_clobbered_scan_write(
	void * data,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct rc_reader_data * rd = data;
	struct src_clobbered_reads_cb_data cb = {
		.File = file,
		.Index = index,
		.Mask = mask,
		.ReaderData = rd
	};

	rc_for_all_reads_src(rd->Writer, src_clobbered_reads_cb, &cb);
}
static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
{
struct rc_reader_data reader_data;
unsigned int i;
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
inst_mov->U.I.WriteALUResult ||
inst_mov->U.I.SaturateMode)
return;
/* Get a list of all the readers of this MOV instruction. */
reader_data.ExitOnAbort = 1;
rc_get_readers(c, inst_mov, &reader_data,
copy_propagate_scan_read, NULL,
is_src_clobbered_scan_write);
if (reader_data.Abort || reader_data.ReaderCount == 0)
return;
/* Propagate the MOV instruction. */
for (i = 0; i < reader_data.ReaderCount; i++) {
struct rc_instruction * inst = reader_data.Readers[i].Inst;
*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
inst->U.I.PreSub = inst_mov->U.I.PreSub;
}
/* Finally, remove the original MOV instruction */
rc_remove_instruction(inst_mov);
}
/**
 * Test whether a source operand consists of one and the same constant
 * swizzle (with the same negate bit) in every channel it uses.
 *
 * Only operands in RC_FILE_NONE qualify. Unused channels are ignored.
 *
 * \param src      operand to test
 * \param pswz     receives the common swizzle; set to 0 on failure
 * \param pnegate  receives the common negate bit
 * \return 1 if the operand is such a uniform constant, 0 otherwise
 */
static int is_src_uniform_constant(struct rc_src_register src,
		rc_swizzle * pswz, unsigned int * pnegate)
{
	unsigned int chan;
	int seen_first = 0;

	if (src.File != RC_FILE_NONE) {
		goto not_uniform;
	}

	for (chan = 0; chan < 4; ++chan) {
		const unsigned int swz = GET_SWZ(src.Swizzle, chan);

		/* A swizzle below 4 selects a register component. */
		if (swz < 4) {
			goto not_uniform;
		}

		if (swz == RC_SWIZZLE_UNUSED) {
			continue;
		}

		if (seen_first) {
			/* Every further channel must match the first one. */
			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
				goto not_uniform;
			}
		} else {
			*pswz = swz;
			*pnegate = GET_BIT(src.Negate, chan);
			seen_first = 1;
		}
	}

	return 1;

not_uniform:
	*pswz = 0;
	return 0;
}
static void constant_folding_mad(struct rc_instruction * inst)
{
rc_swizzle swz = 0;
unsigned int negate= 0;
if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
if (swz == RC_SWIZZLE_ZERO) {
inst->U.I.Opcode = RC_OPCODE_MUL;
return;
}
}
if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
if (swz == RC_SWIZZLE_ONE) {
inst->U.I.Opcode = RC_OPCODE_ADD;
if (negate)
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
return;
} else if (swz == RC_SWIZZLE_ZERO) {
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
return;
}
}
if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
if (swz == RC_SWIZZLE_ONE) {
inst->U.I.Opcode = RC_OPCODE_ADD;
if (negate)
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
return;
} else if (swz == RC_SWIZZLE_ZERO) {
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
return;
}
}
}
static void constant_folding_mul(struct rc_instruction * inst)
{
rc_swizzle swz = 0;
unsigned int negate = 0;
if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
if (swz == RC_SWIZZLE_ONE) {
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
if (negate)
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
return;
} else if (swz == RC_SWIZZLE_ZERO) {
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
return;
}
}
if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
if (swz == RC_SWIZZLE_ONE) {
inst->U.I.Opcode = RC_OPCODE_MOV;
if (negate)
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
return;
} else if (swz == RC_SWIZZLE_ZERO) {
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
return;
}
}
}
/**
 * Simplify an ADD in which one operand is the uniform constant 0: the
 * instruction becomes a MOV of the other operand.
 */
static void constant_folding_add(struct rc_instruction * inst)
{
	rc_swizzle swz = 0;
	unsigned int negate = 0;

	/* 0 + y  ->  MOV y */
	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)
	    && swz == RC_SWIZZLE_ZERO) {
		inst->U.I.Opcode = RC_OPCODE_MOV;
		inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
		return;
	}

	/* x + 0  ->  MOV x */
	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)
	    && swz == RC_SWIZZLE_ZERO) {
		inst->U.I.Opcode = RC_OPCODE_MOV;
		return;
	}
}
/**
* Replace 0.0, 1.0 and 0.5 immediate constants by their
* respective swizzles. Simplify instructions like ADD dst, src, 0;
*
* The function works in three steps:
*  1. every source that reads an immediate constant has the channels whose
*     value is +-0.0, +-1.0 or (if the compiler supports it) +-0.5 rewritten
*     to the matching constant swizzle;
*  2. MAD, MUL and ADD are handed to their dedicated simplifiers;
*  3. source registers that the (possibly changed) opcode no longer uses
*     are zeroed.
*
* \param c     compiler context (constant table, swizzle capabilities)
* \param inst  instruction to fold in place
*/
static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
{
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
unsigned int i;
/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
struct rc_constant * constant;
struct rc_src_register newsrc;
int have_real_reference;
unsigned int chan;
/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
for (chan = 0; chan < 4; ++chan)
if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
break;
if (chan == 4) {
/* No channel selects a register component, so the register file is
* irrelevant for this source. */
inst->U.I.SrcReg[src].File = RC_FILE_NONE;
continue;
}
/* Convert immediates to swizzles. */
/* Only directly addressed, in-range entries of the constant file can be
* inspected. */
if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
inst->U.I.SrcReg[src].RelAddr ||
inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
continue;
constant =
&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
if (constant->Type != RC_CONSTANT_IMMEDIATE)
continue;
/* Work on a copy so the rewrite can still be rejected below. */
newsrc = inst->U.I.SrcReg[src];
have_real_reference = 0;
for (chan = 0; chan < 4; ++chan) {
unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
unsigned int newswz;
float imm;
float baseimm;
/* Channels that already use a constant or unused swizzle are left alone. */
if (swz >= 4)
continue;
imm = constant->u.Immediate[swz];
/* baseimm is the magnitude of the immediate; the sign is handled
* separately through the negate mask. */
baseimm = imm;
if (imm < 0.0)
baseimm = -baseimm;
if (baseimm == 0.0) {
newswz = RC_SWIZZLE_ZERO;
} else if (baseimm == 1.0) {
newswz = RC_SWIZZLE_ONE;
} else if (baseimm == 0.5 && c->has_half_swizzles) {
newswz = RC_SWIZZLE_HALF;
} else {
/* This channel still needs the value stored in the constant. */
have_real_reference = 1;
continue;
}
SET_SWZ(newsrc.Swizzle, chan, newswz);
/* A negative immediate is expressed by toggling the channel's negate
* bit; this is skipped when Abs is set (presumably because the
* absolute value discards the sign anyway -- confirm). */
if (imm < 0.0 && !newsrc.Abs)
newsrc.Negate ^= 1 << chan;
}
/* If no channel reads the constant any more, drop the reference to it. */
if (!have_real_reference) {
newsrc.File = RC_FILE_NONE;
newsrc.Index = 0;
}
/* don't make the swizzle worse */
if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
continue;
inst->U.I.SrcReg[src] = newsrc;
}
/* Simplify instructions based on constants */
if (inst->U.I.Opcode == RC_OPCODE_MAD)
constant_folding_mad(inst);
/* note: MAD can simplify to MUL or ADD */
if (inst->U.I.Opcode == RC_OPCODE_MUL)
constant_folding_mul(inst);
else if (inst->U.I.Opcode == RC_OPCODE_ADD)
constant_folding_add(inst);
/* In case this instruction has been converted, make sure all of the
* registers that are no longer used are empty. */
opcode = rc_get_opcode_info(inst->U.I.Opcode);
for(i = opcode->NumSrcRegs; i < 3; i++) {
memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
}
}
/**
 * Determine which components of a destination register a source operand
 * reads.
 *
 * \return the writemask of the components read by \p src when \p src and
 *         \p dst name the same register (same file and index), zero
 *         otherwise.
 */
static unsigned int src_reads_dst_mask(struct rc_src_register src,
						struct rc_dst_register dst)
{
	if (dst.File == src.File && dst.Index == src.Index) {
		return rc_swizzle_to_writemask(src.Swizzle);
	}
	return 0;
}
/* Tell whether any of the four channels of a source register uses a
 * constant swizzle (0, 0.5 or 1.0).
 * Returns 1 if such a channel exists and 0 otherwise. */
static int src_has_const_swz(struct rc_src_register src) {
	int chan = 0;

	while (chan < 4) {
		switch (GET_SWZ(src.Swizzle, chan)) {
		case RC_SWIZZLE_ZERO:
		case RC_SWIZZLE_HALF:
		case RC_SWIZZLE_ONE:
			return 1;
		default:
			break;
		}
		chan++;
	}
	return 0;
}
static void presub_scan_read(
void * data,
struct rc_instruction * inst,
struct rc_src_register * src)
{
struct rc_reader_data * reader_data = data;
rc_presubtract_op * presub_opcode = reader_data->CbData;
if (!rc_inst_can_use_presub(inst, *presub_opcode,
reader_data->Writer->U.I.DstReg.WriteMask,
src,
&reader_data->Writer->U.I.SrcReg[0],
&reader_data->Writer->U.I.SrcReg[1])) {
reader_data->Abort = 1;
return;
}
}
/**
 * Common driver of the presubtract peepholes.
 *
 * Collects all readers of \p inst_add and, provided the scan did not abort
 * and found at least one reader, calls \p presub_replace for every source
 * slot of every reader that reads the ADD's result.
 *
 * \return 1 if the readers were rewritten, 0 if nothing was changed.
 */
static int presub_helper(
	struct radeon_compiler * c,
	struct rc_instruction * inst_add,
	rc_presubtract_op presub_opcode,
	rc_presub_replace_fn presub_replace)
{
	struct rc_reader_data readers;
	rc_presubtract_op cb_op = presub_opcode;
	unsigned int r;

	readers.CbData = &cb_op;
	readers.ExitOnAbort = 1;
	rc_get_readers(c, inst_add, &readers, presub_scan_read, NULL,
		       is_src_clobbered_scan_write);

	if (readers.Abort || !readers.ReaderCount) {
		return 0;
	}

	for (r = 0; r < readers.ReaderCount; r++) {
		const struct rc_reader * reader = &readers.Readers[r];
		const struct rc_opcode_info * info =
			rc_get_opcode_info(reader->Inst->U.I.Opcode);
		unsigned int slot;

		/* Locate the source slot the reader entry refers to. */
		for (slot = 0; slot < info->NumSrcRegs; slot++) {
			if (reader->U.I.Src == &reader->Inst->U.I.SrcReg[slot]) {
				presub_replace(inst_add, reader->Inst, slot);
			}
		}
	}

	return 1;
}
/* Rewrite one reader of an ADD so that it uses an ADD or SUB presubtract
 * operation instead of the ADD's result.
 *
 * The caller must make sure that inst_add->U.I.SrcReg[0] and
 * inst_add->U.I.SrcReg[1] are not both negated. */
static void presub_replace_add(
	struct rc_instruction * inst_add,
	struct rc_instruction * inst_reader,
	unsigned int src_index)
{
	const int src0_negated = inst_add->U.I.SrcReg[0].Negate != 0;
	const int src1_negated = inst_add->U.I.SrcReg[1].Negate != 0;
	/* When src1 is the negated operand it becomes the first presubtract
	 * source; otherwise the original order is kept. */
	const unsigned int first = src1_negated ? 1 : 0;
	const unsigned int second = src1_negated ? 0 : 1;
	rc_presubtract_op presub_opcode;

	if (src1_negated || src0_negated) {
		presub_opcode = RC_PRESUB_SUB;
	} else {
		presub_opcode = RC_PRESUB_ADD;
	}

	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[first];
	inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[second];

	/* The sign is expressed by the presubtract opcode itself. */
	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
	inst_reader->U.I.PreSub.Opcode = presub_opcode;

	/* Redirect the reader's source to the presubtract result. */
	inst_reader->U.I.SrcReg[src_index] =
		chain_srcregs(inst_reader->U.I.SrcReg[src_index],
			      inst_reader->U.I.PreSub.SrcReg[0]);
	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
}
static int is_presub_candidate(
struct radeon_compiler * c,
struct rc_instruction * inst)
{
const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
unsigned int i;
unsigned int is_constant[2] = {0, 0};
assert(inst->U.I.Opcode == RC_OPCODE_ADD);
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
|| inst->U.I.SaturateMode
|| inst->U.I.WriteALUResult) {
return 0;
}
/* If both sources use a constant swizzle, then we can't convert it to
* a presubtract operation. In fact for the ADD and SUB presubtract
* operations neither source can contain a constant swizzle. This
* specific case is checked in peephole_add_presub_add() when
* we make sure the swizzles for both sources are equal, so we
* don't need to worry about it here. */
for (i = 0; i < 2; i++) {
int chan;
for (chan = 0; chan < 4; chan++) {
rc_swizzle swz =
get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
if (swz == RC_SWIZZLE_ONE
|| swz == RC_SWIZZLE_ZERO
|| swz == RC_SWIZZLE_HALF) {
is_constant[i] = 1;
}
}
}
if (is_constant[0] && is_constant[1])
return 0;
for(i = 0; i < info->NumSrcRegs; i++) {
struct rc_src_register src = inst->U.I.SrcReg[i];
if (src_reads_dst_mask(src, inst->U.I.DstReg))
return 0;
src.File = RC_FILE_PRESUB;
if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
return 0;
}
return 1;
}
static int peephole_add_presub_add(
struct radeon_compiler * c,
struct rc_instruction * inst_add)
{
unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
return 0;
/* src0 and src1 can't have absolute values */
if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
return 0;
/* presub_replace_add() assumes only one is negative */
if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
return 0;
/* if src0 is negative, at least all bits of dstmask have to be set */
if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
return 0;
/* if src1 is negative, at least all bits of dstmask have to be set */
if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
return 0;
if (!is_presub_candidate(c, inst_add))
return 0;
if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
rc_remove_instruction(inst_add);
return 1;
}
return 0;
}
static void presub_replace_inv(
struct rc_instruction * inst_add,
struct rc_instruction * inst_reader,
unsigned int src_index)
{
/* We must be careful not to modify inst_add, since it
* is possible it will remain part of the program.*/
inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
inst_reader->U.I.PreSub.SrcReg[0]);
inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
}
/**
* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
* of the add instruction must have the constatnt 1 swizzle. This function
* does not check const registers to see if their value is 1.0, so it should
* be called after the constant_folding optimization.
* @return
* 0 if the ADD instruction is still part of the program.
* 1 if the ADD instruction is no longer part of the program.
*/
static int peephole_add_presub_inv(
struct radeon_compiler * c,
struct rc_instruction * inst_add)
{
unsigned int i, swz;
if (!is_presub_candidate(c, inst_add))
return 0;
/* Check if src0 is 1. */
/* XXX It would be nice to use is_src_uniform_constant here, but that
* function only works if the register's file is RC_FILE_NONE */
for(i = 0; i < 4; i++ ) {
swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
&& swz != RC_SWIZZLE_ONE) {
return 0;
}
}
/* Check src1. */
if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
inst_add->U.I.DstReg.WriteMask
|| inst_add->U.I.SrcReg[1].Abs
|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
return 0;
}
if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
rc_remove_instruction(inst_add);
return 1;
}
return 0;
}
/**
 * Run the peephole optimizations that apply to one instruction. Currently
 * only ADD is handled, and only when the compiler has presubtract support.
 *
 * @return
 * 	0 if inst is still part of the program.
 * 	1 if inst is no longer part of the program.
 */
static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
{
	if (inst->U.I.Opcode != RC_OPCODE_ADD) {
		return 0;
	}
	if (!c->has_presub) {
		return 0;
	}

	/* Try the 1 - x form first, then the generic add/sub form. */
	if (peephole_add_presub_inv(c, inst)) {
		return 1;
	}
	if (peephole_add_presub_add(c, inst)) {
		return 1;
	}

	return 0;
}
/**
 * Optimization pass: for every instruction of the program run constant
 * folding, then the peephole optimizations, and finally copy propagation
 * on whatever is (still) a MOV.
 *
 * \param c     compiler whose program is optimized in place
 * \param user  not used by this pass
 */
void rc_optimize(struct radeon_compiler * c, void *user)
{
	struct rc_instruction * const end = &c->Program.Instructions;
	struct rc_instruction * cur;
	struct rc_instruction * next;

	for (cur = c->Program.Instructions.Next; cur != end; cur = next) {
		/* Remember the successor first: cur may be removed below. */
		next = cur->Next;

		constant_folding(c, cur);

		if (peephole(c, cur)) {
			continue;
		}

		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
			copy_propagate(c, cur);
			/* cur may no longer be part of the program */
		}
	}
}

View file

@ -0,0 +1,62 @@
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_opcodes.h"
#include "radeon_program_pair.h"
/**
 * If the presubtract source of a pair sub-instruction is in use, mark the
 * leading sources that feed the presubtract operation as used, too.
 */
static void mark_used_presub(struct rc_pair_sub_instruction * sub)
{
	unsigned int operand_count;
	unsigned int src;

	if (!sub->Src[RC_PAIR_PRESUB_SRC].Used) {
		return;
	}

	operand_count = rc_presubtract_src_reg_count(
				sub->Src[RC_PAIR_PRESUB_SRC].Index);

	for (src = 0; src < operand_count; src++) {
		sub->Src[src].Used = 1;
	}
}
/**
 * Mark the RGB and/or alpha sources of \p inst that are referenced by the
 * arguments of the sub-instruction \p sub as used.
 */
static void mark_used(
	struct rc_instruction * inst,
	struct rc_pair_sub_instruction * sub)
{
	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
	unsigned int arg;

	for (arg = 0; arg < info->NumSrcRegs; arg++) {
		const unsigned int channels =
			rc_source_type_swz(sub->Arg[arg].Swizzle);

		if (channels & RC_SOURCE_RGB) {
			inst->U.P.RGB.Src[sub->Arg[arg].Source].Used = 1;
		}

		if (channels & RC_SOURCE_ALPHA) {
			inst->U.P.Alpha.Src[sub->Arg[arg].Source].Used = 1;
		}
	}
}
/**
 * Pass that recomputes the Used flag of every source of every pair
 * instruction: all flags are cleared first and then set again only for the
 * sources that the instruction actually references, so sources nobody
 * reads end up marked as unused.
 *
 * \param c     compiler whose program is processed
 * \param user  not used by this pass
 */
void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user)
{
	struct rc_instruction * inst = c->Program.Instructions.Next;

	for (; inst != &c->Program.Instructions; inst = inst->Next) {
		/* Instructions of type NORMAL are left alone. */
		if (inst->Type != RC_INSTRUCTION_NORMAL) {
			unsigned int src;

			/* Start from a clean slate ... */
			for (src = 0; src < 4; src++) {
				inst->U.P.RGB.Src[src].Used = 0;
				inst->U.P.Alpha.Src[src].Used = 0;
			}

			/* ... and flag whatever is really referenced. */
			mark_used(inst, &inst->U.P.RGB);
			mark_used(inst, &inst->U.P.Alpha);

			mark_used_presub(&inst->U.P.RGB);
			mark_used_presub(&inst->U.P.Alpha);
		}
	}
}

View file

@ -0,0 +1,706 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_program_pair.h"
#include <stdio.h>
#include "main/glheader.h"
#include "program/register_allocate.h"
#include "ralloc.h"
#include "r300_fragprog_swizzle.h"
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_list.h"
#include "radeon_variable.h"
/* Compile-time debug switch: set to a non-zero value to make DBG() print. */
#define VERBOSE 0
/* printf-style debug output to stderr; produces no output while VERBOSE is 0. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
/**
* Allocation bookkeeping for a single input or temporary register.
*/
struct register_info {
/** Live intervals, indexed by channel 0..3 (scan_read_callback() fills
* entry i for each bit i set in the read mask). */
struct live_intervals Live[4];
/** Set when the register is read (see scan_read_callback()). */
unsigned int Used:1;
/** Set once File/Index hold a valid assignment; remap_register() only
* rewrites indices of registers with this flag set. */
unsigned int Allocated:1;
/** Register file assigned to this register. */
unsigned int File:3;
/** Register index assigned to this register. */
unsigned int Index:RC_REGISTER_INDEX_BITS;
/** NOTE(review): not referenced in the visible part of this file;
* presumably a component mask for this register -- confirm. */
unsigned int Writemask;
};
/**
* State shared by the callbacks of the register allocation pass.
*/
struct regalloc_state {
/** Compiler context. */
struct radeon_compiler * C;
/** Per-input bookkeeping, NumInputs entries. */
struct register_info * Input;
unsigned int NumInputs;
/** Per-temporary bookkeeping, NumTemporaries entries. */
struct register_info * Temporary;
unsigned int NumTemporaries;
/** When non-zero, remap_register() also remaps temporaries through the
* Temporary table; otherwise only inputs are remapped there. */
unsigned int Simple;
/** Lower bound applied to the end of input live intervals in
* scan_read_callback(); NOTE(review): presumably the instruction
* position at which the enclosing loop ends -- confirm where it is set. */
int LoopEnd;
};
/**
* Register classes known to the register allocator. The values are used as
* indices into an array of struct rc_class (see find_class()).
* NOTE(review): the names suggest each class stands for a set of channel
* combinations (e.g. XY, XYW, "single + alpha") -- confirm against the
* table that initializes the classes.
*/
enum rc_reg_class {
RC_REG_CLASS_SINGLE,
RC_REG_CLASS_DOUBLE,
RC_REG_CLASS_TRIPLE,
RC_REG_CLASS_ALPHA,
RC_REG_CLASS_SINGLE_PLUS_ALPHA,
RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
RC_REG_CLASS_TRIPLE_PLUS_ALPHA,
RC_REG_CLASS_X,
RC_REG_CLASS_Y,
RC_REG_CLASS_Z,
RC_REG_CLASS_XY,
RC_REG_CLASS_YZ,
RC_REG_CLASS_XZ,
RC_REG_CLASS_XW,
RC_REG_CLASS_YW,
RC_REG_CLASS_ZW,
RC_REG_CLASS_XYW,
RC_REG_CLASS_YZW,
RC_REG_CLASS_XZW,
/** Not a class: number of classes, used as the loop bound in find_class(). */
RC_REG_CLASS_COUNT
};
/**
* Description of one register class.
*/
struct rc_class {
/** The class this entry describes. */
enum rc_reg_class Class;
/** Number of writemasks in this class; find_class() skips classes whose
* count exceeds the caller's limit. */
unsigned int WritemaskCount;
/** This is 1 if this class is being used by the register allocator
* and 0 otherwise */
unsigned int Used;
/** This is the ID number assigned to this class by ra. */
unsigned int Id;
/** List of writemasks that belong to this class */
unsigned int Writemasks[3];
};
/**
 * Debug helper: print a live interval as "(start,end)", or "(null)" when
 * the pointer is NULL or the interval is unused. Output only appears when
 * VERBOSE is enabled.
 */
static void print_live_intervals(struct live_intervals * src)
{
	if (src && src->Used) {
		DBG("(%i,%i)", src->Start, src->End);
	} else {
		DBG("(null)");
	}
}
/**
 * Test whether two live intervals overlap.
 *
 * Unused intervals never overlap. Otherwise the interval that starts later
 * overlaps the other one if it starts before that one ends; two intervals
 * starting at the same position overlap unless one of them is empty
 * (Start == End).
 *
 * \return 1 if the intervals overlap, 0 otherwise.
 */
static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
{
	int overlap;

	if (VERBOSE) {
		DBG("overlap_live_intervals: ");
		print_live_intervals(a);
		DBG(" to ");
		print_live_intervals(b);
		DBG("\n");
	}

	if (!a->Used || !b->Used) {
		DBG("    unused interval\n");
		return 0;
	}

	if (a->Start == b->Start) {
		overlap = a->Start != a->End && b->Start != b->End;
	} else if (a->Start > b->Start) {
		overlap = a->Start < b->End;
	} else {
		overlap = b->Start < a->End;
	}

	if (overlap) {
		DBG("    overlap\n");
		return 1;
	}

	DBG("    no overlap\n");
	return 0;
}
/**
 * Register-read callback: records that an input register is used and sets
 * the live interval of every channel that is read. The interval starts at
 * 0 and ends at the reading instruction or at s->LoopEnd, whichever is
 * later. Reads from other register files are ignored.
 */
static void scan_read_callback(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct regalloc_state * s = data;
	struct register_info * input;
	unsigned int chan;

	if (file != RC_FILE_INPUT) {
		return;
	}

	input = &s->Input[index];
	input->Used = 1;

	for (chan = 0; chan < 4; chan++) {
		struct live_intervals * live = &input->Live[chan];

		if ((mask >> chan) & 0x1) {
			live->Used = 1;
			live->Start = 0;
			live->End =
				s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
		}
	}
}
/**
 * Register remapping callback: replaces *index by the allocated index of
 * the register. Inputs are always looked up; temporaries only when
 * s->Simple is set. Registers without an allocation and all other register
 * files are left untouched. *file is never modified.
 */
static void remap_register(void * data, struct rc_instruction * inst,
		rc_register_file * file, unsigned int * index)
{
	struct regalloc_state * s = data;
	const struct register_info * reg;

	switch (*file) {
	case RC_FILE_TEMPORARY:
		if (!s->Simple) {
			return;
		}
		reg = &s->Temporary[*index];
		break;
	case RC_FILE_INPUT:
		reg = &s->Input[*index];
		break;
	default:
		return;
	}

	if (reg->Allocated) {
		*index = reg->Index;
	}
}
static void alloc_input_simple(void * data, unsigned int input,
unsigned int hwreg)
{
struct regalloc_state * s = data;
if (input >= s->NumInputs)
return;
s->Input[input].Allocated = 1;
s->Input[input].File = RC_FILE_TEMPORARY;
s->Input[input].Index = hwreg;
}
/* This functions offsets the temporary register indices by the number
* of input registers, because input registers are actually temporaries and
* should not occupy the same space.
*
* This pass is supposed to be used to maintain correct allocation of inputs
* if the standard register allocation is disabled. */
static void do_regalloc_inputs_only(struct regalloc_state * s)
{
for (unsigned i = 0; i < s->NumTemporaries; i++) {
s->Temporary[i].Allocated = 1;
s->Temporary[i].File = RC_FILE_TEMPORARY;
s->Temporary[i].Index = i + s->NumInputs;
}
}
/** Return 1 if \p op is one of the derivative opcodes (DDX or DDY), else 0. */
static unsigned int is_derivative(rc_opcode op)
{
	switch (op) {
	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		return 1;
	default:
		return 0;
	}
}
static int find_class(
struct rc_class * classes,
unsigned int writemask,
unsigned int max_writemask_count)
{
unsigned int i;
for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
unsigned int j;
if (classes[i].WritemaskCount > max_writemask_count) {
continue;
}
for (j = 0; j < 3; j++) {
if (classes[i].Writemasks[j] == writemask) {
return i;
}
}
}
return -1;
}
/* Choose the register class for 'variable'.
 *
 * The class is looked up in 'classes' from the summed writemask of the
 * variable.  Classes with several alternative writemasks (up to 3) are only
 * considered while can_change_writemask stays set; the checks below clear it
 * whenever moving the value to other channels is not allowed.
 *
 * Returns the Class field of the matching entry.  When no class matches, an
 * error is reported through rc_error() and 0 is returned. */
static enum rc_reg_class variable_get_class(
struct rc_variable * variable,
struct rc_class * classes)
{
unsigned int i;
unsigned int can_change_writemask= 1;
unsigned int writemask = rc_variable_writemask_sum(variable);
struct rc_list * readers = rc_variable_readers_union(variable);
int class_index;
/* Extra restrictions that only apply to pre-r500 hardware. */
if (!variable->C->is_r500) {
struct rc_class c;
/* The assumption here is that if an instruction has type
* RC_INSTRUCTION_NORMAL then it is a TEX instruction.
* r300 and r400 can't swizzle the result of a TEX lookup. */
if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) {
writemask = RC_MASK_XYZW;
}
/* Check if it is possible to do swizzle packing for r300/r400
* without creating non-native swizzles. */
class_index = find_class(classes, writemask, 3);
if (class_index < 0) {
/* Jumps into the else branch at the end of the function. */
goto error;
}
c = classes[class_index];
/* Try every writemask of the class: each reader's swizzle is
* converted as if the variable were moved to that writemask and
* must still be a native swizzle. */
for (i = 0; i < c.WritemaskCount; i++) {
int j;
unsigned int conversion_swizzle =
rc_make_conversion_swizzle(
writemask, c.Writemasks[i]);
for (j = 0; j < variable->ReaderCount; j++) {
unsigned int old_swizzle;
unsigned int new_swizzle;
struct rc_reader r = variable->Readers[j];
/* The reader's swizzle is stored in a different place for
* pair and normal instructions. */
if (r.Inst->Type == RC_INSTRUCTION_PAIR ) {
old_swizzle = r.U.P.Arg->Swizzle;
} else {
old_swizzle = r.U.I.Src->Swizzle;
}
new_swizzle = rc_adjust_channels(
old_swizzle, conversion_swizzle);
if (!r300_swizzle_is_native_basic(new_swizzle)) {
can_change_writemask = 0;
break;
}
}
/* One failing reader is enough; stop trying other writemasks. */
if (!can_change_writemask) {
break;
}
}
}
if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
/* DDX/DDY seem to always fail when their writemasks are
* changed.*/
if (is_derivative(variable->Inst->U.P.RGB.Opcode)
|| is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
can_change_writemask = 0;
}
}
/* Walk the union of all readers: a reader that takes the value through
* the presubtract source, or that is itself a derivative instruction,
* pins the writemask. */
for ( ; readers; readers = readers->Next) {
struct rc_reader * r = readers->Item;
if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
can_change_writemask = 0;
break;
}
/* DDX/DDY also fail when their swizzles are changed. */
if (is_derivative(r->Inst->U.P.RGB.Opcode)
|| is_derivative(r->Inst->U.P.Alpha.Opcode)) {
can_change_writemask = 0;
break;
}
}
}
/* Final lookup: with a pinned writemask only classes offering a single
* writemask (max count 1) are acceptable. */
class_index = find_class(classes, writemask,
can_change_writemask ? 3 : 1);
if (class_index > -1) {
return classes[class_index].Class;
} else {
error:
rc_error(variable->C,
"Could not find class for index=%u mask=%u\n",
variable->Dst.Index, writemask);
return 0;
}
}
/* Compare two 4-channel live interval arrays.
 *
 * Returns 1 as soon as any channel of 'a' overlaps any channel of 'b'
 * (all 16 channel combinations are candidates), 0 when none do. */
static unsigned int overlap_live_intervals_array(
	struct live_intervals * a,
	struct live_intervals * b)
{
	unsigned int combo;

	/* combo enumerates (a_chan, b_chan) pairs with b_chan varying fastest. */
	for (combo = 0; combo < 4 * 4; combo++) {
		if (overlap_live_intervals(&a[combo / 4], &b[combo % 4]))
			return 1;
	}
	return 0;
}
/* Extract the temporary register index from a register id built by
 * get_reg_id(): every index owns RC_MASK_XYZW consecutive ids. */
static unsigned int reg_get_index(int reg)
{
	const int index = reg / RC_MASK_XYZW;

	return index;
}
/* Extract the writemask from a register id built by get_reg_id(): the
 * position inside the per-index group of ids is the writemask minus one. */
static unsigned int reg_get_writemask(int reg)
{
	const int slot = reg % RC_MASK_XYZW;

	return slot + 1;
}
/* Build the register id for the given temporary index and writemask:
 * index * RC_MASK_XYZW + (writemask - 1).
 *
 * An empty writemask is a caller error; it trips the assert in debug builds
 * and yields id 0 otherwise. */
static int get_reg_id(unsigned int index, unsigned int writemask)
{
	assert(writemask);
	return writemask ? (index * RC_MASK_XYZW) + (writemask - 1) : 0;
}
#if VERBOSE
/* Debug helper: print a register id to stderr as "Temp[index].xyzw", with
 * '_' in place of every channel that is missing from its writemask. */
static void print_reg(int reg)
{
	const unsigned int channels = reg_get_writemask(reg);
	const char x = (channels & RC_MASK_X) ? 'x' : '_';
	const char y = (channels & RC_MASK_Y) ? 'y' : '_';
	const char z = (channels & RC_MASK_Z) ? 'z' : '_';
	const char w = (channels & RC_MASK_W) ? 'w' : '_';

	fprintf(stderr, "Temp[%u].%c%c%c%c", reg_get_index(reg), x, y, z, w);
}
#endif
/* Tell the register allocator which register ids can not be live at the
 * same time: for every temporary index, any two distinct writemasks that
 * share at least one channel are marked as conflicting. */
static void add_register_conflicts(
	struct ra_regs * regs,
	unsigned int max_temp_regs)
{
	unsigned int hw_index;

	for (hw_index = 0; hw_index < max_temp_regs; hw_index++) {
		unsigned int first;

		for (first = 1; first <= RC_MASK_XYZW; first++) {
			/* Only pairs with second > first are visited, so every
			 * unordered pair is handled once. */
			unsigned int second = first + 1;

			while (second <= RC_MASK_XYZW) {
				if ((first & second) != 0) {
					ra_add_reg_conflict(regs,
						get_reg_id(hw_index, first),
						get_reg_id(hw_index, second));
				}
				second++;
			}
		}
	}
}
/* Graph-colouring register allocation for temporaries.
 *
 * Steps:
 *  1. build the register set (one register id per temporary index and
 *     non-empty writemask) and pick a register class for every program
 *     variable,
 *  2. compute live intervals for variables and inputs,
 *  3. build the interference graph; inputs are added as extra nodes that are
 *     pinned to the hardware register recorded in s->Input[],
 *  4. run the allocator and rewrite the destination of every variable.
 *
 * If the allocator fails, an error is reported through rc_error() and the
 * program is left untouched.  The register set and the interference graph
 * are released on every exit path. */
static void do_advanced_regalloc(struct regalloc_state * s)
{
	/* Initialisers are positional; judging by the uses below the field
	 * order is: Class, WritemaskCount, Used, Id, Writemasks[3].  The table
	 * is indexed with the value returned by variable_get_class(), so its
	 * order has to follow the enum - TODO confirm against the header. */
	struct rc_class rc_class_list [] = {
		{RC_REG_CLASS_SINGLE, 3, 0, 0,
			{RC_MASK_X,
			 RC_MASK_Y,
			 RC_MASK_Z}},
		{RC_REG_CLASS_DOUBLE, 3, 0, 0,
			{RC_MASK_X | RC_MASK_Y,
			 RC_MASK_X | RC_MASK_Z,
			 RC_MASK_Y | RC_MASK_Z}},
		{RC_REG_CLASS_TRIPLE, 1, 0, 0,
			{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_ALPHA, 1, 0, 0,
			{RC_MASK_W,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0,
			{RC_MASK_X | RC_MASK_W,
			 RC_MASK_Y | RC_MASK_W,
			 RC_MASK_Z | RC_MASK_W}},
		{RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0,
			{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
			 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
			 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
		{RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0,
			{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_X, 1, 0, 0,
			{RC_MASK_X,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_Y, 1, 0, 0,
			{RC_MASK_Y,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_Z, 1, 0, 0,
			{RC_MASK_Z,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_XY, 1, 0, 0,
			{RC_MASK_X | RC_MASK_Y,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_YZ, 1, 0, 0,
			{RC_MASK_Y | RC_MASK_Z,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_XZ, 1, 0, 0,
			{RC_MASK_X | RC_MASK_Z,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_XW, 1, 0, 0,
			{RC_MASK_X | RC_MASK_W,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_YW, 1, 0, 0,
			{RC_MASK_Y | RC_MASK_W,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_ZW, 1, 0, 0,
			{RC_MASK_Z | RC_MASK_W,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_XYW, 1, 0, 0,
			{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_YZW, 1, 0, 0,
			{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
			 RC_MASK_NONE,
			 RC_MASK_NONE}},
		{RC_REG_CLASS_XZW, 1, 0, 0,
			{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
			 RC_MASK_NONE,
			 RC_MASK_NONE}}
	};

	unsigned int i, j, index, input_node, node_count, node_index;
	unsigned int * node_classes;
	unsigned int * input_classes;
	struct rc_instruction * inst;
	struct rc_list * var_ptr;
	struct rc_list * variables;
	struct ra_regs * regs;
	struct ra_graph * graph;

	/* Allocate the main ra data structure */
	regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW);

	/* Get list of program variables */
	variables = rc_get_variables(s->C);
	node_count = rc_list_count(variables);
	node_classes = memory_pool_malloc(&s->C->Pool,
			node_count * sizeof(unsigned int));
	input_classes = memory_pool_malloc(&s->C->Pool,
			s->NumInputs * sizeof(unsigned int));

	for (var_ptr = variables, node_index = 0; var_ptr;
					var_ptr = var_ptr->Next, node_index++) {
		unsigned int class_index;
		/* Compute the live intervals */
		rc_variable_compute_live_intervals(var_ptr->Item);

		class_index = variable_get_class(var_ptr->Item, rc_class_list);

		/* If we haven't used this register class yet, mark it
		 * as used and allocate space for it. */
		if (!rc_class_list[class_index].Used) {
			rc_class_list[class_index].Used = 1;
			rc_class_list[class_index].Id = ra_alloc_reg_class(regs);
		}

		node_classes[node_index] = rc_class_list[class_index].Id;
	}

	/* Assign registers to the classes */
	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
		struct rc_class class = rc_class_list[i];
		if (!class.Used) {
			continue;
		}

		for (index = 0; index < s->C->max_temp_regs; index++) {
			for (j = 0; j < class.WritemaskCount; j++) {
				int reg_id = get_reg_id(index,
						class.Writemasks[j]);
				ra_class_add_reg(regs, class.Id, reg_id);
			}
		}
	}

	/* Add register conflicts */
	add_register_conflicts(regs, s->C->max_temp_regs);

	/* Calculate live intervals for input registers */
	for (inst = s->C->Program.Instructions.Next;
					inst != &s->C->Program.Instructions;
					inst = inst->Next) {
		rc_opcode op = rc_get_flow_control_inst(inst);
		if (op == RC_OPCODE_BGNLOOP) {
			/* Remember the furthest loop end seen so far. */
			struct rc_instruction * endloop =
							rc_match_bgnloop(inst);
			if (endloop->IP > s->LoopEnd) {
				s->LoopEnd = endloop->IP;
			}
		}
		rc_for_all_reads_mask(inst, scan_read_callback, s);
	}

	/* Create classes for input registers */
	for (i = 0; i < s->NumInputs; i++) {
		unsigned int chan, class_id, writemask = 0;
		for (chan = 0; chan < 4; chan++) {
			if (s->Input[i].Live[chan].Used) {
				writemask |= (1 << chan);
			}
		}
		s->Input[i].Writemask = writemask;
		/* Inputs that are never read get no class and no graph node. */
		if (!writemask) {
			continue;
		}

		/* Each live input gets a private class holding exactly the
		 * register it was assigned by alloc_input_simple(). */
		class_id = ra_alloc_reg_class(regs);
		input_classes[i] = class_id;
		ra_class_add_reg(regs, class_id,
				get_reg_id(s->Input[i].Index, writemask));
	}

	ra_set_finalize(regs);

	graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs);

	/* Build the interference graph */
	for (var_ptr = variables, node_index = 0; var_ptr;
					var_ptr = var_ptr->Next,node_index++) {
		struct rc_list * a, * b;
		unsigned int b_index;

		ra_set_node_class(graph, node_index, node_classes[node_index]);

		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
						b; b = b->Next, b_index++) {
			/* Two nodes interfere when any variable on a's Friend
			 * chain overlaps any variable on b's Friend chain. */
			struct rc_variable * var_a = a->Item;
			while (var_a) {
				struct rc_variable * var_b = b->Item;
				while (var_b) {
					if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
						ra_add_node_interference(graph,
							node_index, b_index);
					}
					var_b = var_b->Friend;
				}
				var_a = var_a->Friend;
			}
		}
	}

	/* Add input registers to the interference graph */
	for (i = 0, input_node = 0; i< s->NumInputs; i++) {
		if (!s->Input[i].Writemask) {
			continue;
		}
		ra_set_node_class(graph, node_count + input_node,
							input_classes[i]);
		for (var_ptr = variables, node_index = 0;
				var_ptr; var_ptr = var_ptr->Next, node_index++) {
			struct rc_variable * var = var_ptr->Item;
			if (overlap_live_intervals_array(s->Input[i].Live,
								var->Live)) {
				ra_add_node_interference(graph, node_index,
						node_count + input_node);
			}
		}
		/* Manually allocate a register for this input */
		ra_set_node_reg(graph, node_count + input_node, get_reg_id(
				s->Input[i].Index, s->Input[i].Writemask));
		input_node++;
	}

	if (!ra_allocate_no_spills(graph)) {
		rc_error(s->C, "Ran out of hardware temporaries\n");
		/* Still release the allocator data instead of leaking it. */
		goto cleanup;
	}

	/* Rewrite the registers */
	for (var_ptr = variables, node_index = 0; var_ptr;
				var_ptr = var_ptr->Next, node_index++) {
		int reg = ra_get_node_reg(graph, node_index);
		unsigned int writemask = reg_get_writemask(reg);
		unsigned int index = reg_get_index(reg);
		struct rc_variable * var = var_ptr->Item;

		/* Pre-r500 normal instructions keep their original channels;
		 * only the register index changes. */
		if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
			writemask = rc_variable_writemask_sum(var);
		}

		if (var->Dst.File == RC_FILE_INPUT) {
			continue;
		}
		rc_variable_change_dst(var, index, writemask);
	}

cleanup:
	ralloc_free(graph);
	ralloc_free(regs);
}
/**
* @param user This parameter should be a pointer to an integer value. If this
* integer value is zero, then a simple register allocator will be used that
* only allocates space for input registers (\sa do_regalloc_inputs_only). If
* user is non-zero, then the regular register allocator will be used
* (\sa do_regalloc).
*/
void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
{
struct r300_fragment_program_compiler *c =
(struct r300_fragment_program_compiler*)cc;
struct regalloc_state s;
int * do_full_regalloc = (int*)user;
memset(&s, 0, sizeof(s));
s.C = cc;
s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
s.Input = memory_pool_malloc(&cc->Pool,
s.NumInputs * sizeof(struct register_info));
memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
s.Temporary = memory_pool_malloc(&cc->Pool,
s.NumTemporaries * sizeof(struct register_info));
memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
rc_recompute_ips(s.C);
c->AllocateHwInputs(c, &alloc_input_simple, &s);
if (*do_full_regalloc) {
do_advanced_regalloc(&s);
} else {
s.Simple = 1;
do_regalloc_inputs_only(&s);
}
/* Rewrite inputs and if we are doing the simple allocation, rewrite
* temporaries too. */
for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
inst != &s.C->Program.Instructions;
inst = inst->Next) {
rc_remap_registers(inst, &remap_register, &s);
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,359 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_program_pair.h"
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
/**
* Finally rewrite ADD, MOV, MUL as the appropriate native instruction
* and reverse the order of arguments for CMP.
*/
static void final_rewrite(struct rc_sub_instruction *inst)
{
struct rc_src_register tmp;
switch(inst->Opcode) {
case RC_OPCODE_ADD:
inst->SrcReg[2] = inst->SrcReg[1];
inst->SrcReg[1].File = RC_FILE_NONE;
inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
inst->SrcReg[1].Negate = RC_MASK_NONE;
inst->Opcode = RC_OPCODE_MAD;
break;
case RC_OPCODE_CMP:
tmp = inst->SrcReg[2];
inst->SrcReg[2] = inst->SrcReg[0];
inst->SrcReg[0] = tmp;
break;
case RC_OPCODE_MOV:
/* AMD say we should use CMP.
* However, when we transform
* KIL -r0;
* into
* CMP tmp, -r0, -r0, 0;
* KIL tmp;
* we get incorrect behaviour on R500 when r0 == 0.0.
* It appears that the R500 KIL hardware treats -0.0 as less
* than zero.
*/
inst->SrcReg[1].File = RC_FILE_NONE;
inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
inst->SrcReg[2].File = RC_FILE_NONE;
inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
inst->Opcode = RC_OPCODE_MAD;
break;
case RC_OPCODE_MUL:
inst->SrcReg[2].File = RC_FILE_NONE;
inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
inst->Opcode = RC_OPCODE_MAD;
break;
default:
/* nothing to do */
break;
}
}
/**
 * Work out which ALUs an instruction needs.
 *
 * On return *needrgb / *needalpha say whether the RGB / alpha unit is
 * required and *istranscendent says whether the opcode is one of the
 * transcendent operations (COS, EX2, LG2, RCP, RSQ, SIN).
 */
static void classify_instruction(struct rc_sub_instruction * inst,
	int * needrgb, int * needalpha, int * istranscendent)
{
	/* Start from what the destination writemask asks for. */
	*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) != 0;
	*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) != 0;
	*istranscendent = 0;

	/* Writing the ALU result register also occupies the matching unit. */
	if (inst->WriteALUResult == RC_ALURESULT_X) {
		*needrgb = 1;
	} else if (inst->WriteALUResult == RC_ALURESULT_W) {
		*needalpha = 1;
	}

	switch(inst->Opcode) {
	case RC_OPCODE_COS:
	case RC_OPCODE_EX2:
	case RC_OPCODE_LG2:
	case RC_OPCODE_RCP:
	case RC_OPCODE_RSQ:
	case RC_OPCODE_SIN:
		/* Transcendent operations always need the alpha unit. */
		*istranscendent = 1;
		*needalpha = 1;
		break;
	case RC_OPCODE_DP4:
		*needalpha = 1;
		*needrgb = 1;
		break;
	case RC_OPCODE_DP3:
		*needrgb = 1;
		break;
	default:
		/* ADD, CMP, CND, DDX, DDY, FRC, MAD, MAX, MIN, MOV, MUL and
		 * everything else: the writemask already decided. */
		break;
	}
}
/* Report which parts of a source register its swizzle reads.
 *
 * *rgb is set to 1 when any of the four swizzle channels selects x, y or z
 * (selector 0..2); *alpha is set to 1 when any selects w (selector 3).
 * Neither flag is ever cleared here, so callers must initialise them. */
static void src_uses(struct rc_src_register src, unsigned int * rgb,
	unsigned int * alpha)
{
	int chan = 0;

	while (chan < 4) {
		const unsigned int sel = GET_SWZ(src.Swizzle, chan);

		if (sel <= 2) {
			*rgb = 1;
		} else if (sel == 3) {
			*alpha = 1;
		}
		chan++;
	}
}
/**
 * Fill the given ALU instruction's opcodes and source operands into the given pair,
 * if possible.
 *
 * \param c    compiler, used for error reporting and for the output
 *             register numbers (OutputDepth, OutputColor[]).
 * \param pair pair instruction to fill in; it is zeroed first.
 * \param inst the (already rewritten) normal instruction to translate.
 *
 * When a source cannot be allocated, an error is reported through rc_error()
 * and the function returns with \p pair only partially filled in.
 */
static void set_pair_instruction(struct r300_fragment_program_compiler *c,
struct rc_pair_instruction * pair,
struct rc_sub_instruction * inst)
{
int needrgb, needalpha, istranscendent;
const struct rc_opcode_info * opcode;
int i;
memset(pair, 0, sizeof(struct rc_pair_instruction));
classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
/* Opcode and saturate for the RGB slot.  For transcendent opcodes the RGB
* slot gets RC_OPCODE_REPL_ALPHA instead of the opcode itself. */
if (needrgb) {
if (istranscendent)
pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
else
pair->RGB.Opcode = inst->Opcode;
if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
pair->RGB.Saturate = 1;
}
/* Opcode and saturate for the alpha slot. */
if (needalpha) {
pair->Alpha.Opcode = inst->Opcode;
if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
pair->Alpha.Saturate = 1;
}
opcode = rc_get_opcode_info(inst->Opcode);
/* Presubtract handling:
* We need to make sure that the values used by the presubtract
* operation end up in src0 or src1. */
if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
/* rc_pair_alloc_source() will fill in data for
* pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
int j;
for(j = 0; j < 3; j++) {
int src_regs;
/* Only operands that read the presubtract result matter. */
if(inst->SrcReg[j].File != RC_FILE_PRESUB)
continue;
src_regs = rc_presubtract_src_reg_count(
inst->PreSub.Opcode);
/* Copy the presubtract inputs into source slots 0..src_regs-1 of
* the RGB and/or alpha half, depending on which channels the
* operand's swizzle reads. */
for(i = 0; i < src_regs; i++) {
unsigned int rgb = 0;
unsigned int alpha = 0;
src_uses(inst->SrcReg[j], &rgb, &alpha);
if(rgb) {
pair->RGB.Src[i].File =
inst->PreSub.SrcReg[i].File;
pair->RGB.Src[i].Index =
inst->PreSub.SrcReg[i].Index;
pair->RGB.Src[i].Used = 1;
}
if(alpha) {
pair->Alpha.Src[i].File =
inst->PreSub.SrcReg[i].File;
pair->Alpha.Src[i].Index =
inst->PreSub.SrcReg[i].Index;
pair->Alpha.Src[i].Used = 1;
}
}
}
}
/* Translate every source operand the opcode actually uses. */
for(i = 0; i < opcode->NumSrcRegs; ++i) {
int source;
/* RGB argument; skipped for transcendent opcodes. */
if (needrgb && !istranscendent) {
unsigned int srcrgb = 0;
unsigned int srcalpha = 0;
unsigned int srcmask = 0;
int j;
/* We don't care about the alpha channel here. We only
* want the part of the swizzle that writes to rgb,
* since we are creating an rgb instruction. */
for(j = 0; j < 3; ++j) {
unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
if (swz < RC_SWIZZLE_W)
srcrgb = 1;
else if (swz == RC_SWIZZLE_W)
srcalpha = 1;
/* srcmask collects the channels whose selector is below
* RC_SWIZZLE_UNUSED; it gates the negate bit below. */
if (swz < RC_SWIZZLE_UNUSED)
srcmask |= 1 << j;
}
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
inst->SrcReg[i].File, inst->SrcReg[i].Index);
if (source < 0) {
rc_error(&c->Base, "Failed to translate "
"rgb instruction.\n");
return;
}
pair->RGB.Arg[i].Source = source;
pair->RGB.Arg[i].Swizzle =
rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
/* A single negate flag: set when any channel counted in srcmask is
* negated in the original operand. */
pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
}
/* Alpha argument. */
if (needalpha) {
unsigned int srcrgb = 0;
unsigned int srcalpha = 0;
/* Transcendent opcodes take their operand from swizzle channel 0,
* everything else from channel 3. */
unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3);
if (swz < 3)
srcrgb = 1;
else if (swz < 4)
srcalpha = 1;
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
inst->SrcReg[i].File, inst->SrcReg[i].Index);
if (source < 0) {
rc_error(&c->Base, "Failed to translate "
"alpha instruction.\n");
return;
}
pair->Alpha.Arg[i].Source = source;
pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
}
}
/* Destination handling */
if (inst->DstReg.File == RC_FILE_OUTPUT) {
if (inst->DstReg.Index == c->OutputDepth) {
/* Depth output: only the W bit of the writemask is used. */
pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
} else {
/* Colour output: find which of the four colour outputs is
* written.  If none matches, no output mask is set. */
for (i = 0; i < 4; i++) {
if (inst->DstReg.Index == c->OutputColor[i]) {
pair->RGB.Target = i;
pair->Alpha.Target = i;
pair->RGB.OutputWriteMask |=
inst->DstReg.WriteMask & RC_MASK_XYZ;
pair->Alpha.OutputWriteMask |=
GET_BIT(inst->DstReg.WriteMask, 3);
break;
}
}
}
} else {
/* Any other destination file is written through WriteMask and
* DestIndex. */
if (needrgb) {
pair->RGB.DestIndex = inst->DstReg.Index;
pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
}
if (needalpha) {
pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
/* The alpha unit may be needed without W being written; the
* destination index is only set when W is written. */
if (pair->Alpha.WriteMask) {
pair->Alpha.DestIndex = inst->DstReg.Index;
}
}
}
/* Carry over the ALU result write, if any. */
if (inst->WriteALUResult) {
pair->WriteALUResult = inst->WriteALUResult;
pair->ALUResultCompare = inst->ALUResultCompare;
}
}
/* Report features a fragment program instruction must not use: signed
 * saturation on instructions that have a destination register, and
 * relative addressing on any source operand the opcode reads.  Only the
 * first problem found is reported, through rc_error(). */
static void check_opcode_support(struct r300_fragment_program_compiler *c,
	struct rc_sub_instruction *inst)
{
	const struct rc_opcode_info * info = rc_get_opcode_info(inst->Opcode);
	unsigned int src;

	if (info->HasDstReg && inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
		rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
		return;
	}

	src = 0;
	while (src < info->NumSrcRegs) {
		if (inst->SrcReg[src].RelAddr) {
			rc_error(&c->Base, "Fragment program does not support relative addressing "
				" of source operands.\n");
			return;
		}
		src++;
	}
}
/**
* Translate all ALU instructions into corresponding pair instructions,
* performing no other changes.
*/
void rc_pair_translate(struct radeon_compiler *cc, void *user)
{
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
inst != &c->Base.Program.Instructions;
inst = inst->Next) {
const struct rc_opcode_info * opcode;
struct rc_sub_instruction copy;
if (inst->Type != RC_INSTRUCTION_NORMAL)
continue;
opcode = rc_get_opcode_info(inst->U.I.Opcode);
if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
continue;
copy = inst->U.I;
check_opcode_support(c, &copy);
final_rewrite(&copy);
inst->Type = RC_INSTRUCTION_PAIR;
set_pair_instruction(c, &inst->U.P, &copy);
}
}

View file

@ -0,0 +1,225 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_program.h"
#include <stdio.h>
#include "radeon_compiler.h"
#include "radeon_dataflow.h"
/**
* Transform the given clause in the following way:
* 1. Replace it with an empty clause
* 2. For every instruction in the original clause, try the given
* transformations in order.
* 3. If one of the transformations returns GL_TRUE, assume that it
* has emitted the appropriate instruction(s) into the new clause;
* otherwise, copy the instruction verbatim.
*
* \note The transformation is currently not recursive; in other words,
* instructions emitted by transformations are not transformed.
*
* \note The transform is called 'local' because it can only look at
* one instruction at a time.
*/
void rc_local_transform(
struct radeon_compiler * c,
void *user)
{
struct radeon_program_transformation *transformations =
(struct radeon_program_transformation*)user;
struct rc_instruction * inst = c->Program.Instructions.Next;
while(inst != &c->Program.Instructions) {
struct rc_instruction * current = inst;
int i;
inst = inst->Next;
for(i = 0; transformations[i].function; ++i) {
struct radeon_program_transformation* t = transformations + i;
if (t->function(c, current, t->userData))
break;
}
}
}
/* Context handed to get_used_temporaries_cb() by rc_get_used_temporaries(). */
struct get_used_temporaries_data {
/* Caller-provided array: one accumulated channel mask per temporary index. */
unsigned char * Used;
/* Number of entries in Used; larger indices are ignored by the callback. */
unsigned int UsedLength;
};
/* Read/write callback: OR 'mask' into the usage entry of a temporary
 * register.  Other register files and indices beyond the usage array are
 * ignored. */
static void get_used_temporaries_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct get_used_temporaries_data * usage = userdata;

	if (file == RC_FILE_TEMPORARY && index < usage->UsedLength)
		usage->Used[index] |= mask;
}
/**
* This function fills in the parameter 'used' with a writemask that
* represent which components of each temporary register are used by the
* program. This is meant to be combined with rc_find_free_temporary_list as a
* more efficient version of rc_find_free_temporary.
* @param used The function does not initialize this parameter.
*/
void rc_get_used_temporaries(
struct radeon_compiler * c,
unsigned char * used,
unsigned int used_length)
{
struct rc_instruction * inst;
struct get_used_temporaries_data d;
d.Used = used;
d.UsedLength = used_length;
for(inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d);
rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d);
}
}
/* Search a list of used temporaries for a free one
 * \sa rc_get_used_temporaries
 * @note If this function finds a free temporary, it will mark it as used
 * in the used temporary list (param 'used')
 * @param c unused
 * @param used list of used temporaries
 * @param used_length number of items in param 'used'
 * @param mask which components must be free in the temporary index that is
 * returned.
 * @return -1 If there are no more free temporaries, otherwise the index of
 * a temporary register where the components specified in param 'mask' are
 * not being used.
 */
int rc_find_free_temporary_list(
	struct radeon_compiler * c,
	unsigned char * used,
	unsigned int used_length,
	unsigned int mask)
{
	/* Unsigned index: matches the type of used_length, so the loop
	 * condition no longer mixes signed and unsigned operands. */
	unsigned int i;

	(void)c;
	for (i = 0; i < used_length; i++) {
		/* Free means: none of the requested components is taken yet. */
		if ((used[i] & mask) == 0) {
			used[i] |= mask;
			return (int)i;
		}
	}
	return -1;
}
/* Find a temporary register of which no component is read or written
 * anywhere in the program.
 *
 * The whole program is scanned on every call.  When no register is free an
 * error is reported through rc_error() and 0 is returned - note that 0 is
 * also a valid register index. */
unsigned int rc_find_free_temporary(struct radeon_compiler * c)
{
	unsigned char usage[RC_REGISTER_MAX_INDEX];
	int index;

	memset(usage, 0, sizeof(usage));
	rc_get_used_temporaries(c, usage, RC_REGISTER_MAX_INDEX);

	index = rc_find_free_temporary_list(c, usage, RC_REGISTER_MAX_INDEX,
			RC_MASK_XYZW);
	if (index >= 0)
		return index;

	rc_error(c, "Ran out of temporary registers\n");
	return 0;
}
/* Allocate an instruction from the compiler's memory pool.
 *
 * The instruction is zeroed, then given an illegal opcode, a full XYZW
 * destination writemask and the identity swizzle on all three sources.
 * It is not linked into the program. */
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
{
	struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));
	unsigned int src;

	memset(inst, 0, sizeof(struct rc_instruction));

	inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
	inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	for (src = 0; src < 3; src++)
		inst->U.I.SrcReg[src].Swizzle = RC_SWIZZLE_XYZW;

	return inst;
}
/* Link 'inst' into the doubly linked instruction list directly behind
 * 'after'. */
void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst)
{
	/* Point the new node at its neighbours first... */
	inst->Next = after->Next;
	inst->Prev = after;

	/* ...then make both neighbours point back at it. */
	after->Next = inst;
	inst->Next->Prev = inst;
}
/* Allocate a fresh instruction (see rc_alloc_instruction), link it in
 * directly behind 'after' and return it. */
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
{
	struct rc_instruction * created;

	created = rc_alloc_instruction(c);
	rc_insert_instruction(after, created);

	return created;
}
/* Unlink 'inst' from the doubly linked instruction list.  The instruction
 * itself is not modified: its own Prev/Next pointers keep their values. */
void rc_remove_instruction(struct rc_instruction * inst)
{
	struct rc_instruction * before = inst->Prev;
	struct rc_instruction * behind = inst->Next;

	before->Next = behind;
	behind->Prev = before;
}
/**
 * Renumber the instructions: every instruction gets its position in the
 * program (starting at 0) as IP, and the list head gets a marker value.
 *
 * @return the number of instructions in the program.
 */
unsigned int rc_recompute_ips(struct radeon_compiler * c)
{
	struct rc_instruction * cur = c->Program.Instructions.Next;
	unsigned int count = 0;

	while (cur != &c->Program.Instructions) {
		cur->IP = count;
		count++;
		cur = cur->Next;
	}

	/* The list head is not a real instruction; give it a recognisable IP. */
	c->Program.Instructions.IP = 0xcafedead;

	return count;
}

View file

@ -0,0 +1,206 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __RADEON_PROGRAM_H_
#define __RADEON_PROGRAM_H_
#include <stdint.h>
#include <string.h>
#include "radeon_opcodes.h"
#include "radeon_code.h"
#include "radeon_program_constants.h"
#include "radeon_program_pair.h"
struct radeon_compiler;
/** A source operand of an instruction, packed into bit-fields. */
struct rc_src_register {
/** Register file of the operand (an RC_FILE_* value). */
unsigned int File:4;
/** Negative values may be used for relative addressing. */
signed int Index:(RC_REGISTER_INDEX_BITS+1);
/** Non-zero when the operand uses relative addressing. */
unsigned int RelAddr:1;
/** Channel selectors, read with GET_SWZ(); presumably 3 bits per
* channel for 4 channels - TODO confirm. */
unsigned int Swizzle:12;
/** Take the component-wise absolute value */
unsigned int Abs:1;
/** Post-Abs negation. One bit per channel (RC_MASK_X .. RC_MASK_W). */
unsigned int Negate:4;
};
/** The destination operand of an instruction, packed into bit-fields. */
struct rc_dst_register {
/** Register file of the destination (an RC_FILE_* value). */
unsigned int File:3;
/** Register index inside the file. */
unsigned int Index:RC_REGISTER_INDEX_BITS;
/** Channels that are written, one bit per channel (RC_MASK_X .. RC_MASK_W). */
unsigned int WriteMask:4;
};
/** Presubtract operation attached to an instruction. */
struct rc_presub_instruction {
/** Which presubtract operation is used; RC_PRESUB_NONE when there is none. */
rc_presubtract_op Opcode;
/** Inputs of the operation; how many are used depends on Opcode
* (see rc_presubtract_src_reg_count()). */
struct rc_src_register SrcReg[2];
};
/**
* Instructions are maintained by the compiler in a doubly linked list
* of these structures.
*
* This instruction format is intended to be expanded for hardware-specific
* trickery. At different stages of compilation, a different set of
* instruction types may be valid.
*
* NOTE(review): the list links (Prev/Next) live in struct rc_instruction,
* which embeds this structure as U.I; this structure only holds the
* operands and opcode of a "normal" instruction.
*/
struct rc_sub_instruction {
/** Up to three source operands. */
struct rc_src_register SrcReg[3];
/** Destination operand. */
struct rc_dst_register DstReg;
/**
* Opcode of this instruction, according to \ref rc_opcode enums.
*/
unsigned int Opcode:8;
/**
* Saturate each value of the result to the range [0,1] or [-1,1],
* according to \ref rc_saturate_mode enums.
*/
unsigned int SaturateMode:2;
/**
* Writing to the special register RC_SPECIAL_ALU_RESULT
*/
/*@{*/
unsigned int WriteALUResult:2;
unsigned int ALUResultCompare:3;
/*@}*/
/**
* \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
*/
/*@{*/
/** Source texture unit. */
unsigned int TexSrcUnit:5;
/** Source texture target, one of the \ref rc_texture_target enums */
unsigned int TexSrcTarget:3;
/** True if tex instruction should do shadow comparison */
unsigned int TexShadow:1;
/**R500 Only. How to swizzle the result of a TEX lookup*/
unsigned int TexSwizzle:12;
/*@}*/
/** This holds information about the presubtract operation used by
* this instruction. */
struct rc_presub_instruction PreSub;
};
/** Selects which member of the union in struct rc_instruction is valid. */
typedef enum {
/** The instruction is stored in U.I (struct rc_sub_instruction). */
RC_INSTRUCTION_NORMAL = 0,
/** The instruction is stored in U.P (struct rc_pair_instruction). */
RC_INSTRUCTION_PAIR
} rc_instruction_type;
/** One node of the doubly linked instruction list of a program. */
struct rc_instruction {
/** Previous and next node in the list. */
struct rc_instruction * Prev;
struct rc_instruction * Next;
/** Tells which member of U holds the instruction. */
rc_instruction_type Type;
union {
/** Valid when Type is RC_INSTRUCTION_NORMAL. */
struct rc_sub_instruction I;
/** Valid when Type is RC_INSTRUCTION_PAIR. */
struct rc_pair_instruction P;
} U;
/**
* Warning: IPs are not stable. If you want to use them,
* you need to recompute them at the beginning of each pass
* using \ref rc_recompute_ips
*/
unsigned int IP;
};
/** A program: its instruction list plus some summary state. */
struct rc_program {
/**
* Instructions.Next points to the first instruction,
* Instructions.Prev points to the last instruction.
*
* This node is the list head, not a real instruction: list walks
* stop when they get back to its address.
*/
struct rc_instruction Instructions;
/* Long term, we should probably remove InputsRead & OutputsWritten,
* since updating dependent state can be fragile, and they aren't
* actually used very often. */
uint32_t InputsRead;
uint32_t OutputsWritten;
uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
/** Constants of the program. */
struct rc_constant_list Constants;
};
/**
* A transformation that can be passed to \ref rc_local_transform.
*
* The function will be called once for each instruction.
* It has to either emit the appropriate transformed code for the instruction
* and return true, or return false if it doesn't understand the
* instruction.
*
* The function gets passed the userData as last parameter.
*
* \ref rc_local_transform takes an array of these and stops at the first
* entry whose function pointer is null.
*/
struct radeon_program_transformation {
int (*function)(
struct radeon_compiler*,
struct rc_instruction*,
void*);
void *userData;
};
/* Apply the transformations in 'user' (an array of
* struct radeon_program_transformation ending with a null function
* pointer) to every instruction of the program. */
void rc_local_transform(
struct radeon_compiler *c,
void *user);
/* OR into used[index] the channel mask of every temporary register read or
* written by the program; 'used' is not initialized by the function. */
void rc_get_used_temporaries(
struct radeon_compiler * c,
unsigned char * used,
unsigned int used_length);
/* Return the first index whose 'mask' channels are all unused in 'used'
* and mark them as used, or return -1 when there is none. */
int rc_find_free_temporary_list(
struct radeon_compiler * c,
unsigned char * used,
unsigned int used_length,
unsigned int mask);
/* Return the index of a temporary register that the program does not use
* at all; reports an error and returns 0 when there is none. */
unsigned int rc_find_free_temporary(struct radeon_compiler * c);
/* Allocate and initialize an instruction without linking it in. */
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
/* Allocate an instruction and link it in directly behind 'after'. */
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
/* Link 'inst' in directly behind 'after'. */
void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst);
/* Unlink 'inst' from the list it is in. */
void rc_remove_instruction(struct rc_instruction * inst);
/* Renumber the IP of every instruction; returns the instruction count. */
unsigned int rc_recompute_ips(struct radeon_compiler * c);
/* The next two are not defined in radeon_program.c. */
void rc_print_program(const struct rc_program *prog);
rc_swizzle rc_mask_to_swizzle(unsigned int mask);
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,66 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __RADEON_PROGRAM_ALU_H_
#define __RADEON_PROGRAM_ALU_H_
#include "radeon_program.h"
/*
 * The int-returning functions below share the signature of
 * radeon_program_transformation::function: (compiler, instruction,
 * user data) -> non-zero when the instruction was handled, 0 otherwise.
 * NOTE(review): which opcodes each one handles is defined in
 * radeon_program_alu.c and is not restated here.
 */
int radeonTransformALU(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
/* Vertex-program counterpart of the ALU transformation (per its name). */
int r300_transform_vertex_alu(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
int r300_transform_trig_simple(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
int radeonTransformTrigScale(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
int r300_transform_trig_scale_vertex(
struct radeon_compiler *c,
struct rc_instruction *inst,
void*);
int radeonTransformDeriv(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
/* Whole-program pass (compiler, user data) rather than a per-instruction
 * callback; NOTE(review): presumably rewrites KILP instructions. */
void rc_transform_KILP(struct radeon_compiler * c,
void *user);
#endif /* __RADEON_PROGRAM_ALU_H_ */

View file

@ -0,0 +1,190 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef RADEON_PROGRAM_CONSTANTS_H
#define RADEON_PROGRAM_CONSTANTS_H
/**
 * Saturation applied to an instruction result.
 * The program printer renders RC_SATURATE_ZERO_ONE as the "_SAT" suffix and
 * RC_SATURATE_MINUS_PLUS_ONE as "_SAT2".
 */
typedef enum {
RC_SATURATE_NONE = 0,
RC_SATURATE_ZERO_ONE,
RC_SATURATE_MINUS_PLUS_ONE
} rc_saturate_mode;
/**
 * Texture target of a texture instruction.
 * Values are assigned implicitly, starting at 0 for RC_TEXTURE_2D_ARRAY.
 */
typedef enum {
RC_TEXTURE_2D_ARRAY,
RC_TEXTURE_1D_ARRAY,
RC_TEXTURE_CUBE,
RC_TEXTURE_3D,
RC_TEXTURE_RECT,
RC_TEXTURE_2D,
RC_TEXTURE_1D
} rc_texture_target;
typedef enum {
/**
* Used to indicate unused register descriptions and
* source register that use a constant swizzle.
*/
RC_FILE_NONE = 0,
RC_FILE_TEMPORARY,
/**
* Input register.
*
* \note The compiler attaches no implicit semantics to input registers.
* Fragment/vertex program specific semantics must be defined explicitly
* using the appropriate compiler interfaces.
*/
RC_FILE_INPUT,
/**
* Output register.
*
* \note The compiler attaches no implicit semantics to input registers.
* Fragment/vertex program specific semantics must be defined explicitly
* using the appropriate compiler interfaces.
*/
RC_FILE_OUTPUT,
RC_FILE_ADDRESS,
/**
* Indicates a constant from the \ref rc_constant_list .
*/
RC_FILE_CONSTANT,
/**
* Indicates a special register, see RC_SPECIAL_xxx.
*/
RC_FILE_SPECIAL,
/**
* Indicates this register should use the result of the presubtract
* operation.
*/
RC_FILE_PRESUB
} rc_register_file;
/** Register indices valid within RC_FILE_SPECIAL. */
enum {
/** R500 fragment program ALU result "register" */
RC_SPECIAL_ALU_RESULT = 0,
/** Must be last */
RC_NUM_SPECIAL_REGISTERS
};
/* Width in bits of register index bitfields (e.g. DestIndex and Index in
 * the pair instruction structures). */
#define RC_REGISTER_INDEX_BITS 10
/* 2^RC_REGISTER_INDEX_BITS, i.e. the number of distinct indices; despite the
 * name this is one more than the largest value the bitfield can hold. */
#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
/**
 * Per-channel swizzle selector. Values 0..3 select a component of the
 * source register; values above 3 are either a constant (0, 1, 0.5) or
 * mark the channel as unused. All eight values fit in 3 bits.
 */
typedef enum {
RC_SWIZZLE_X = 0,
RC_SWIZZLE_Y,
RC_SWIZZLE_Z,
RC_SWIZZLE_W,
RC_SWIZZLE_ZERO,
RC_SWIZZLE_ONE,
RC_SWIZZLE_HALF,
RC_SWIZZLE_UNUSED
} rc_swizzle;
/* Pack four 3-bit rc_swizzle selectors into one 12-bit word; channel n
 * occupies bits [3n, 3n+2]. */
#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
/* Same selector replicated to all four channels. */
#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
/* Extract the selector of channel idx (0..3) from a packed swizzle. */
#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7)
/* Extract bit idx from a mask (write mask, negate mask, ...). */
#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1)
/* Replace the selector of channel idx in the lvalue swz with newv.
 * Note that swz and idx are evaluated more than once. */
#define SET_SWZ(swz, idx, newv) \
do { \
(swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \
} while(0)
/* Commonly used packed swizzles. */
#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO)
#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z)
#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF)
#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED)
/**
 * \name Bitmasks for components of vectors.
 *
 * Used for write masks, negation masks, etc.
 * Bit n corresponds to channel n (X = bit 0 ... W = bit 3), matching the
 * channel numbering used by GET_BIT.
 */
/*@{*/
#define RC_MASK_NONE 0
#define RC_MASK_X 1
#define RC_MASK_Y 2
#define RC_MASK_Z 4
#define RC_MASK_W 8
#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
/*@}*/
/**
 * Selects whether an instruction writes the ALU result register and, if so,
 * which component ("x" or "w" in the program printer) feeds the comparison.
 */
typedef enum {
RC_ALURESULT_NONE = 0,
RC_ALURESULT_X,
RC_ALURESULT_W
} rc_write_aluresult;
/** Presubtract operations; the trailing comment gives the computed value. */
typedef enum {
	RC_PRESUB_NONE = 0,

	/** 1 - 2 * src0 */
	RC_PRESUB_BIAS,

	/** src1 - src0 */
	RC_PRESUB_SUB,

	/** src1 + src0 */
	RC_PRESUB_ADD,

	/** 1 - src0 */
	RC_PRESUB_INV
} rc_presubtract_op;

/**
 * Number of source registers consumed by presubtract operation \p op:
 * 1 for BIAS and INV, 2 for ADD and SUB, 0 for anything else
 * (including RC_PRESUB_NONE).
 */
static inline int rc_presubtract_src_reg_count(rc_presubtract_op op)
{
	if (op == RC_PRESUB_BIAS || op == RC_PRESUB_INV)
		return 1;

	if (op == RC_PRESUB_ADD || op == RC_PRESUB_SUB)
		return 2;

	return 0;
}
/* Bit flags telling which source bank(s) a swizzle reads from; they are
 * tested with '&' on the result of rc_source_type_swz (see rc_pair_get_src). */
#define RC_SOURCE_NONE 0x0
#define RC_SOURCE_RGB 0x1
#define RC_SOURCE_ALPHA 0x2
#endif /* RADEON_PROGRAM_CONSTANTS_H */

View file

@ -0,0 +1,239 @@
/*
* Copyright (C) 2008-2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_program_pair.h"
#include "radeon_compiler_util.h"
#include <stdlib.h>
/* Non-zero when \p src currently describes register (file, index). */
static int pair_source_is(const struct rc_pair_instruction_source *src,
			  rc_register_file file, unsigned int index)
{
	return src->File == file && src->Index == index;
}

/* Record (file, index) in slot \p slot of \p sub. For the presubtract slot
 * the index is the presubtract op, and the regular slots feeding that op are
 * flagged as used as well. */
static void pair_install_source(struct rc_pair_sub_instruction *sub, int slot,
				rc_register_file file, unsigned int index)
{
	sub->Src[slot].Used = 1;
	sub->Src[slot].File = file;
	sub->Src[slot].Index = index;

	if (slot == RC_PAIR_PRESUB_SRC) {
		/* For registers with the RC_FILE_PRESUB file,
		 * the index stores the presubtract op. */
		int operands = rc_presubtract_src_reg_count(index);
		int n;
		for (n = 0; n < operands; n++)
			sub->Src[n].Used = 1;
	}
}

/**
 * Reserve a source slot of \p pair for register (\p file, \p index).
 *
 * \p rgb / \p alpha select which halves of the instruction need the source.
 * A slot already holding the same register is preferred over an empty one.
 *
 * \return the source slot where the register access was installed,
 * -1 if no slot was free anymore (or a different presubtract op is already
 * in place), or 0 without touching \p pair when nothing has to be allocated
 * (neither half requested, or \p file is RC_FILE_NONE).
 */
int rc_pair_alloc_source(struct rc_pair_instruction *pair,
	unsigned int rgb, unsigned int alpha,
	rc_register_file file, unsigned int index)
{
	int best = -1;
	int best_quality = -1;
	unsigned int rgb_used = 0;
	unsigned int alpha_used = 0;
	int slot;

	if ((!rgb && !alpha) || file == RC_FILE_NONE)
		return 0;

	/* Make sure only one presubtract operation is used per instruction. */
	if (file == RC_FILE_PRESUB) {
		const struct rc_pair_instruction_source *rgb_presub =
			&pair->RGB.Src[RC_PAIR_PRESUB_SRC];
		const struct rc_pair_instruction_source *alpha_presub =
			&pair->Alpha.Src[RC_PAIR_PRESUB_SRC];

		if (rgb && rgb_presub->Used && index != rgb_presub->Index)
			return -1;
		if (alpha && alpha_presub->Used && index != alpha_presub->Index)
			return -1;
	}

	for (slot = 0; slot < 3; ++slot) {
		/* Number of halves in which this slot already holds the
		 * requested register. */
		int shared = 0;

		if (rgb && pair->RGB.Src[slot].Used) {
			if (!pair_source_is(&pair->RGB.Src[slot], file, index)) {
				rgb_used++;
				continue;
			}
			shared++;
		}

		if (alpha && pair->Alpha.Src[slot].Used) {
			if (!pair_source_is(&pair->Alpha.Src[slot], file, index)) {
				alpha_used++;
				continue;
			}
			shared++;
		}

		if (shared > best_quality) {
			best_quality = shared;
			best = slot;
		}
	}

	if (file == RC_FILE_PRESUB) {
		best = RC_PAIR_PRESUB_SRC;
	} else if (best < 0 || (rgb && rgb_used > 2)
			    || (alpha && alpha_used > 2)) {
		return -1;
	}

	/* best >= 0 from here on */
	if (rgb)
		pair_install_source(&pair->RGB, best, file, index);
	if (alpha)
		pair_install_source(&pair->Alpha, best, file, index);

	return best;
}
/**
 * Invoke \p cb for the source slot(s) behind one instruction argument.
 *
 * \p swz is a representative swizzle of the argument: W selects the alpha
 * sources, X/Y/Z the RGB sources, anything else (constant or unused swizzle)
 * reads no register and is ignored. When \p src is the presubtract slot, the
 * callback is invoked for each regular slot the presubtract op consumes.
 */
static void pair_foreach_source_callback(
	struct rc_pair_instruction * pair,
	void * data,
	rc_pair_foreach_src_fn cb,
	unsigned int swz,
	unsigned int src)
{
	struct rc_pair_sub_instruction * half;
	unsigned int operands;
	unsigned int n;

	/* swz > 3 means that the swizzle is either not used, or a constant
	 * swizzle (e.g. 0, 1, 0.5). */
	if (swz > 3)
		return;

	half = (swz == RC_SWIZZLE_W) ? &pair->Alpha : &pair->RGB;

	if (src != RC_PAIR_PRESUB_SRC) {
		cb(data, &half->Src[src]);
		return;
	}

	operands = rc_presubtract_src_reg_count(
				half->Src[RC_PAIR_PRESUB_SRC].Index);
	for (n = 0; n < operands; n++)
		cb(data, &half->Src[n]);
}
/**
 * Call \p cb (with \p data as first argument) for every source slot read by
 * the arguments of the alpha half of \p pair. Only the first swizzle channel
 * of each alpha argument is consulted.
 */
void rc_pair_foreach_source_that_alpha_reads(
	struct rc_pair_instruction * pair,
	void * data,
	rc_pair_foreach_src_fn cb)
{
	const struct rc_opcode_info * opcode =
				rc_get_opcode_info(pair->Alpha.Opcode);
	unsigned int arg = 0;

	while (arg < opcode->NumSrcRegs) {
		const unsigned int swz = GET_SWZ(pair->Alpha.Arg[arg].Swizzle, 0);
		const unsigned int slot = pair->Alpha.Arg[arg].Source;

		pair_foreach_source_callback(pair, data, cb, swz, slot);
		arg++;
	}
}
/**
 * Call \p cb (with \p data as first argument) for every source slot read by
 * the arguments of the RGB half of \p pair.
 *
 * For each argument, the first channel whose swizzle selects a real
 * component (X, Y, Z or W) decides whether the RGB or the alpha source bank
 * is reported; arguments made only of constant/unused swizzles read no
 * register and produce no callback.
 */
void rc_pair_foreach_source_that_rgb_reads(
	struct rc_pair_instruction * pair,
	void * data,
	rc_pair_foreach_src_fn cb)
{
	unsigned int i;
	const struct rc_opcode_info * info =
				rc_get_opcode_info(pair->RGB.Opcode);

	for(i = 0; i < info->NumSrcRegs; i++) {
		unsigned int chan;
		unsigned int swz = RC_SWIZZLE_UNUSED;
		/* Find a swizzle that is either X,Y,Z,or W.  We assume here
		 * that if one channel swizzles X,Y, or Z, then none of the
		 * other channels swizzle W, and vice-versa. */
		for(chan = 0; chan < 4; chan++) {
			swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan);
			/* Stop at the first real component. The previous
			 * 'continue' here was a no-op, so the search always
			 * ended up with the swizzle of channel 3. */
			if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
			|| swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W)
				break;
		}
		pair_foreach_source_callback(pair, data, cb,
					swz,
					pair->RGB.Arg[i].Source);
	}
}
/**
 * Return the source slot descriptor that \p arg refers to.
 *
 * The bank is chosen from the source type of the argument's swizzle: the RGB
 * bank wins if the swizzle reads RGB, otherwise the alpha bank is used if it
 * reads alpha. NULL is returned when the swizzle reads neither.
 */
struct rc_pair_instruction_source * rc_pair_get_src(
	struct rc_pair_instruction * pair_inst,
	struct rc_pair_instruction_arg * arg)
{
	const unsigned int banks = rc_source_type_swz(arg->Swizzle);

	if (banks & RC_SOURCE_RGB)
		return &pair_inst->RGB.Src[arg->Source];

	if (banks & RC_SOURCE_ALPHA)
		return &pair_inst->Alpha.Src[arg->Source];

	return NULL;
}
/**
 * Map a source descriptor pointer back to its slot number.
 *
 * Only the three regular slots (0..2) of both banks are searched; the
 * presubtract slot is not. Returns -1 when \p src is none of them.
 */
int rc_pair_get_src_index(
	struct rc_pair_instruction * pair_inst,
	struct rc_pair_instruction_source * src)
{
	int slot;

	for (slot = 0; slot < 3; slot++) {
		const int in_rgb = (src == &pair_inst->RGB.Src[slot]);
		const int in_alpha = (src == &pair_inst->Alpha.Src[slot]);

		if (in_rgb || in_alpha)
			return slot;
	}

	return -1;
}

View file

@ -0,0 +1,137 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __RADEON_PROGRAM_PAIR_H_
#define __RADEON_PROGRAM_PAIR_H_
#include "radeon_code.h"
#include "radeon_opcodes.h"
#include "radeon_program_constants.h"
struct radeon_compiler;
/**
* \file
* Represents a paired ALU instruction, as found in R300 and R500
* fragment programs.
*
* Note that this representation is taking some liberties as far
* as register files are concerned, to allow separate register
* allocation.
*
* Also note that there are some subtleties in that the semantics
* of certain opcodes are implicitly changed in this representation;
* see \ref rc_pair_translate
*/
/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then
 * the presubtract value will be used, and
 * {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB.
 *
 * The value 3 is the fourth entry of the Src[4] arrays and the largest value
 * the 2-bit rc_pair_instruction_arg::Source field can hold.
 */
#define RC_PAIR_PRESUB_SRC 3
/** One source slot of a paired instruction half. */
struct rc_pair_instruction_source {
/* Non-zero when the slot is occupied. */
unsigned int Used:1;
/* An rc_register_file value (all eight values fit in 3 bits). */
unsigned int File:3;
/* Register index within File; for RC_FILE_PRESUB it stores the
 * presubtract op instead. */
unsigned int Index:RC_REGISTER_INDEX_BITS;
};
/** One operand of a paired instruction half. */
struct rc_pair_instruction_arg {
/* Index into Src[] (0..2), or RC_PAIR_PRESUB_SRC for the presubtract value. */
unsigned int Source:2;
/* Packed swizzle: four 3-bit rc_swizzle selectors (see RC_MAKE_SWIZZLE). */
unsigned int Swizzle:12;
/* Absolute-value and negate modifiers; printed as |...| and a leading '-'. */
unsigned int Abs:1;
unsigned int Negate:1;
};
/** One half (RGB or alpha) of a paired ALU instruction. */
struct rc_pair_sub_instruction {
unsigned int Opcode:8;
/* Temporary register written when WriteMask is non-zero. */
unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
unsigned int WriteMask:4;
/* Output ("color[n]" in the printer) written when OutputWriteMask is non-zero. */
unsigned int Target:2;
unsigned int OutputWriteMask:3;
unsigned int DepthWriteMask:1;
unsigned int Saturate:1;
/* Src[0..2] are regular source slots, Src[RC_PAIR_PRESUB_SRC] describes
 * the presubtract operation. */
struct rc_pair_instruction_source Src[4];
/* Operands; the opcode's NumSrcRegs tells how many are meaningful. */
struct rc_pair_instruction_arg Arg[3];
};
/** A paired instruction: an RGB half and an alpha half issued together. */
struct rc_pair_instruction {
struct rc_pair_sub_instruction RGB;
struct rc_pair_sub_instruction Alpha;
/* An rc_write_aluresult value: which half, if any, writes the ALU result. */
unsigned int WriteALUResult:2;
/* Comparison applied to the ALU result; printed via rc_print_comparefunc. */
unsigned int ALUResultCompare:3;
/* NOTE(review): meaning of Nop is not visible in this chunk - confirm. */
unsigned int Nop:1;
};
/* Callback type for the rc_pair_foreach_source_that_*_reads helpers:
 * receives the caller's opaque data pointer and the source slot being read. */
typedef void (*rc_pair_foreach_src_fn)
(void *, struct rc_pair_instruction_source *);
/**
 * General helper functions for dealing with the paired instruction format.
 */
/*@{*/
/* Reserve a source slot for register (file, index) in the requested halves.
 * Returns the slot number, or -1 if no slot is available. */
int rc_pair_alloc_source(struct rc_pair_instruction *pair,
unsigned int rgb, unsigned int alpha,
rc_register_file file, unsigned int index);
/* Invoke cb(data, source) for each source slot read by the alpha half. */
void rc_pair_foreach_source_that_alpha_reads(
struct rc_pair_instruction * pair,
void * data,
rc_pair_foreach_src_fn cb);
/* Invoke cb(data, source) for each source slot read by the RGB half. */
void rc_pair_foreach_source_that_rgb_reads(
struct rc_pair_instruction * pair,
void * data,
rc_pair_foreach_src_fn cb);
/* Source slot referenced by arg, or NULL if its swizzle reads no register. */
struct rc_pair_instruction_source * rc_pair_get_src(
struct rc_pair_instruction * pair_inst,
struct rc_pair_instruction_arg * arg);
/* Slot number (0..2) of src within pair_inst, or -1 if not found. */
int rc_pair_get_src_index(
struct rc_pair_instruction * pair_inst,
struct rc_pair_instruction_source * src);
/*@}*/
/**
 * Compiler passes that operate with the paired format.
 *
 * All passes share the signature (compiler, opaque user pointer).
 * NOTE(review): what each pass expects in \p user is defined by its
 * implementation and is not visible in this header.
 */
/*@{*/
struct radeon_pair_handler;
void rc_pair_translate(struct radeon_compiler *cc, void *user);
void rc_pair_schedule(struct radeon_compiler *cc, void *user);
void rc_pair_regalloc(struct radeon_compiler *cc, void *user);
void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user);
void rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user);
/*@}*/
#endif /* __RADEON_PROGRAM_PAIR_H_ */

View file

@ -0,0 +1,418 @@
/*
* Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "radeon_program.h"
#include <stdio.h>
/**
 * Human-readable name of a texture target, or "BAD_TEXTURE_TARGET" for a
 * value outside the rc_texture_target enumeration.
 */
static const char * textarget_to_string(rc_texture_target target)
{
	static const char * const names[] = {
		[RC_TEXTURE_2D_ARRAY] = "2D_ARRAY",
		[RC_TEXTURE_1D_ARRAY] = "1D_ARRAY",
		[RC_TEXTURE_CUBE]     = "CUBE",
		[RC_TEXTURE_3D]       = "3D",
		[RC_TEXTURE_RECT]     = "RECT",
		[RC_TEXTURE_2D]       = "2D",
		[RC_TEXTURE_1D]       = "1D",
	};
	const unsigned int idx = (unsigned int)target;

	if (idx < sizeof(names) / sizeof(names[0]) && names[idx])
		return names[idx];

	return "BAD_TEXTURE_TARGET";
}
/**
 * Textual formula for a presubtract operation, "NONE" for RC_PRESUB_NONE and
 * "BAD_PRESUBTRACT_OP" for any unknown value.
 */
static const char * presubtract_op_to_string(rc_presubtract_op op)
{
	if (op == RC_PRESUB_NONE)
		return "NONE";
	if (op == RC_PRESUB_BIAS)
		return "(1 - 2 * src0)";
	if (op == RC_PRESUB_SUB)
		return "(src1 - src0)";
	if (op == RC_PRESUB_ADD)
		return "(src1 + src0)";
	if (op == RC_PRESUB_INV)
		return "(1 - src0)";

	return "BAD_PRESUBTRACT_OP";
}
/**
 * Print the comparison "\p lhs <op> \p rhs" to \p f. The NEVER and ALWAYS
 * functions are printed as the literals "false" and "true"; an unknown
 * function is printed with "???" as the operator.
 */
static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
{
	const char * relation;

	if (func == RC_COMPARE_FUNC_NEVER) {
		fprintf(f, "false");
		return;
	}
	if (func == RC_COMPARE_FUNC_ALWAYS) {
		fprintf(f, "true");
		return;
	}

	if (func == RC_COMPARE_FUNC_LESS)
		relation = "<";
	else if (func == RC_COMPARE_FUNC_EQUAL)
		relation = "==";
	else if (func == RC_COMPARE_FUNC_LEQUAL)
		relation = "<=";
	else if (func == RC_COMPARE_FUNC_GREATER)
		relation = ">";
	else if (func == RC_COMPARE_FUNC_NOTEQUAL)
		relation = "!=";
	else if (func == RC_COMPARE_FUNC_GEQUAL)
		relation = ">=";
	else
		relation = "???";

	fprintf(f, "%s %s %s", lhs, relation, rhs);
}
/**
 * Print a register reference such as "temp[3]" or "const[1 + addr[0]]".
 *
 * RC_FILE_NONE prints "none"; special registers print their symbolic name
 * ("aluresult") or "special[n]"; unknown files print "BAD FILE[n]".
 * A non-zero \p reladdr appends " + addr[0]" inside the brackets.
 */
static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
{
	const char * name = "BAD FILE";

	if (file == RC_FILE_NONE) {
		fprintf(f, "none");
		return;
	}

	if (file == RC_FILE_SPECIAL) {
		if (index == RC_SPECIAL_ALU_RESULT)
			fprintf(f, "aluresult");
		else
			fprintf(f, "special[%i]", index);
		return;
	}

	switch (file) {
	case RC_FILE_TEMPORARY: name = "temp"; break;
	case RC_FILE_INPUT:     name = "input"; break;
	case RC_FILE_OUTPUT:    name = "output"; break;
	case RC_FILE_ADDRESS:   name = "addr"; break;
	case RC_FILE_CONSTANT:  name = "const"; break;
	default: break;
	}

	fprintf(f, "%s[%i%s]", name, index, reladdr ? " + addr[0]" : "");
}
/** Print the channel letters ("x", "y", "z", "w") whose bits are set in \p mask. */
static void rc_print_mask(FILE * f, unsigned int mask)
{
	static const struct {
		unsigned int bit;
		const char * letter;
	} channels[] = {
		{ RC_MASK_X, "x" },
		{ RC_MASK_Y, "y" },
		{ RC_MASK_Z, "z" },
		{ RC_MASK_W, "w" },
	};
	unsigned int n;

	for (n = 0; n < sizeof(channels) / sizeof(channels[0]); n++) {
		if (mask & channels[n].bit)
			fprintf(f, "%s", channels[n].letter);
	}
}
/**
 * Print a destination register, followed by ".<mask>" unless all four
 * channels are written.
 */
static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
{
	rc_print_register(f, dst.File, dst.Index, 0);

	if (dst.WriteMask == RC_MASK_XYZW)
		return;

	fprintf(f, ".");
	rc_print_mask(f, dst.WriteMask);
}
static char rc_swizzle_char(unsigned int swz)
{
switch(swz) {
case RC_SWIZZLE_X: return 'x';
case RC_SWIZZLE_Y: return 'y';
case RC_SWIZZLE_Z: return 'z';
case RC_SWIZZLE_W: return 'w';
case RC_SWIZZLE_ZERO: return '0';
case RC_SWIZZLE_ONE: return '1';
case RC_SWIZZLE_HALF: return 'H';
case RC_SWIZZLE_UNUSED: return '_';
}
fprintf(stderr, "bad swz: %u\n", swz);
return '?';
}
/**
 * Print the four channels of a packed swizzle, each one preceded by '-'
 * when its bit is set in \p negate.
 */
static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
{
	unsigned int chan = 0;

	while (chan < 4) {
		if (GET_BIT(negate, chan))
			fprintf(f, "-");
		fprintf(f, "%c", rc_swizzle_char(GET_SWZ(swizzle, chan)));
		chan++;
	}
}
/**
 * Print a presubtract operation with its actual operands, wrapped in
 * parentheses, e.g. "(1 - temp[0])" or "(temp[1] + temp[0])".
 * An unknown or NONE opcode prints just "()".
 */
static void rc_print_presub_instruction(FILE * f,
					struct rc_presub_instruction inst)
{
	/* Text placed in front of SrcReg[0]; NULL means nothing is printed. */
	const char * lead = NULL;
	/* Non-zero when SrcReg[1] is printed ahead of 'lead'. */
	int binary = 0;

	switch (inst.Opcode) {
	case RC_PRESUB_BIAS:
		lead = "1 - 2 * ";
		break;
	case RC_PRESUB_INV:
		lead = "1 - ";
		break;
	case RC_PRESUB_SUB:
		binary = 1;
		lead = " - ";
		break;
	case RC_PRESUB_ADD:
		binary = 1;
		lead = " + ";
		break;
	default:
		break;
	}

	fprintf(f, "(");
	if (lead) {
		if (binary) {
			rc_print_register(f, inst.SrcReg[1].File,
					inst.SrcReg[1].Index, inst.SrcReg[1].RelAddr);
		}
		fprintf(f, "%s", lead);
		rc_print_register(f, inst.SrcReg[0].File,
				inst.SrcReg[0].Index, inst.SrcReg[0].RelAddr);
	}
	fprintf(f, ")");
}
/**
 * Print a source operand including negate, absolute value and swizzle.
 *
 * A negate mask covering all channels is printed as a single leading '-';
 * a partial mask is printed per channel inside the swizzle. The closing '|'
 * of an absolute value goes before the swizzle when per-channel negation is
 * shown, and after it otherwise.
 */
static void rc_print_src_register(FILE * f, struct rc_instruction * inst,
				  struct rc_src_register src)
{
	const int uniform_negate =
		(src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
	const int show_swizzle =
		(src.Swizzle != RC_SWIZZLE_XYZW || !uniform_negate);

	if (src.Negate == RC_MASK_XYZW)
		fprintf(f, "-");
	if (src.Abs)
		fprintf(f, "|");

	if (src.File != RC_FILE_PRESUB)
		rc_print_register(f, src.File, src.Index, src.RelAddr);
	else
		rc_print_presub_instruction(f, inst->U.I.PreSub);

	if (src.Abs && !uniform_negate)
		fprintf(f, "|");

	if (show_swizzle) {
		fprintf(f, ".");
		rc_print_swizzle(f, src.Swizzle, uniform_negate ? 0 : src.Negate);
	}

	if (src.Abs && uniform_negate)
		fprintf(f, "|");
}
/**
 * Track nesting of IF/loop blocks and return the indentation (in spaces,
 * two per level) to use for an instruction with the given opcode.
 *
 * Block openers are printed at the current level and then increase
 * \p *branch_depth; block closers decrease it first; ELSE is printed one
 * level out without changing the depth.
 */
static unsigned update_branch_depth(rc_opcode opcode, unsigned *branch_depth)
{
	unsigned level;

	switch (opcode) {
	case RC_OPCODE_IF:
	case RC_OPCODE_BGNLOOP:
		level = *branch_depth;
		*branch_depth += 1;
		break;
	case RC_OPCODE_ENDIF:
	case RC_OPCODE_ENDLOOP:
		assert(*branch_depth > 0);
		*branch_depth -= 1;
		level = *branch_depth;
		break;
	case RC_OPCODE_ELSE:
		assert(*branch_depth > 0);
		level = *branch_depth - 1;
		break;
	default:
		level = *branch_depth;
		break;
	}

	return level * 2;
}
/**
 * Print one non-paired instruction on a single line: indentation, opcode
 * name with saturate suffix, destination, sources, texture information and
 * the optional ALU result comparison.
 */
static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth)
{
	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
	const unsigned indent = update_branch_depth(inst->U.I.Opcode, branch_depth);
	unsigned int src;

	/* Emits exactly 'indent' spaces (nothing when indent is 0). */
	fprintf(f, "%*s", (int)indent, "");

	fprintf(f, "%s", info->Name);

	if (inst->U.I.SaturateMode == RC_SATURATE_ZERO_ONE)
		fprintf(f, "_SAT");
	else if (inst->U.I.SaturateMode == RC_SATURATE_MINUS_PLUS_ONE)
		fprintf(f, "_SAT2");
	else if (inst->U.I.SaturateMode != RC_SATURATE_NONE)
		fprintf(f, "_BAD_SAT");

	if (info->HasDstReg) {
		fprintf(f, " ");
		rc_print_dst_register(f, inst->U.I.DstReg);
		if (info->NumSrcRegs)
			fprintf(f, ",");
	}

	for (src = 0; src < info->NumSrcRegs; ++src) {
		/* Sources after the first one are comma separated. */
		fprintf(f, "%s ", src > 0 ? "," : "");
		rc_print_src_register(f, inst, inst->U.I.SrcReg[src]);
	}

	if (info->HasTexture) {
		fprintf(f, ", %s%s[%u]",
			textarget_to_string(inst->U.I.TexSrcTarget),
			inst->U.I.TexShadow ? "SHADOW" : "",
			inst->U.I.TexSrcUnit);
	}

	fprintf(f, ";");

	if (inst->U.I.WriteALUResult) {
		const char * component =
			(inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w";

		fprintf(f, " [aluresult = (");
		rc_print_comparefunc(f, component, inst->U.I.ALUResultCompare, "0");
		fprintf(f, ")]");
	}

	fprintf(f, "\n");
}
/**
 * Print one paired instruction to \p f.
 *
 * Output layout: a first line listing the used source slots
 * ("srcN.xyz = ...", "srcN.w = ...", "srcp.* = ..."), then one line for the
 * RGB half and one for the alpha half (each skipped when its opcode is NOP),
 * and finally the ALU result comparison if the instruction writes it.
 * Indentation is derived from \p branch_depth via update_branch_depth.
 */
static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth)
{
struct rc_pair_instruction * inst = &fullinst->U.P;
int printedsrc = 0;
/* The opcode that drives indentation is the RGB one unless it is a NOP. */
unsigned spaces = update_branch_depth(inst->RGB.Opcode != RC_OPCODE_NOP ?
inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth);
for (unsigned i = 0; i < spaces; i++)
fprintf(f, " ");
/* Regular source slots 0..2 of both banks, comma separated. */
for(unsigned int src = 0; src < 3; ++src) {
if (inst->RGB.Src[src].Used) {
if (printedsrc)
fprintf(f, ", ");
/* NOTE(review): src is unsigned but printed with %i. */
fprintf(f, "src%i.xyz = ", src);
rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0);
printedsrc = 1;
}
if (inst->Alpha.Src[src].Used) {
if (printedsrc)
fprintf(f, ", ");
fprintf(f, "src%i.w = ", src);
rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0);
printedsrc = 1;
}
}
/* Presubtract slots: Index holds the presubtract op. The leading ", " is
 * printed unconditionally, regardless of printedsrc. */
if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
fprintf(f, ", srcp.xyz = %s",
presubtract_op_to_string(
inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index));
}
if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
fprintf(f, ", srcp.w = %s",
presubtract_op_to_string(
inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
}
fprintf(f, "\n");
/* RGB half. */
if (inst->RGB.Opcode != RC_OPCODE_NOP) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
for (unsigned i = 0; i < spaces; i++)
fprintf(f, " ");
fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : "");
/* Only the x/y/z bits of the write masks are shown for the RGB half. */
if (inst->RGB.WriteMask)
fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex,
(inst->RGB.WriteMask & 1) ? "x" : "",
(inst->RGB.WriteMask & 2) ? "y" : "",
(inst->RGB.WriteMask & 4) ? "z" : "");
if (inst->RGB.OutputWriteMask)
fprintf(f, " color[%i].%s%s%s", inst->RGB.Target,
(inst->RGB.OutputWriteMask & 1) ? "x" : "",
(inst->RGB.OutputWriteMask & 2) ? "y" : "",
(inst->RGB.OutputWriteMask & 4) ? "z" : "");
if (inst->WriteALUResult == RC_ALURESULT_X)
fprintf(f, " aluresult");
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
fprintf(f, ", %s%ssrc", neg, abs);
/* "srcp" denotes the presubtract result, "srcN" a regular slot. */
if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
fprintf(f,"p");
else
fprintf(f,"%d", inst->RGB.Arg[arg].Source);
fprintf(f,".%c%c%c%s",
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
abs);
}
fprintf(f, "\n");
}
/* Alpha half. */
if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
for (unsigned i = 0; i < spaces; i++)
fprintf(f, " ");
fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : "");
if (inst->Alpha.WriteMask)
fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
if (inst->Alpha.OutputWriteMask)
fprintf(f, " color[%i].w", inst->Alpha.Target);
if (inst->Alpha.DepthWriteMask)
fprintf(f, " depth.w");
if (inst->WriteALUResult == RC_ALURESULT_W)
fprintf(f, " aluresult");
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
fprintf(f, ", %s%ssrc", neg, abs);
if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
fprintf(f,"p");
else
fprintf(f,"%d", inst->Alpha.Arg[arg].Source);
/* Only the first swizzle channel is shown for alpha arguments. */
fprintf(f,".%c%s",
rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs);
}
fprintf(f, "\n");
}
/* ALU result comparison against 0, if the instruction writes it. */
if (inst->WriteALUResult) {
for (unsigned i = 0; i < spaces; i++)
fprintf(f, " ");
fprintf(f, " [aluresult = (");
rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
fprintf(f, ")]\n");
}
}
/**
 * Print program to stderr, default options.
 *
 * Emits a header line, then every instruction of \p prog prefixed with its
 * zero-based position in the list. Paired and normal instructions are
 * dispatched to their respective printers; nesting depth for indentation is
 * tracked across the whole program.
 */
void rc_print_program(const struct rc_program *prog)
{
	unsigned int linenum = 0;
	unsigned branch_depth = 0;
	struct rc_instruction *inst;

	fprintf(stderr, "# Radeon Compiler Program\n");

	/* The list is circular: stop when the sentinel node is reached again. */
	for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
		/* linenum is unsigned, so it must be printed with %u. */
		fprintf(stderr, "%3u: ", linenum);

		if (inst->Type == RC_INSTRUCTION_PAIR)
			rc_print_pair_instruction(stderr, inst, &branch_depth);
		else
			rc_print_normal_instruction(stderr, inst, &branch_depth);

		linenum++;
	}
}

View file

@ -0,0 +1,528 @@
/*
* Copyright (C) 2010 Corbin Simpson
* Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_program_tex.h"
#include "radeon_compiler_util.h"
/* Series of transformations to be done on textures. */
/**
 * Build the source register used as the result of a failed shadow
 * comparison for texture unit \p tmu.
 *
 * With shadow ambient enabled this is the W component of the
 * RC_STATE_SHADOW_AMBIENT state constant of the unit (added to the constant
 * list here), otherwise the constant 0. In both cases the swizzle is combined
 * with the unit's texture swizzle.
 */
static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler,
						int tmu)
{
	struct rc_src_register reg = { 0, };
	unsigned int base_swizzle = RC_SWIZZLE_0000;

	reg.File = RC_FILE_NONE;

	if (compiler->enable_shadow_ambient) {
		reg.File = RC_FILE_CONSTANT;
		reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants,
						   RC_STATE_SHADOW_AMBIENT, tmu);
		base_swizzle = RC_SWIZZLE_WWWW;
	}

	reg.Swizzle = combine_swizzles(base_swizzle,
				compiler->state.unit[tmu].texture_swizzle);
	return reg;
}
static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler,
int tmu)
{
struct rc_src_register reg = { 0, };
reg.File = RC_FILE_NONE;
reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111,
compiler->state.unit[tmu].texture_swizzle);
return reg;
}
/**
 * Multiply the texture coordinates of \p inst by a per-unit state constant.
 *
 * A MUL writing a fresh temporary is inserted in front of \p inst; its
 * second operand is the \p state_constant entry for the instruction's
 * texture unit. \p inst is then redirected to read the temporary.
 */
static void scale_texcoords(struct r300_fragment_program_compiler *compiler,
			    struct rc_instruction *inst,
			    unsigned state_constant)
{
	unsigned scaled = rc_find_free_temporary(&compiler->Base);
	struct rc_instruction *mul =
		rc_insert_new_instruction(&compiler->Base, inst->Prev);

	/* scaled = original coordinates * state constant */
	mul->U.I.Opcode = RC_OPCODE_MUL;
	mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
	mul->U.I.DstReg.Index = scaled;
	mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
	mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
	mul->U.I.SrcReg[1].Index =
		rc_constants_add_state(&compiler->Base.Program.Constants,
				       state_constant, inst->U.I.TexSrcUnit);

	/* The texture instruction now samples with the scaled coordinates. */
	reset_srcreg(&inst->U.I.SrcReg[0]);
	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[0].Index = scaled;
}
/**
 * Replace a projective texture lookup by an explicit divide plus a plain TEX.
 *
 * Two instructions are inserted in front of \p inst: an RCP that writes the
 * reciprocal of the coordinate's W into temp.w, and a MUL that scales the
 * coordinates by temp.wwww into the same temporary. \p inst itself is turned
 * into RC_OPCODE_TEX reading that temporary.
 */
static void projective_divide(struct r300_fragment_program_compiler *compiler,
			      struct rc_instruction *inst)
{
	struct rc_instruction *rcp;
	struct rc_instruction *mul;
	unsigned divided = rc_find_free_temporary(&compiler->Base);
	/* Because the input can be arbitrarily swizzled,
	 * read the component mapped to W. */
	const unsigned int w_component = GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3);

	/* divided.w = 1 / coord.w */
	rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev);
	rcp->U.I.Opcode = RC_OPCODE_RCP;
	rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
	rcp->U.I.DstReg.Index = divided;
	rcp->U.I.DstReg.WriteMask = RC_MASK_W;
	rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
	rcp->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(w_component);

	/* divided = coord * divided.wwww */
	mul = rc_insert_new_instruction(&compiler->Base, inst->Prev);
	mul->U.I.Opcode = RC_OPCODE_MUL;
	mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
	mul->U.I.DstReg.Index = divided;
	mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
	mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
	mul->U.I.SrcReg[1].Index = divided;
	mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;

	/* The lookup itself becomes a non-projective TEX on the result. */
	reset_srcreg(&inst->U.I.SrcReg[0]);
	inst->U.I.Opcode = RC_OPCODE_TEX;
	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
	inst->U.I.SrcReg[0].Index = divided;
}
/**
 * Transform TEX, TXB, TXP, TXD, TXL and KIL instructions in the following
 * ways (in this order):
 * - implement texture compare (shadow extensions)
 * - premultiply texture coordinates for RECT / non-normalized coordinates
 * - do the projective divide in ALU code when a later fallback needs it
 * - emulate REPEAT / MIRRORED_REPEAT / MIRRORED_CLAMP wrap modes
 * - clamp and scale coordinates before the fetch (NPOT -> POT for 3D)
 * - convert UNORM-sampled results to SNORM
 * - introduce a temporary register when the destination is not a plain
 *   temporary, when saturate is set, or (non-r500) when a write mask is used
 * - copy the texture coordinate to a temporary when it is not read from a
 *   temporary or an input
 *
 * \param c     compiler that owns \p inst; new instructions, temporaries and
 *              constants are allocated through it.
 * \param inst  instruction to examine; it is modified in place.
 * \param data  pointer to the struct r300_fragment_program_compiler whose
 *              per-unit state drives the transformations.
 * \return 0 if \p inst is not one of the opcodes listed above (nothing is
 *         changed), 1 otherwise.
 */
int radeonTransformTEX(
struct radeon_compiler * c,
struct rc_instruction * inst,
void* data)
{
struct r300_fragment_program_compiler *compiler =
(struct r300_fragment_program_compiler*)data;
rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords;
/* Only texture instructions and KIL are handled here. */
if (inst->U.I.Opcode != RC_OPCODE_TEX &&
inst->U.I.Opcode != RC_OPCODE_TXB &&
inst->U.I.Opcode != RC_OPCODE_TXP &&
inst->U.I.Opcode != RC_OPCODE_TXD &&
inst->U.I.Opcode != RC_OPCODE_TXL &&
inst->U.I.Opcode != RC_OPCODE_KIL)
return 0;
/* ARB_shadow & EXT_shadow_funcs */
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
(compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
/* The result does not depend on the texture: replace the
 * instruction by a MOV of the pass or fail value and stop. */
inst->U.I.Opcode = RC_OPCODE_MOV;
if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
} else {
inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
}
return 1;
} else {
struct rc_instruction * inst_rcp = NULL;
struct rc_instruction *inst_mul, *inst_add, *inst_cmp;
unsigned tmp_texsample;
unsigned tmp_sum;
int pass, fail;
/* Save the output register. */
struct rc_dst_register output_reg = inst->U.I.DstReg;
unsigned saturate_mode = inst->U.I.SaturateMode;
/* Redirect TEX to a new temp. */
tmp_texsample = rc_find_free_temporary(c);
inst->U.I.SaturateMode = 0;
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst->U.I.DstReg.Index = tmp_texsample;
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
/* Requested after the TEX destination has been redirected; the
 * assert at the end of this branch checks the two temps differ. */
tmp_sum = rc_find_free_temporary(c);
if (inst->U.I.Opcode == RC_OPCODE_TXP) {
/* Compute 1/W. */
inst_rcp = rc_insert_new_instruction(c, inst);
inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_rcp->U.I.DstReg.Index = tmp_sum;
inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
inst_rcp->U.I.SrcReg[0].Swizzle =
RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
}
/* Divide Z by W (if it's TXP) and saturate. */
inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV;
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mul->U.I.DstReg.Index = tmp_sum;
inst_mul->U.I.DstReg.WriteMask = RC_MASK_W;
inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
inst_mul->U.I.SrcReg[0].Swizzle =
RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
if (inst->U.I.Opcode == RC_OPCODE_TXP) {
inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
inst_mul->U.I.SrcReg[1].Index = tmp_sum;
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
}
/* Add the depth texture value. */
inst_add = rc_insert_new_instruction(c, inst_mul);
inst_add->U.I.Opcode = RC_OPCODE_ADD;
inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_add->U.I.DstReg.Index = tmp_sum;
inst_add->U.I.DstReg.WriteMask = RC_MASK_W;
inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst_add->U.I.SrcReg[0].Index = tmp_sum;
inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
inst_add->U.I.SrcReg[1].Index = tmp_texsample;
inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
/* Note that SrcReg[0] is r, SrcReg[1] is tex and:
* LESS: r < tex <=> -tex+r < 0
* GEQUAL: r >= tex <=> not (-tex+r < 0)
* GREATER: r > tex <=> tex-r < 0
* LEQUAL: r <= tex <=> not ( tex-r < 0)
* EQUAL: GEQUAL
* NOTEQUAL:LESS
*/
/* This negates either r or tex: */
if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL ||
comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL)
inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;
else
inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
/* This negates the whole expression by choosing which CMP source
 * slot (1 or 2) receives the pass value and which the fail value: */
if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER ||
comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
pass = 1;
fail = 2;
} else {
pass = 2;
fail = 1;
}
/* The CMP writes the original destination with the original
 * saturate mode. */
inst_cmp = rc_insert_new_instruction(c, inst_add);
inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
inst_cmp->U.I.SaturateMode = saturate_mode;
inst_cmp->U.I.DstReg = output_reg;
inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
inst_cmp->U.I.SrcReg[0].Swizzle =
combine_swizzles(RC_SWIZZLE_WWWW,
compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle);
inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
assert(tmp_texsample != tmp_sum);
}
}
/* R300 cannot sample from rectangles and the wrap mode fallback needs
* normalized coordinates anyway. */
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {
scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);
inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
}
/* Divide by W if needed. */
if (inst->U.I.Opcode == RC_OPCODE_TXP &&
(wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT ||
compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) {
projective_divide(compiler, inst);
}
/* Texture wrap modes don't work on NPOT textures.
*
* Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
* mirroring are not. If we need to repeat, we do:
*
* MUL temp, texcoord, <scaling factor constant>
* FRC temp, temp ; Discard integer portion of coords
*
* This gives us coords in [0, 1].
*
* Mirroring is trickier. We're going to start out like repeat:
*
* MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
* MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
* ; so scale to [0, 1]
* FRC temp, temp ; Make the pattern repeat
* MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
* ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
* ; The pattern is backwards, so reverse it (1-x).
*
* This gives us coords in [0, 1].
*
* ~ C & M. ;)
*/
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
wrapmode != RC_WRAP_NONE) {
struct rc_instruction *inst_mov;
unsigned temp = rc_find_free_temporary(c);
if (wrapmode == RC_WRAP_REPEAT) {
/* Both instructions will be paired up. */
struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
inst_frc->U.I.Opcode = RC_OPCODE_FRC;
inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_frc->U.I.DstReg.Index = temp;
inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
/*
* Function:
* f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
*
* Code:
* MUL temp, src0, 0.5
* FRC temp, temp
* MAD temp, temp, 2, -1
* ADD temp, 1, -abs(temp)
*/
struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
unsigned two, two_swizzle;
inst_mul = rc_insert_new_instruction(c, inst->Prev);
inst_mul->U.I.Opcode = RC_OPCODE_MUL;
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mul->U.I.DstReg.Index = temp;
inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
inst_frc = rc_insert_new_instruction(c, inst->Prev);
inst_frc->U.I.Opcode = RC_OPCODE_FRC;
inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_frc->U.I.DstReg.Index = temp;
inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst_frc->U.I.SrcReg[0].Index = temp;
inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
inst_mad = rc_insert_new_instruction(c, inst->Prev);
inst_mad->U.I.Opcode = RC_OPCODE_MAD;
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mad->U.I.DstReg.Index = temp;
inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst_mad->U.I.SrcReg[0].Index = temp;
inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
inst_mad->U.I.SrcReg[1].Index = two;
inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
inst_add = rc_insert_new_instruction(c, inst->Prev);
inst_add->U.I.Opcode = RC_OPCODE_ADD;
inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_add->U.I.DstReg.Index = temp;
inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
inst_add->U.I.SrcReg[1].Index = temp;
inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
inst_add->U.I.SrcReg[1].Abs = 1;
inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
/*
* Mirrored clamp modes are bloody simple, we just use abs
* to mirror [0, 1] into [-1, 0]. This works for
* all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
*/
/* Note: this declaration shadows the outer inst_mov. */
struct rc_instruction *inst_mov;
inst_mov = rc_insert_new_instruction(c, inst->Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mov->U.I.DstReg.Index = temp;
inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
inst_mov->U.I.SrcReg[0].Abs = 1;
}
/* Preserve W for TXP/TXB. */
inst_mov = rc_insert_new_instruction(c, inst->Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mov->U.I.DstReg.Index = temp;
inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
/* The texture instruction now reads the wrapped coordinate. */
reset_srcreg(&inst->U.I.SrcReg[0]);
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst->U.I.SrcReg[0].Index = temp;
}
/* NPOT -> POT conversion for 3D textures. */
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {
struct rc_instruction *inst_mov;
unsigned temp = rc_find_free_temporary(c);
/* Saturate XYZ. */
inst_mov = rc_insert_new_instruction(c, inst->Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mov->U.I.DstReg.Index = temp;
inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
/* Copy W. */
inst_mov = rc_insert_new_instruction(c, inst->Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mov->U.I.DstReg.Index = temp;
inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
reset_srcreg(&inst->U.I.SrcReg[0]);
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst->U.I.SrcReg[0].Index = temp;
/* Scale the clamped coordinate by the per-unit TEXSCALE factor. */
scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
}
/* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM.
* Formula: dst = tex > 0.5 ? tex*2-2 : tex*2
*/
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) {
unsigned two, two_swizzle;
struct rc_instruction *inst_mul, *inst_mad, *inst_cnd;
/* NOTE(review): the immediate is 2.35 although the formula above
 * and the inline comments below say 2 -- confirm this value is
 * intentional before changing it. */
two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle);
/* These instructions are inserted after the texture instruction;
 * each rc_find_free_temporary call below is made after the
 * previous result has been stored in an instruction. */
inst_mul = rc_insert_new_instruction(c, inst);
inst_mul->U.I.Opcode = RC_OPCODE_MUL;
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c);
inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */
inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */
inst_mul->U.I.SrcReg[1].Index = two;
inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle;
inst_mad = rc_insert_new_instruction(c, inst_mul);
inst_mad->U.I.Opcode = RC_OPCODE_MAD;
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */
inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */
inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW;
/* CND writes the original destination, choosing between the MAD
 * and MUL results based on the raw texture sample. */
inst_cnd = rc_insert_new_instruction(c, inst_mad);
inst_cnd->U.I.Opcode = RC_OPCODE_CND;
inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode;
inst_cnd->U.I.DstReg = inst->U.I.DstReg;
inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index;
inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
/* Redirect the texture instruction to the temporary read above. */
inst->U.I.SaturateMode = 0;
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index;
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
}
/* Cannot write texture to output registers or with saturate (all chips),
* or with masks (non-r500). */
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
(inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
inst->U.I.SaturateMode ||
(!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
/* Sample into a fresh temporary and MOV it to the real
 * destination, carrying over the saturate mode. */
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;
inst_mov->U.I.DstReg = inst->U.I.DstReg;
inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
inst->U.I.SaturateMode = 0;
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
}
/* Cannot read texture coordinate from constants file.
 * This check is not guarded against KIL, so it applies to KIL too. */
if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
reset_srcreg(&inst->U.I.SrcReg[0]);
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
}
return 1;
}

View file

@ -0,0 +1,39 @@
/*
* Copyright (C) 2010 Corbin Simpson
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/* Include guard renamed: identifiers containing a double underscore are
 * reserved for the implementation, and the other compiler headers use the
 * plain RADEON_*_H form. */
#ifndef RADEON_PROGRAM_TEX_H
#define RADEON_PROGRAM_TEX_H

#include "radeon_compiler.h"
#include "radeon_program.h"

/**
 * Lower a texture instruction (or KIL) into a form the hardware accepts.
 *
 * \param c     compiler that owns \p inst.
 * \param inst  instruction to examine; modified in place.
 * \param data  pointer to the struct r300_fragment_program_compiler in use.
 * \return nonzero if \p inst was a texture/KIL instruction and was handled,
 *         0 if it was left untouched.
 */
int radeonTransformTEX(
	struct radeon_compiler * c,
	struct rc_instruction * inst,
	void* data);

#endif /* RADEON_PROGRAM_TEX_H */

View file

@ -0,0 +1,150 @@
/*
* Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_remove_constants.h"
#include "radeon_dataflow.h"
/**
 * State shared between rc_remove_unused_constants and its mark_used
 * callback.
 */
struct mark_used_data {
/* One byte per constant, indexed by constant index; mark_used sets the
 * entry to 1 when the constant is read without relative addressing. */
unsigned char * const_used;
/* Points at a flag that mark_used sets to 1 when any constant source
 * uses relative addressing. */
unsigned * has_rel_addr;
};
/**
 * rc_remap_registers callback: rewrite the index of a constant-file register
 * through the old-index -> new-index table passed as \p userdata.
 * Registers from any other file are left alone.
 */
static void remap_regs(void * userdata, struct rc_instruction * inst,
			rc_register_file * pfile, unsigned int * pindex)
{
	const unsigned *old_to_new = userdata;

	if (*pfile != RC_FILE_CONSTANT)
		return;

	*pindex = old_to_new[*pindex];
}
/**
 * rc_for_all_reads_src callback: record that a constant is read.
 *
 * A directly addressed constant is flagged in const_used; a relatively
 * addressed one only raises has_rel_addr, since its index alone does not
 * identify which constants are reached.
 */
static void mark_used(void * userdata, struct rc_instruction * inst,
		      struct rc_src_register * src)
{
	struct mark_used_data * usage = userdata;

	if (src->File != RC_FILE_CONSTANT)
		return;

	if (!src->RelAddr) {
		usage->const_used[src->Index] = 1;
		return;
	}

	*usage->has_rel_addr = 1;
}
/**
 * Remove constants that no instruction reads and compact the constant array.
 *
 * \param c     compiler whose program is processed.
 * \param user  must point to an "unsigned *"; on return it holds either NULL
 *              (no external constant changed position) or a malloc'ed table
 *              with remap_table[new_index] == old_index for the surviving
 *              constants.  This function does not free a returned table.
 *
 * If the working buffers cannot be allocated, the program is left untouched
 * and NULL is stored through \p user; skipping the pass is safe because it
 * is purely an optimization.
 */
void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
{
	unsigned **out_remap_table = (unsigned**)user;
	const unsigned old_count = c->Program.Constants.Count;
	unsigned char *const_used;
	unsigned *remap_table;
	unsigned *inv_remap_table;
	unsigned has_rel_addr = 0;
	unsigned is_identity = 1;
	unsigned are_externals_remapped = 0;
	struct rc_constant *constants = c->Program.Constants.Constants;
	struct mark_used_data d;
	unsigned new_count;

	*out_remap_table = NULL;

	if (!old_count)
		return;

	/* Allocate everything before touching the program so that an
	 * allocation failure cannot leave the constants half-compacted. */
	const_used = calloc(old_count, sizeof(*const_used));
	remap_table = malloc(old_count * sizeof(*remap_table));
	inv_remap_table = malloc(old_count * sizeof(*inv_remap_table));
	if (!const_used || !remap_table || !inv_remap_table) {
		free(const_used);
		free(remap_table);
		free(inv_remap_table);
		return;
	}

	d.const_used = const_used;
	d.has_rel_addr = &has_rel_addr;

	/* Pass 1: Mark used constants. */
	for (struct rc_instruction *inst = c->Program.Instructions.Next;
	     inst != &c->Program.Instructions; inst = inst->Next) {
		rc_for_all_reads_src(inst, mark_used, &d);
	}

	/* Pass 2: If there is relative addressing or dead constant elimination
	 * is disabled, mark all externals as used. */
	if (has_rel_addr || !c->remove_unused_constants) {
		for (unsigned i = 0; i < old_count; i++) {
			if (constants[i].Type == RC_CONSTANT_EXTERNAL)
				const_used[i] = 1;
		}
	}

	/* Pass 3: Make the remapping table and remap constants.
	 * This pass removes unused constants simply by overwriting them by
	 * other constants. */
	new_count = 0;
	for (unsigned i = 0; i < old_count; i++) {
		if (!const_used[i])
			continue;

		remap_table[new_count] = i;
		inv_remap_table[i] = new_count;

		if (i != new_count) {
			if (constants[i].Type == RC_CONSTANT_EXTERNAL)
				are_externals_remapped = 1;

			constants[new_count] = constants[i];
			is_identity = 0;
		}
		new_count++;
	}

	/* !is_identity ==> new_count < old_count.
	 * (With an identity mapping new_count may still be smaller, when only
	 * trailing constants were dropped.) */
	assert(is_identity || new_count < old_count);
	/* Externals that were all kept in pass 2 can never have moved. */
	assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped));

	/* Pass 4: Redirect reads of all constants to their new locations. */
	if (!is_identity) {
		for (struct rc_instruction *inst = c->Program.Instructions.Next;
		     inst != &c->Program.Instructions; inst = inst->Next) {
			rc_remap_registers(inst, remap_regs, inv_remap_table);
		}
	}

	/* Set the new constant count. Note that new_count may be less than
	 * Count even though the remapping function is identity. In that case,
	 * the constants have been removed at the end of the array. */
	c->Program.Constants.Count = new_count;

	/* Hand the table out only if an external constant changed position. */
	if (are_externals_remapped) {
		*out_remap_table = remap_table;
	} else {
		free(remap_table);
	}

	free(const_used);
	free(inv_remap_table);

	if (c->Debug & RC_DBG_LOG)
		rc_constants_print(&c->Program.Constants);
}

View file

@ -0,0 +1,35 @@
/*
* Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef RADEON_REMOVE_CONSTANTS_H
#define RADEON_REMOVE_CONSTANTS_H
#include "radeon_compiler.h"
/**
 * Remove constants that are not read by any instruction of the program and
 * renumber the remaining ones.
 *
 * \param c     compiler whose program is processed.
 * \param user  pointer to an "unsigned *" that receives either NULL or a
 *              heap-allocated table mapping new constant indices to old
 *              ones (set only when external constants changed position).
 *              The implementation does not free a returned table --
 *              presumably the caller owns it; confirm at the call site.
 */
void rc_remove_unused_constants(struct radeon_compiler *c, void *user);
#endif

View file

@ -0,0 +1,92 @@
/*
* Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* \file
*/
#include "radeon_rename_regs.h"
#include "radeon_compiler.h"
#include "radeon_dataflow.h"
#include "radeon_program.h"
/**
 * Rename temporaries so that the program gets close to SSA form: once this
 * pass has run, most registers are written only once, with a few
 * exceptions.
 *
 * Every instruction that writes a temporary and whose readers can be
 * determined gets a fresh temporary index, and all of its readers are
 * updated to match.  Programs containing BGNLOOP are left untouched.
 *
 * This function assumes all the instructions are still of type
 * RC_INSTRUCTION_NORMAL.
 *
 * \param user  unused.
 */
void rc_rename_regs(struct radeon_compiler *c, void *user)
{
	struct rc_instruction * const list_head = &c->Program.Instructions;
	struct rc_instruction * cur;
	struct rc_reader_data reader_data;
	unsigned char * in_use;
	unsigned int in_use_len;

	/* XXX Remove this once the register allocation works with flow control. */
	cur = list_head->Next;
	while (cur != list_head) {
		if (cur->U.I.Opcode == RC_OPCODE_BGNLOOP)
			return;
		cur = cur->Next;
	}

	/* Table of temporaries already taken, sized at twice the
	 * instruction count reported by rc_recompute_ips. */
	in_use_len = 2 * rc_recompute_ips(c);
	in_use = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * in_use_len);
	memset(in_use, 0, sizeof(unsigned char) * in_use_len);
	rc_get_used_temporaries(c, in_use, in_use_len);

	for (cur = list_head->Next; cur != list_head; cur = cur->Next) {
		unsigned int r;
		int fresh;

		if (cur->U.I.DstReg.File != RC_FILE_TEMPORARY)
			continue;

		reader_data.ExitOnAbort = 1;
		rc_get_readers(c, cur, &reader_data, NULL, NULL, NULL);

		/* Skip writes whose readers are unknown or absent. */
		if (reader_data.Abort || !reader_data.ReaderCount)
			continue;

		fresh = rc_find_free_temporary_list(c, in_use, in_use_len,
						    RC_MASK_XYZW);
		if (fresh < 0) {
			rc_error(c, "Ran out of temporary registers\n");
			return;
		}

		reader_data.Writer->U.I.DstReg.Index = fresh;
		for (r = 0; r < reader_data.ReaderCount; r++)
			reader_data.Readers[r].U.I.Src->Index = fresh;
	}
}

View file

@ -0,0 +1,9 @@
#ifndef RADEON_RENAME_REGS_H
#define RADEON_RENAME_REGS_H
struct radeon_compiler;
/**
 * Compiler pass that gives each written temporary (whose readers can be
 * determined) a fresh register index, moving the program towards SSA form.
 *
 * \param c     compiler whose program is rewritten.
 * \param user  not used by the implementation.
 */
void rc_rename_regs(struct radeon_compiler *c, void *user);
#endif /* RADEON_RENAME_REGS_H */

View file

@ -0,0 +1,57 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef RADEON_SWIZZLE_H
#define RADEON_SWIZZLE_H
#include "radeon_program.h"
/**
 * Result of rc_swizzle_caps::Split: how a source register access is broken
 * into phases.
 */
struct rc_swizzle_split {
/* Presumably the number of valid entries in Phase -- TODO confirm against
 * the Split implementations. */
unsigned char NumPhases;
/* One entry per phase; presumably the channel mask handled in that
 * phase -- TODO confirm. */
unsigned char Phase[4];
};
/**
 * Describe the swizzling capability of target hardware.
 *
 * A table of callbacks supplied per hardware target.
 */
struct rc_swizzle_caps {
/**
* Check whether the given swizzle, absolute and negate combination
* can be implemented natively by the hardware for this opcode.
*
* \param opcode opcode of the instruction reading the register
* \param reg source register (passed by value) to check
* \return 1 if the swizzle is native for the given opcode
*/
int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);
/**
* Determine how to split access to the masked channels of the
* given source register to obtain ALU-native swizzles.
*
* \param reg source register (passed by value) to split
* \param mask channels of reg that are accessed
* \param split output; filled in with the resulting phases
*/
void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split);
};
#endif /* RADEON_SWIZZLE_H */

View file

@ -0,0 +1,517 @@
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_variable.h"
#include "memory_pool.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_list.h"
#include "radeon_opcodes.h"
#include "radeon_program.h"
/**
* Rewrite the index and writemask for the destination register of var
* and its friends to new_index and new_writemask. This function also takes
* care of rewriting the source index and swizzle of every reader of var
* and its friends.
*
* \param var variable to rewrite; its Friend chain is rewritten as well
* \param new_index new temporary register index
* \param new_writemask new write mask; the swizzle conversion is derived
* from the variable's current combined write mask and this one
*/
void rc_variable_change_dst(
struct rc_variable * var,
unsigned int new_index,
unsigned int new_writemask)
{
struct rc_variable * var_ptr;
struct rc_list * readers;
unsigned int old_mask = rc_variable_writemask_sum(var);
unsigned int conversion_swizzle =
rc_make_conversion_swizzle(old_mask, new_writemask);
/* Step 1: rewrite the writers (var and all of its friends). */
for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) {
if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
rc_normal_rewrite_writemask(var_ptr->Inst,
conversion_swizzle);
var_ptr->Inst->U.I.DstReg.Index = new_index;
} else {
/* Pair instruction: pick the Alpha or the RGB half. */
struct rc_pair_sub_instruction * sub;
if (var_ptr->Dst.WriteMask == RC_MASK_W) {
/* A W-only write goes to the Alpha half, whose write
 * mask is not rewritten; the new mask must keep W. */
assert(new_writemask & RC_MASK_W);
sub = &var_ptr->Inst->U.P.Alpha;
} else {
sub = &var_ptr->Inst->U.P.RGB;
rc_pair_rewrite_writemask(sub,
conversion_swizzle);
}
sub->DestIndex = new_index;
}
}
/* Step 2: rewrite every reader of var and its friends. */
readers = rc_variable_readers_union(var);
for ( ; readers; readers = readers->Next) {
struct rc_reader * reader = readers->Item;
if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) {
reader->U.I.Src->Index = new_index;
reader->U.I.Src->Swizzle = rc_rewrite_swizzle(
reader->U.I.Src->Swizzle, conversion_swizzle);
} else {
struct rc_pair_instruction * pair_inst =
&reader->Inst->U.P;
unsigned int src_type = rc_source_type_swz(
reader->U.P.Arg->Swizzle);
int src_index = reader->U.P.Arg->Source;
/* For a presubtract argument, look up the source slot
 * through the reader's Src instead. */
if (src_index == RC_PAIR_PRESUB_SRC) {
src_index = rc_pair_get_src_index(
pair_inst, reader->U.P.Src);
}
/* Try to delete the old src; it is OK if this fails,
* because rc_pair_alloc_source might be able to
* find a source that can be reused.
*/
if (rc_pair_remove_src(reader->Inst, src_type,
src_index, old_mask)) {
/* Reuse the source index of the source that
* was just deleted and set its register
* index. We can't use rc_pair_alloc_source
* for this because it might return a source
* index that is already being used. */
if (src_type & RC_SOURCE_RGB) {
pair_inst->RGB.Src[src_index]
.Used = 1;
pair_inst->RGB.Src[src_index]
.Index = new_index;
pair_inst->RGB.Src[src_index]
.File = RC_FILE_TEMPORARY;
}
if (src_type & RC_SOURCE_ALPHA) {
pair_inst->Alpha.Src[src_index]
.Used = 1;
pair_inst->Alpha.Src[src_index]
.Index = new_index;
pair_inst->Alpha.Src[src_index]
.File = RC_FILE_TEMPORARY;
}
} else {
src_index = rc_pair_alloc_source(
&reader->Inst->U.P,
src_type & RC_SOURCE_RGB,
src_type & RC_SOURCE_ALPHA,
RC_FILE_TEMPORARY,
new_index);
if (src_index < 0) {
/* Report the failure and leave this reader's
 * argument untouched. */
rc_error(var->C, "Rewrite of inst %u failed "
"Can't allocate source for "
"Inst %u src_type=%x "
"new_index=%u new_mask=%u\n",
var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask);
continue;
}
}
reader->U.P.Arg->Swizzle = rc_rewrite_swizzle(
reader->U.P.Arg->Swizzle, conversion_swizzle);
/* A presubtract argument keeps its RC_PAIR_PRESUB_SRC marker. */
if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) {
reader->U.P.Arg->Source = src_index;
}
}
}
}
/**
* Compute the live intervals for var and its friends.
*/
void rc_variable_compute_live_intervals(struct rc_variable * var)
{
while(var) {
unsigned int i;
unsigned int start = var->Inst->IP;
for (i = 0; i < var->ReaderCount; i++) {
unsigned int chan;
unsigned int chan_start = start;
unsigned int chan_end = var->Readers[i].Inst->IP;
unsigned int mask = var->Readers[i].WriteMask;
struct rc_instruction * inst;
/* Extend the live interval of T0 to the start of the
* loop for sequences like:
* BGNLOOP
* read T0
* ...
* write T0
* ENDLOOP
*/
if (var->Readers[i].Inst->IP < start) {
struct rc_instruction * bgnloop =
rc_match_endloop(var->Readers[i].Inst);
chan_start = bgnloop->IP;
}
/* Extend the live interval of T0 to the start of the
* loop in case there is a BRK instruction in the loop
* (we don't actually check for a BRK instruction we
* assume there is one somewhere in the loop, which
* there usually is) for sequences like:
* BGNLOOP
* ...
* conditional BRK
* ...
* write T0
* ENDLOOP
* read T0
***************************************************
* Extend the live interval of T0 to the end of the
* loop for sequences like:
* write T0
* BGNLOOP
* ...
* read T0
* ENDLOOP
*/
for (inst = var->Inst; inst != var->Readers[i].Inst;
inst = inst->Next) {
rc_opcode op = rc_get_flow_control_inst(inst);
if (op == RC_OPCODE_ENDLOOP) {
struct rc_instruction * bgnloop =
rc_match_endloop(inst);
if (bgnloop->IP < chan_start) {
chan_start = bgnloop->IP;
}
} else if (op == RC_OPCODE_BGNLOOP) {
struct rc_instruction * endloop =
rc_match_bgnloop(inst);
if (endloop->IP > chan_end) {
chan_end = endloop->IP;
}
}
}
for (chan = 0; chan < 4; chan++) {
if ((mask >> chan) & 0x1) {
if (!var->Live[chan].Used
|| chan_start < var->Live[chan].Start) {
var->Live[chan].Start =
chan_start;
}
if (!var->Live[chan].Used
|| chan_end > var->Live[chan].End) {
var->Live[chan].End = chan_end;
}
var->Live[chan].Used = 1;
}
}
}
var = var->Friend;
}
}
/**
* @return 1 if a and b share a reader
* @return 0 if they do not
*/
static unsigned int readers_intersect(
struct rc_variable * a,
struct rc_variable * b)
{
unsigned int a_index, b_index;
for (a_index = 0; a_index < a->ReaderCount; a_index++) {
struct rc_reader reader_a = a->Readers[a_index];
for (b_index = 0; b_index < b->ReaderCount; b_index++) {
struct rc_reader reader_b = b->Readers[b_index];
if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL
&& reader_b.Inst->Type == RC_INSTRUCTION_NORMAL
&& reader_a.U.I.Src == reader_b.U.I.Src) {
return 1;
}
if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR
&& reader_b.Inst->Type == RC_INSTRUCTION_PAIR
&& reader_a.U.P.Src == reader_b.U.P.Src) {
return 1;
}
}
}
return 0;
}
/**
 * Append friend to the end of var's Friend chain.
 *
 * Both variables must write the same destination index (asserted).
 */
void rc_variable_add_friend(
	struct rc_variable * var,
	struct rc_variable * friend)
{
	struct rc_variable * tail;

	assert(var->Dst.Index == friend->Dst.Index);

	/* Find the last variable in the chain. */
	for (tail = var; tail->Friend; tail = tail->Friend) {
		/* nothing */
	}
	tail->Friend = friend;
}
/**
 * Allocate a zero-initialised rc_variable from the compiler's memory pool
 * and fill in its destination register.
 *
 * @param reader_data Optional. When non-NULL, the writer instruction and the
 * reader array are taken from it; the reader array pointer is stored as is,
 * not copied.
 * @return The new variable.
 */
struct rc_variable * rc_variable(
	struct radeon_compiler * c,
	unsigned int DstFile,
	unsigned int DstIndex,
	unsigned int DstWriteMask,
	struct rc_reader_data * reader_data)
{
	struct rc_variable * var;

	var = memory_pool_malloc(&c->Pool, sizeof(struct rc_variable));
	memset(var, 0, sizeof(struct rc_variable));

	var->C = c;
	var->Dst.File = DstFile;
	var->Dst.Index = DstIndex;
	var->Dst.WriteMask = DstWriteMask;

	if (!reader_data) {
		return var;
	}

	var->Inst = reader_data->Writer;
	var->ReaderCount = reader_data->ReaderCount;
	var->Readers = reader_data->Readers;
	return var;
}
/**
 * Insert variable into variable_list.
 *
 * If the variable shares a reader with a variable that is already in the
 * list, it becomes a friend of the first such variable instead of getting a
 * list entry of its own.
 */
static void get_variable_helper(
	struct rc_list ** variable_list,
	struct rc_variable * variable)
{
	struct rc_list * node = *variable_list;

	while (node) {
		if (readers_intersect(variable, node->Item)) {
			rc_variable_add_friend(node->Item, variable);
			return;
		}
		node = node->Next;
	}

	/* No existing variable shares a reader: start a new entry. */
	rc_list_add(variable_list, rc_list(&variable->C->Pool, variable));
}
static void get_variable_pair_helper(
struct rc_list ** variable_list,
struct radeon_compiler * c,
struct rc_instruction * inst,
struct rc_pair_sub_instruction * sub_inst)
{
struct rc_reader_data reader_data;
struct rc_variable * new_var;
rc_register_file file;
unsigned int writemask;
if (sub_inst->Opcode == RC_OPCODE_NOP) {
return;
}
memset(&reader_data, 0, sizeof(struct rc_reader_data));
rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL);
if (reader_data.ReaderCount == 0) {
return;
}
if (sub_inst->WriteMask) {
file = RC_FILE_TEMPORARY;
writemask = sub_inst->WriteMask;
} else if (sub_inst->OutputWriteMask) {
file = RC_FILE_OUTPUT;
writemask = sub_inst->OutputWriteMask;
} else {
writemask = 0;
file = RC_FILE_NONE;
}
new_var = rc_variable(c, file, sub_inst->DestIndex, writemask,
&reader_data);
get_variable_helper(variable_list, new_var);
}
/**
* Generate a list of variables used by the shader program. Each instruction
* that writes to a register is considered a variable. The struct rc_variable
* data structure includes a list of readers and is essentially a
* definition-use chain. Any two variables that share a reader are considered
* "friends" and they are linked together via the Friend attribute.
*/
struct rc_list * rc_get_variables(struct radeon_compiler * c)
{
struct rc_instruction * inst;
struct rc_list * variable_list = NULL;
for (inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions;
inst = inst->Next) {
struct rc_reader_data reader_data;
struct rc_variable * new_var;
memset(&reader_data, 0, sizeof(reader_data));
if (inst->Type == RC_INSTRUCTION_NORMAL) {
rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
if (reader_data.ReaderCount == 0) {
continue;
}
new_var = rc_variable(c, inst->U.I.DstReg.File,
inst->U.I.DstReg.Index,
inst->U.I.DstReg.WriteMask, &reader_data);
get_variable_helper(&variable_list, new_var);
} else {
get_variable_pair_helper(&variable_list, c, inst,
&inst->U.P.RGB);
get_variable_pair_helper(&variable_list, c, inst,
&inst->U.P.Alpha);
}
}
return variable_list;
}
/**
 * @return The bitwise OR of the destination writemasks of var and of every
 * variable on its Friend chain (0 when var is NULL).
 */
unsigned int rc_variable_writemask_sum(struct rc_variable * var)
{
	unsigned int combined = 0;
	struct rc_variable * cur;

	for (cur = var; cur; cur = cur->Friend) {
		combined |= cur->Dst.WriteMask;
	}
	return combined;
}
/**
 * @return 1 if list already holds a reader equivalent to reader, 0 otherwise.
 *
 * Readers of normal instructions are equivalent when they point at the same
 * U.I.Src; readers of pair instructions when both U.P.Arg and U.P.Src are
 * the same.
 */
static unsigned int reader_list_contains(
	struct rc_list * list,
	struct rc_reader * reader)
{
	struct rc_list * node;

	for (node = list; node; node = node->Next) {
		struct rc_reader * other = node->Item;

		if (reader->Inst->Type != other->Inst->Type) {
			continue;
		}
		if (reader->Inst->Type == RC_INSTRUCTION_NORMAL
		    && reader->U.I.Src == other->U.I.Src) {
			return 1;
		}
		if (reader->Inst->Type == RC_INSTRUCTION_PAIR
		    && reader->U.P.Arg == other->U.P.Arg
		    && reader->U.P.Src == other->U.P.Src) {
			return 1;
		}
	}
	return 0;
}

/**
 * @return A list of readers for a variable and its friends. Readers
 * that read from two different variable friends are only included once in
 * this list. The list nodes are allocated from the compiler's memory pool
 * and point at the entries of the variables' Readers arrays.
 */
struct rc_list * rc_variable_readers_union(struct rc_variable * var)
{
	struct rc_list * readers = NULL;

	for ( ; var; var = var->Friend) {
		unsigned int r;

		for (r = 0; r < var->ReaderCount; r++) {
			struct rc_reader * candidate = &var->Readers[r];

			if (!reader_list_contains(readers, candidate)) {
				rc_list_add(&readers,
					rc_list(&var->C->Pool, candidate));
			}
		}
	}
	return readers;
}
/**
 * @param src_type Instruction type (RC_INSTRUCTION_NORMAL or otherwise) that
 * src belongs to.
 * @param src Source pointer to compare against.
 * @return 1 if reader belongs to an instruction of type src_type and reads
 * from src, 0 otherwise.
 */
static unsigned int reader_equals_src(
	struct rc_reader reader,
	unsigned int src_type,
	void * src)
{
	void * reader_src;

	if (reader.Inst->Type != src_type) {
		return 0;
	}

	/* Normal instructions keep the source in U.I, everything else is
	 * treated as a pair instruction. */
	reader_src = (src_type == RC_INSTRUCTION_NORMAL)
				? (void *)reader.U.I.Src
				: (void *)reader.U.P.Src;
	return reader_src == src;
}
/**
 * @return 1 if any reader of var reads from src (see reader_equals_src()),
 * 0 otherwise. Friends of var are not examined.
 */
static unsigned int variable_writes_src(
	struct rc_variable * var,
	unsigned int src_type,
	void * src)
{
	unsigned int idx = 0;

	while (idx < var->ReaderCount) {
		if (reader_equals_src(var->Readers[idx], src_type, src)) {
			return 1;
		}
		idx++;
	}
	return 0;
}
/**
 * Collect the variables in var_list whose value is read through src.
 *
 * @param src_type Instruction type that src belongs to.
 * @param src The source to look up.
 * @return A newly allocated list of the writers of src, or NULL if no
 * variable in var_list writes it.
 */
struct rc_list * rc_variable_list_get_writers(
	struct rc_list * var_list,
	unsigned int src_type,
	void * src)
{
	struct rc_list * writers = NULL;
	struct rc_list * node;

	for (node = var_list; node; node = node->Next) {
		struct rc_variable * var = node->Item;
		struct rc_variable * other;

		if (!variable_writes_src(var, src_type, src)) {
			continue;
		}

		rc_list_add(&writers, rc_list(&var->C->Pool, var));
		for (other = var->Friend; other; other = other->Friend) {
			if (!variable_writes_src(other, src_type, src)) {
				continue;
			}
			rc_list_add(&writers, rc_list(&var->C->Pool, other));
		}

		/* Once we have identified the variable and its friends that
		 * write this source, we can stop searching, because we know
		 * that none of the other variables in the list will write
		 * this source. If they did they would be friends of var.
		 */
		break;
	}
	return writers;
}
/**
 * Debug helper: print var and every variable on its Friend chain to stderr.
 *
 * For each variable the IP of the writing instruction, the destination
 * index and writemask, the live interval of each of the four channels and
 * the number of readers are printed.
 */
void rc_variable_print(struct rc_variable * var)
{
	unsigned int i;
	while (var) {
		fprintf(stderr, "%u: TEMP[%u].%u: ",
			var->Inst->IP, var->Dst.Index, var->Dst.WriteMask);
		for (i = 0; i < 4; i++) {
			/* Start and End are declared as int in
			 * struct live_intervals, so they need %d. */
			fprintf(stderr, "chan %u: start=%d end=%d ", i,
				var->Live[i].Start, var->Live[i].End);
		}
		fprintf(stderr, "%u readers\n", var->ReaderCount);
		if (var->Friend) {
			fprintf(stderr, "Friend: \n\t");
		}
		var = var->Friend;
	}
}

View file

@ -0,0 +1,89 @@
/*
* Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef RADEON_VARIABLE_H
#define RADEON_VARIABLE_H
#include "radeon_compiler.h"
struct radeon_compiler;
struct rc_list;
struct rc_reader_data;
struct rc_readers;
/* Live range of one channel of a variable, expressed in instruction IPs
 * (filled in by rc_variable_compute_live_intervals()). */
struct live_intervals {
/* IP at which the channel becomes live. */
int Start;
/* IP of the last point at which the channel is live. */
int End;
/* Set to 1 once Start/End have been assigned; 0 means no interval yet. */
int Used;
};
/* A value written by one instruction together with the places that read it. */
struct rc_variable {
/* Compiler this variable belongs to; its memory pool is used for the lists
 * built from variables. */
struct radeon_compiler * C;
/* Destination register (file, index and writemask) being written. */
struct rc_dst_register Dst;
/* The instruction that writes the variable. */
struct rc_instruction * Inst;
/* Number of entries in Readers. */
unsigned int ReaderCount;
/* Array of the readers of this variable. */
struct rc_reader * Readers;
/* Live interval of each of the four channels. */
struct live_intervals Live[4];
/* A friend is a variable that shares a reader with another variable.
 * Friends form a singly linked list terminated by NULL.
*/
struct rc_variable * Friend;
};
/* Change the destination index/writemask of var.
 * NOTE(review): only the tail of the implementation was inspected; it
 * rewrites the readers to use new_index and adjusts their swizzles. */
void rc_variable_change_dst(
struct rc_variable * var,
unsigned int new_index,
unsigned int new_writemask);
/* Compute the per-channel live intervals for var and its friends. */
void rc_variable_compute_live_intervals(struct rc_variable * var);
/* Append friend to the end of var's Friend chain; both must have the same
 * destination index. */
void rc_variable_add_friend(
struct rc_variable * var,
struct rc_variable * friend);
/* Allocate a zeroed variable from c's memory pool. reader_data may be NULL;
 * otherwise the writer and readers are taken from it. */
struct rc_variable * rc_variable(
struct radeon_compiler * c,
unsigned int DstFile,
unsigned int DstIndex,
unsigned int DstWriteMask,
struct rc_reader_data * reader_data);
/* Build the list of all variables (writes with at least one reader) in the
 * program; variables sharing a reader are chained as friends. */
struct rc_list * rc_get_variables(struct radeon_compiler * c);
/* Bitwise OR of the writemasks of var and all of its friends. */
unsigned int rc_variable_writemask_sum(struct rc_variable * var);
/* List of the readers of var and its friends, without duplicates. */
struct rc_list * rc_variable_readers_union(struct rc_variable * var);
/* List of the variables in var_list that have a reader reading from src;
 * src_type is the instruction type src belongs to. */
struct rc_list * rc_variable_list_get_writers(
struct rc_list * var_list,
unsigned int src_type,
void * src);
/* Print var and its friends to stderr for debugging. */
void rc_variable_print(struct rc_variable * var);
#endif /* RADEON_VARIABLE_H */

View file

@ -0,0 +1 @@
radeon_compiler_util_tests

View file

@ -0,0 +1,53 @@
# Builds and runs the compiler unit tests.
TOP = ../../../../../..
include $(TOP)/configs/current
# Treat every warning in the test code as an error.
CFLAGS += -Wall -Werror
### Basic defines ###
# One executable per entry; each is built from the source of the same name.
TESTS = radeon_compiler_util_tests
TEST_SOURCES := $(TESTS:=.c)
# Helper sources linked into every test executable.
SHARED_SOURCES = \
rc_test_helpers.c \
unit_test.c
C_SOURCES = $(SHARED_SOURCES) $(TEST_SOURCES)
INCLUDES = \
-I. \
-I..
# Driver library the tests link against; built by the Makefile two levels up.
COMPILER_LIB = ../../libr300.a
##### TARGETS #####
default: depend run_tests
# Regenerate the dependency file from scratch.
depend: $(C_SOURCES)
rm -f depend
touch depend
$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $^ 2> /dev/null
# Remove the test executables and the dependency files.
# Note: object files are not removed by this rule.
clean:
rm -f $(TESTS) depend depend.bak
$(TESTS): $(TESTS:=.o) $(SHARED_SOURCES:.c=.o) $(COMPILER_LIB)
$(APP_CC) -o $@ $^
# Run every test executable listed as a prerequisite.
run_tests: $(TESTS)
@echo "RUNNING TESTS:"
@echo ""
$(foreach test, $^, @./$(test))
# The library is declared phony so that the sub-make is always invoked.
.PHONY: $(COMPILER_LIB)
$(COMPILER_LIB):
$(MAKE) -C ../..
##### RULES #####
.c.o:
$(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
sinclude depend

View file

@ -0,0 +1,76 @@
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include "radeon_compiler_util.h"
#include "radeon_program.h"
#include "rc_test_helpers.h"
#include "unit_test.h"
/**
 * Run one rc_inst_can_use_presub() subtest.
 *
 * Both strings are parsed into normal instructions. The test then asks
 * whether the first source of the "replace" instruction can be replaced by
 * an ADD presubtract operation built from the two sources of the "add"
 * instruction, and records whether the answer equals expected.
 *
 * @param result Accumulates the subtest counters.
 * @param expected Expected return value of rc_inst_can_use_presub().
 * @param add_str The ADD instruction, in rc_program_print() format.
 * @param replace_str The instruction that would use the presubtract result.
 */
static void test_rc_inst_can_use_presub(
	struct test_result * result,
	int expected,
	const char * add_str,
	const char * replace_str)
{
	struct rc_instruction add_inst, replace_inst;
	int ret;

	test_begin(result);

	/* If an instruction string cannot be parsed the instruction is not
	 * fully initialised, so the subtest is recorded as failed instead of
	 * checking a meaningless result. */
	if (!init_rc_normal_instruction(&add_inst, add_str)
	    || !init_rc_normal_instruction(&replace_inst, replace_str)) {
		test_check(result, 0);
		return;
	}

	ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0,
			&replace_inst.U.I.SrcReg[0],
			&add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]);

	test_check(result, ret == expected);
}
/**
 * Run all rc_inst_can_use_presub() subtests, in table order.
 *
 * All cases relate to https://bugs.freedesktop.org/show_bug.cgi?id=36527
 */
static void test_runner_rc_inst_can_use_presub(struct test_result * result)
{
	static const struct {
		int expected;
		const char * add_str;
		const char * replace_str;
	} cases[] = {
		/* This tests the case where the source being replaced has
		 * the same register file and register index as another
		 * source register in the CMP instruction. A previous version
		 * of this function was ignoring all registers that shared
		 * the same file and index as the replacement register when
		 * counting the number of source selects.
		 */
		{ 0,
		  "ADD temp[0].z, temp[6].__x_, const[1].__x_;",
		  "CMP temp[0].y, temp[0]._z__, const[0]._z__, temp[0]._y__;" },

		/* Testing a random case that should fail. */
		{ 0,
		  "ADD temp[3], temp[1], temp[2];",
		  "MAD temp[1], temp[0], const[0].xxxx, -temp[3];" },

		/* This tests the case where the arguments of the ADD
		 * instruction share the same register file and index.
		 * Normally, we would need only one source select for these
		 * two arguments, but since they will be part of a
		 * presubtract operation we need to use the two source
		 * selects that the presubtract instruction expects
		 * (src0 and src1).
		 */
		{ 0,
		  "ADD temp[3].x, temp[0].x___, temp[0].x___;",
		  "MAD temp[0].xyz, temp[2].xyz_, -temp[3].xxx_, input[5].xyz_;" },
	};
	unsigned int i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
		test_rc_inst_can_use_presub(result, cases[i].expected,
				cases[i].add_str, cases[i].replace_str);
	}
}
/**
 * Entry point of the radeon_compiler_util test executable.
 *
 * @return EXIT_SUCCESS if no subtest failed, EXIT_FAILURE otherwise, so that
 * a failing test is visible to the caller (e.g. make).
 */
int main(int argc, char ** argv)
{
	struct test tests[] = {
		{"rc_inst_can_use_presub()", test_runner_rc_inst_can_use_presub},
		{NULL, NULL}
	};
	unsigned int i;
	int status = EXIT_SUCCESS;

	run_tests(tests);

	/* run_tests() leaves the counters of each test in tests[i].result. */
	for (i = 0; tests[i].name; i++) {
		if (tests[i].result.fail > 0) {
			status = EXIT_FAILURE;
		}
	}
	return status;
}

View file

@ -0,0 +1,380 @@
#include <errno.h>
#include <regex.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include "../radeon_compiler_util.h"
#include "../radeon_opcodes.h"
#include "../radeon_program.h"
#include "rc_test_helpers.h"
/* This file contains some helper functions for filling out the rc_instruction
* data structures. These functions take a string as input based on the format
* output by rc_program_print().
*/
/* Set to a non-zero value to make DBG() print to stderr. */
#define VERBOSE 0
/* printf-style debug output on stderr; prints nothing while VERBOSE is 0. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
/* Size in bytes of the buffer that receives regerror() messages. */
#define REGEX_ERR_BUF_SIZE 50
/* One token located by a regex: a pointer into the searched string plus the
 * token's length. The token is NOT NUL-terminated at Length. */
struct match_info {
/* First character of the token inside the searched string. */
const char * String;
/* Number of characters in the token; 0 for an empty match. */
int Length;
};
/**
 * @return The number of characters covered by matches[index], i.e. the
 * distance between its start and end offsets.
 */
static int match_length(regmatch_t * matches, int index)
{
	const regmatch_t * m = &matches[index];

	return m->rm_eo - m->rm_so;
}
/**
 * Compile regex_str as a POSIX extended regular expression and run it
 * against search_str.
 *
 * @param matches Receives the offsets of the whole match (index 0) and of
 * the capture groups. Only meaningful when 1 is returned.
 * @param num_matches Number of entries in matches.
 * @return 1 if the expression compiled and matched, 0 otherwise (a message
 * is printed to stderr).
 */
static int regex_helper(
	const char * regex_str,
	const char * search_str,
	regmatch_t * matches,
	int num_matches)
{
	char err_buf[REGEX_ERR_BUF_SIZE];
	regex_t regex;
	int err_code;
	unsigned int i;

	err_code = regcomp(&regex, regex_str, REG_EXTENDED);
	if (err_code) {
		regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE);
		fprintf(stderr, "Failed to compile regex: %s\n", err_buf);
		return 0;
	}

	err_code = regexec(&regex, search_str, num_matches, matches, 0);
	DBG("Search string: '%s'\n", search_str);
	if (err_code) {
		regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE);
		fprintf(stderr, "Failed to match regex: %s\n", err_buf);
		/* Release the compiled expression on the failure path too. */
		regfree(&regex);
		return 0;
	}

	/* The offsets are only dumped after a successful match; they are
	 * not meaningful when regexec() fails. */
	for (i = 0; i < (unsigned int)num_matches; i++) {
		DBG("Match %u start = %d end = %d\n", i,
			matches[i].rm_so, matches[i].rm_eo);
	}

	/* regcomp() allocates memory that must be released with regfree(). */
	regfree(&regex);
	return 1;
}
/* Entries needed for the source register regex: the whole match plus its
 * five capture groups. */
#define REGEX_SRC_MATCHES 6
/* Tokens of a source register string, one per capture group of the source
 * register regex, in order. */
struct src_tokens {
/* Leading '-' characters. */
struct match_info Negate;
/* Leading '|' characters. */
struct match_info Abs;
/* Register file name, e.g. "temp". */
struct match_info File;
/* Register index between the square brackets. */
struct match_info Index;
/* Start of the swizzle suffix (a '.' followed by the components). */
struct match_info Swizzle;
};
/**
 * Initialize the source register at index src_index for the instruction based
 * on src_str.
 *
 * Only fields that are present in the string are written, so the instruction
 * is expected to have been zeroed beforehand (init_rc_normal_instruction()
 * does this).
 *
 * NOTE: Warning in init_rc_normal_instruction() applies to this function as
 * well.
 *
 * @param inst The instruction whose U.I.SrcReg[src_index] is filled in.
 * @param src_index Index of the source register to initialize.
 * @param src_str A string that represents the source register. The format for
 * this string is the same that is output by rc_program_print. It must be
 * NUL-terminated; when a swizzle is given it must have four components.
 * @return 1 On success, 0 on failure
 */
int init_rc_normal_src(
	struct rc_instruction * inst,
	unsigned int src_index,
	const char * src_str)
{
	/* Groups: negate, abs, file, index, swizzle. The index accepts one
	 * or more digits so that registers above 9 can be parsed, like the
	 * destination register parser does. */
	const char * regex_str = "(-*)(\\|*)([[:lower:]]*)\\[([[:digit:]]+)\\](\\.*[[:lower:]-]*)";
	regmatch_t matches[REGEX_SRC_MATCHES];
	struct src_tokens tokens;
	struct rc_src_register * src_reg = &inst->U.I.SrcReg[src_index];
	unsigned int i;

	/* Execute the regex */
	if (!regex_helper(regex_str, src_str, matches, REGEX_SRC_MATCHES)) {
		fprintf(stderr, "Failed to execute regex for src register.\n");
		return 0;
	}

	/* Create Tokens */
	tokens.Negate.String = src_str + matches[1].rm_so;
	tokens.Negate.Length = match_length(matches, 1);
	tokens.Abs.String = src_str + matches[2].rm_so;
	tokens.Abs.Length = match_length(matches, 2);
	tokens.File.String = src_str + matches[3].rm_so;
	tokens.File.Length = match_length(matches, 3);
	tokens.Index.String = src_str + matches[4].rm_so;
	tokens.Index.Length = match_length(matches, 4);
	tokens.Swizzle.String = src_str + matches[5].rm_so;
	tokens.Swizzle.Length = match_length(matches, 5);

	/* Negate */
	if (tokens.Negate.Length > 0) {
		src_reg->Negate = RC_MASK_XYZW;
	}

	/* Abs */
	if (tokens.Abs.Length > 0) {
		src_reg->Abs = 1;
	}

	/* File */
	if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) {
		src_reg->File = RC_FILE_TEMPORARY;
	} else if (!strncmp(tokens.File.String, "input", tokens.File.Length)) {
		src_reg->File = RC_FILE_INPUT;
	} else if (!strncmp(tokens.File.String, "const", tokens.File.Length)) {
		src_reg->File = RC_FILE_CONSTANT;
	} else if (!strncmp(tokens.File.String, "none", tokens.File.Length)) {
		src_reg->File = RC_FILE_NONE;
	} else {
		/* Report unknown files instead of silently leaving the file
		 * untouched, as the destination parser does. */
		fprintf(stderr, "Unknown src register file type.\n");
		return 0;
	}

	/* Index */
	errno = 0;
	src_reg->Index = strtol(tokens.Index.String, NULL, 10);
	if (errno > 0) {
		fprintf(stderr, "Could not convert src register index.\n");
		return 0;
	}

	/* Swizzle */
	if (tokens.Swizzle.Length == 0) {
		src_reg->Swizzle = RC_SWIZZLE_XYZW;
	} else {
		int str_index = 1;
		src_reg->Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED);
		if (tokens.Swizzle.String[0] != '.') {
			fprintf(stderr, "First char of swizzle is not valid.\n");
			return 0;
		}
		/* The components are read straight from the NUL-terminated
		 * string rather than from the regex token, because the token
		 * stops at characters such as '_', '0', '1' and 'H'. A string
		 * that ends early hits the default case below. */
		for (i = 0; i < 4; i++, str_index++) {
			/* A '-' in front of a component negates only that
			 * component. */
			if (tokens.Swizzle.String[str_index] == '-') {
				src_reg->Negate |= (1 << i);
				str_index++;
			}
			switch(tokens.Swizzle.String[str_index]) {
			case 'x':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_X);
				break;
			case 'y':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Y);
				break;
			case 'z':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Z);
				break;
			case 'w':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_W);
				break;
			case '1':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ONE);
				break;
			case '0':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ZERO);
				break;
			case 'H':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_HALF);
				break;
			case '_':
				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_UNUSED);
				break;
			default:
				fprintf(stderr, "Unknown src register swizzle.\n");
				return 0;
			}
		}
	}
	DBG("File=%u index=%u swizzle=%x negate=%u abs=%u\n",
			src_reg->File, src_reg->Index, src_reg->Swizzle,
			src_reg->Negate, src_reg->Abs);
	return 1;
}
/* Entries needed for the destination register regex: the whole match plus
 * its three capture groups. */
#define REGEX_DST_MATCHES 4
/* Tokens of a destination register string, one per capture group of the
 * destination register regex, in order. */
struct dst_tokens {
/* Register file name, e.g. "temp" or "output". */
struct match_info File;
/* Register index between the square brackets. */
struct match_info Index;
/* Optional writemask suffix, including the leading '.'. */
struct match_info WriteMask;
};
/**
 * Fill in the destination register of inst from its textual form.
 *
 * Writemask components are OR-ed into the existing WriteMask, so the
 * instruction is expected to have been zeroed beforehand.
 *
 * NOTE: Warning in init_rc_normal_instruction() applies to this function as
 * well.
 *
 * @param inst The instruction whose U.I.DstReg is filled in.
 * @param dst_str A string that represents the destination register. The format
 * for this string is the same that is output by rc_program_print.
 * @return 1 On success, 0 on failure
 */
int init_rc_normal_dst(
	struct rc_instruction * inst,
	const char * dst_str)
{
	/* Groups: file, index, writemask. */
	const char * regex_str = "([[:lower:]]*)\\[([[:digit:]]*)\\](\\.*[[:lower:]]*)";
	regmatch_t matches[REGEX_DST_MATCHES];
	const char * file_str;
	const char * index_str;
	const char * mask_str;
	int file_len;
	int mask_len;

	if (!regex_helper(regex_str, dst_str, matches, REGEX_DST_MATCHES)) {
		fprintf(stderr, "Failed to execute regex for dst register.\n");
		return 0;
	}

	/* Locate the tokens inside dst_str. */
	file_str = dst_str + matches[1].rm_so;
	file_len = match_length(matches, 1);
	index_str = dst_str + matches[2].rm_so;
	mask_str = dst_str + matches[3].rm_so;
	mask_len = match_length(matches, 3);

	/* Register file */
	if (strncmp(file_str, "temp", file_len) == 0) {
		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
	} else if (strncmp(file_str, "output", file_len) == 0) {
		inst->U.I.DstReg.File = RC_FILE_OUTPUT;
	} else {
		fprintf(stderr, "Unknown dst register file type.\n");
		return 0;
	}

	/* Register index */
	errno = 0;
	inst->U.I.DstReg.Index = strtol(index_str, NULL, 10);
	if (errno > 0) {
		fprintf(stderr, "Could not convert dst register index\n");
		return 0;
	}

	/* Writemask: no suffix means all four components. */
	if (mask_len == 0) {
		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
	} else {
		int pos;

		if (mask_str[0] != '.') {
			fprintf(stderr, "1st char of writemask is not valid.\n");
			return 0;
		}
		for (pos = 1; pos < mask_len; pos++) {
			unsigned int bit;

			switch (mask_str[pos]) {
			case 'x':
				bit = RC_MASK_X;
				break;
			case 'y':
				bit = RC_MASK_Y;
				break;
			case 'z':
				bit = RC_MASK_Z;
				break;
			case 'w':
				bit = RC_MASK_W;
				break;
			default:
				fprintf(stderr, "Unknown swizzle in writemask.\n");
				return 0;
			}
			inst->U.I.DstReg.WriteMask |= bit;
		}
	}

	DBG("Dst Reg File=%u Index=%d Writemask=%d\n",
			inst->U.I.DstReg.File,
			inst->U.I.DstReg.Index,
			inst->U.I.DstReg.WriteMask);
	return 1;
}
/* Entries needed for the instruction regex: the whole match plus its six
 * capture groups. */
#define REGEX_INST_MATCHES 7
/* Tokens of an instruction string. */
struct inst_tokens {
/* Opcode name (upper-case letters). */
struct match_info Opcode;
/* Optional "_SAT" suffix; Length stays 0 when it is absent. */
struct match_info Sat;
/* Destination register text; only set for opcodes that have one. */
struct match_info Dst;
/* Source register texts, in order. */
struct match_info Srcs[3];
};
/**
 * Return a freshly allocated, NUL-terminated copy of the first length
 * characters of str, or NULL if the allocation fails. The caller frees it.
 */
static char * copy_token(const char * str, int length)
{
	char * copy = malloc(sizeof(char) * (length + 1));

	if (!copy) {
		return NULL;
	}
	memcpy(copy, str, length);
	copy[length] = '\0';
	return copy;
}

/**
 * Initialize a normal instruction based on inst_str.
 *
 * The instruction is zeroed first; then the opcode, the destination register
 * (if the opcode has one) and the source registers are filled in.
 *
 * WARNING: This function might not be able to handle every kind of format that
 * rc_program_print() can output. If you are having problems with a
 * particular string, you may need to add support for it to this function.
 *
 * @param inst The instruction to initialize.
 * @param inst_str A string that represents the instruction. The format for
 * this string is the same that is output by rc_program_print.
 * @return 1 On success, 0 on failure (the string did not match, a register
 * could not be parsed, or memory could not be allocated)
 */
int init_rc_normal_instruction(
	struct rc_instruction * inst,
	const char * inst_str)
{
	/* Groups: opcode, _SAT, then up to four comma separated operands. */
	const char * regex_str = "([[:upper:]]+)(_SAT)* ([^,]*)[, ]*([^,]*)[, ]*([^,]*)[, ]*([^;]*)";
	int i;
	int ok = 1;
	regmatch_t matches[REGEX_INST_MATCHES];
	struct inst_tokens tokens;

	/* Initialize inst */
	memset(inst, 0, sizeof(struct rc_instruction));
	inst->Type = RC_INSTRUCTION_NORMAL;

	/* Execute the regex */
	if (!regex_helper(regex_str, inst_str, matches, REGEX_INST_MATCHES)) {
		return 0;
	}
	memset(&tokens, 0, sizeof(tokens));

	/* Create Tokens */
	tokens.Opcode.String = inst_str + matches[1].rm_so;
	tokens.Opcode.Length = match_length(matches, 1);
	if (matches[2].rm_so > -1) {
		tokens.Sat.String = inst_str + matches[2].rm_so;
		tokens.Sat.Length = match_length(matches, 2);
	}

	/* Fill out the rest of the instruction. */
	for (i = 0; i < MAX_RC_OPCODE; i++) {
		const struct rc_opcode_info * info = rc_get_opcode_info(i);
		/* Regex group of the first source operand; moves up by one
		 * when the opcode has a destination register. */
		unsigned int first_src = 3;
		unsigned int j;

		if (strncmp(tokens.Opcode.String, info->Name, tokens.Opcode.Length)) {
			continue;
		}
		inst->U.I.Opcode = info->Opcode;

		if (info->HasDstReg) {
			char * dst_str;
			tokens.Dst.String = inst_str + matches[3].rm_so;
			tokens.Dst.Length = match_length(matches, 3);
			first_src++;

			dst_str = copy_token(tokens.Dst.String,
						tokens.Dst.Length);
			if (!dst_str) {
				fprintf(stderr, "Out of memory.\n");
				return 0;
			}
			if (!init_rc_normal_dst(inst, dst_str)) {
				ok = 0;
			}
			free(dst_str);
		}

		for (j = 0; j < info->NumSrcRegs; j++) {
			char * src_str;
			tokens.Srcs[j].String =
				inst_str + matches[first_src + j].rm_so;
			tokens.Srcs[j].Length =
				match_length(matches, first_src + j);

			src_str = copy_token(tokens.Srcs[j].String,
						tokens.Srcs[j].Length);
			if (!src_str) {
				fprintf(stderr, "Out of memory.\n");
				return 0;
			}
			if (!init_rc_normal_src(inst, j, src_str)) {
				ok = 0;
			}
			/* The copy is only needed while parsing. */
			free(src_str);
		}
		break;
	}
	return ok;
}

View file

@ -0,0 +1,13 @@
#ifndef RC_TEST_HELPERS_H
#define RC_TEST_HELPERS_H

/* Helpers that fill out rc_instruction structures from strings in the format
 * output by rc_program_print(). */

struct rc_instruction;

/* Initialize source register src_index of inst from src_str.
 * Returns 1 on success, 0 on failure. */
int init_rc_normal_src(
	struct rc_instruction * inst,
	unsigned int src_index,
	const char * src_str);

/* Initialize the destination register of inst from dst_str.
 * Returns 1 on success, 0 on failure. */
int init_rc_normal_dst(
	struct rc_instruction * inst,
	const char * dst_str);

/* Zero inst and initialize it as a normal instruction from inst_str.
 * Returns 1 on success, 0 on failure. */
int init_rc_normal_instruction(
	struct rc_instruction * inst,
	const char * inst_str);

#endif /* RC_TEST_HELPERS_H */

View file

@ -0,0 +1,35 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "unit_test.h"
/**
 * Run every test in tests, in order, and print a summary line for each.
 *
 * @param tests Array of tests terminated by an entry whose name is NULL. The
 * result member of each entry is reset before its test function is called
 * and holds the counters of that test afterwards.
 */
void run_tests(struct test tests[])
{
	int i;
	for (i = 0; tests[i].name; i++) {
		printf("Test %s\n", tests[i].name);
		memset(&tests[i].result, 0, sizeof(tests[i].result));
		tests[i].test_func(&tests[i].result);
		/* The counters are unsigned int, so they need %u. */
		printf("Test %s (%u/%u) pass\n", tests[i].name,
			tests[i].result.pass, tests[i].result.test_count);
	}
}
/**
 * Mark the start of a new subtest by counting it in result.
 */
void test_begin(struct test_result * result)
{
	result->test_count += 1;
}
/**
 * Record and print the outcome of the current subtest.
 *
 * @param cond Non-zero if the subtest passed, zero if it failed.
 */
void test_check(struct test_result * result, int cond)
{
	printf("Subtest %u -> ", result->test_count);

	if (!cond) {
		result->fail++;
		printf("Fail");
	} else {
		result->pass++;
		printf("Pass");
	}

	printf("\n");
}

View file

@ -0,0 +1,17 @@
#ifndef UNIT_TEST_H
#define UNIT_TEST_H

/* Counters collected while a test runs. */
struct test_result {
	/* Number of subtests started with test_begin(). */
	unsigned int test_count;
	/* Number of test_check() calls whose condition was true. */
	unsigned int pass;
	/* Number of test_check() calls whose condition was false. */
	unsigned int fail;
};

/* One named test; arrays of tests end with an entry whose name is NULL. */
struct test {
	const char * name;
	void (*test_func)(struct test_result * result);
	/* Reset and filled in by run_tests(). */
	struct test_result result;
};

/* Run all tests of a NULL-name terminated array and print their results. */
void run_tests(struct test tests[]);

/* Count a new subtest in result. */
void test_begin(struct test_result * result);

/* Record the current subtest as passed (cond != 0) or failed and print it. */
void test_check(struct test_result * result, int cond);

#endif /* UNIT_TEST_H */

View file

@ -24,7 +24,6 @@
#define R300_EMIT_H #define R300_EMIT_H
#include "r300_context.h" #include "r300_context.h"
#include "radeon_code.h"
struct rX00_fragment_program_code; struct rX00_fragment_program_code;
struct r300_vertex_program_code; struct r300_vertex_program_code;

View file

@ -38,8 +38,7 @@
#include "r300_texture.h" #include "r300_texture.h"
#include "r300_tgsi_to_rc.h" #include "r300_tgsi_to_rc.h"
#include "radeon_code.h" #include "compiler/radeon_compiler.h"
#include "radeon_compiler.h"
/* Convert info about FS input semantics to r300_shader_semantics. */ /* Convert info about FS input semantics to r300_shader_semantics. */
void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,

View file

@ -27,7 +27,7 @@
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_scan.h"
#include "radeon_code.h" #include "compiler/radeon_code.h"
#include "r300_shader_semantics.h" #include "r300_shader_semantics.h"
struct r300_fragment_shader_code { struct r300_fragment_shader_code {

View file

@ -2078,7 +2078,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_ALU_OUTC_D2A (3 << 23) # define R300_ALU_OUTC_D2A (3 << 23)
# define R300_ALU_OUTC_MIN (4 << 23) # define R300_ALU_OUTC_MIN (4 << 23)
# define R300_ALU_OUTC_MAX (5 << 23) # define R300_ALU_OUTC_MAX (5 << 23)
# define R300_ALU_OUTC_CMPH (7 << 23) # define R300_ALU_OUTC_CND (7 << 23)
# define R300_ALU_OUTC_CMP (8 << 23) # define R300_ALU_OUTC_CMP (8 << 23)
# define R300_ALU_OUTC_FRC (9 << 23) # define R300_ALU_OUTC_FRC (9 << 23)
# define R300_ALU_OUTC_REPL_ALPHA (10 << 23) # define R300_ALU_OUTC_REPL_ALPHA (10 << 23)
@ -2944,6 +2944,23 @@ enum {
/*\}*/ /*\}*/
#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \
(((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \
| ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \
| ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \
| ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \
| ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \
| ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT))
#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \
(((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \
| ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \
| ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \
| ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \
| ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \
| ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \
| ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
/* BEGIN: Packet 3 commands */ /* BEGIN: Packet 3 commands */
/* A primitive emission dword. */ /* A primitive emission dword. */
@ -3249,6 +3266,8 @@ enum {
# define R500_INST_RGB_CLAMP (1 << 19) # define R500_INST_RGB_CLAMP (1 << 19)
# define R500_INST_ALPHA_CLAMP (1 << 20) # define R500_INST_ALPHA_CLAMP (1 << 20)
# define R500_INST_ALU_RESULT_SEL (1 << 21) # define R500_INST_ALU_RESULT_SEL (1 << 21)
# define R500_INST_ALU_RESULT_SEL_RED (0 << 21)
# define R500_INST_ALU_RESULT_SEL_ALPHA (1 << 21)
# define R500_INST_ALPHA_PRED_INV (1 << 22) # define R500_INST_ALPHA_PRED_INV (1 << 22)
# define R500_INST_ALU_RESULT_OP_EQ (0 << 23) # define R500_INST_ALU_RESULT_OP_EQ (0 << 23)
# define R500_INST_ALU_RESULT_OP_LT (1 << 23) # define R500_INST_ALU_RESULT_OP_LT (1 << 23)

View file

@ -22,8 +22,7 @@
#include "r300_tgsi_to_rc.h" #include "r300_tgsi_to_rc.h"
#include "radeon_compiler.h" #include "compiler/radeon_compiler.h"
#include "radeon_program.h"
#include "tgsi/tgsi_info.h" #include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_parse.h"

View file

@ -32,7 +32,7 @@
#include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h" #include "tgsi/tgsi_ureg.h"
#include "radeon_compiler.h" #include "compiler/radeon_compiler.h"
/* Convert info about VS output semantics into r300_shader_semantics. */ /* Convert info about VS output semantics into r300_shader_semantics. */
static void r300_shader_read_vs_outputs( static void r300_shader_read_vs_outputs(

View file

@ -26,7 +26,7 @@
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_scan.h"
#include "radeon_code.h" #include "compiler/radeon_code.h"
#include "r300_context.h" #include "r300_context.h"
#include "r300_shader_semantics.h" #include "r300_shader_semantics.h"