mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 22:49:13 +02:00
r300/compiler: reformat using default mesa .clang-format rules
Most notably, this switches indentation from tabs to 3 spaces.

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Acked-by: Filip Gawin <filip@gawin.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23771>
This commit is contained in:
parent
4a6abbc9c1
commit
f94087be2c
55 changed files with 11838 additions and 12480 deletions
|
|
@ -9,72 +9,71 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
#define POOL_LARGE_ALLOC 4096
|
||||
#define POOL_ALIGN 8
|
||||
|
||||
#define POOL_ALIGN 8
|
||||
|
||||
struct memory_block {
|
||||
struct memory_block * next;
|
||||
struct memory_block *next;
|
||||
};
|
||||
|
||||
void memory_pool_init(struct memory_pool * pool)
|
||||
void
|
||||
memory_pool_init(struct memory_pool *pool)
|
||||
{
|
||||
memset(pool, 0, sizeof(struct memory_pool));
|
||||
memset(pool, 0, sizeof(struct memory_pool));
|
||||
}
|
||||
|
||||
|
||||
void memory_pool_destroy(struct memory_pool * pool)
|
||||
void
|
||||
memory_pool_destroy(struct memory_pool *pool)
|
||||
{
|
||||
while(pool->blocks) {
|
||||
struct memory_block * block = pool->blocks;
|
||||
pool->blocks = block->next;
|
||||
free(block);
|
||||
}
|
||||
while (pool->blocks) {
|
||||
struct memory_block *block = pool->blocks;
|
||||
pool->blocks = block->next;
|
||||
free(block);
|
||||
}
|
||||
}
|
||||
|
||||
static void refill_pool(struct memory_pool * pool)
|
||||
static void
|
||||
refill_pool(struct memory_pool *pool)
|
||||
{
|
||||
unsigned int blocksize = pool->total_allocated;
|
||||
struct memory_block * newblock;
|
||||
unsigned int blocksize = pool->total_allocated;
|
||||
struct memory_block *newblock;
|
||||
|
||||
if (!blocksize)
|
||||
blocksize = 2*POOL_LARGE_ALLOC;
|
||||
if (!blocksize)
|
||||
blocksize = 2 * POOL_LARGE_ALLOC;
|
||||
|
||||
newblock = malloc(blocksize);
|
||||
newblock->next = pool->blocks;
|
||||
pool->blocks = newblock;
|
||||
newblock = malloc(blocksize);
|
||||
newblock->next = pool->blocks;
|
||||
pool->blocks = newblock;
|
||||
|
||||
pool->head = (unsigned char*)(newblock + 1);
|
||||
pool->end = ((unsigned char*)newblock) + blocksize;
|
||||
pool->total_allocated += blocksize;
|
||||
pool->head = (unsigned char *)(newblock + 1);
|
||||
pool->end = ((unsigned char *)newblock) + blocksize;
|
||||
pool->total_allocated += blocksize;
|
||||
}
|
||||
|
||||
|
||||
void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes)
|
||||
void *
|
||||
memory_pool_malloc(struct memory_pool *pool, unsigned int bytes)
|
||||
{
|
||||
if (bytes < POOL_LARGE_ALLOC) {
|
||||
void * ptr;
|
||||
if (bytes < POOL_LARGE_ALLOC) {
|
||||
void *ptr;
|
||||
|
||||
if (pool->head + bytes > pool->end)
|
||||
refill_pool(pool);
|
||||
if (pool->head + bytes > pool->end)
|
||||
refill_pool(pool);
|
||||
|
||||
assert(pool->head + bytes <= pool->end);
|
||||
assert(pool->head + bytes <= pool->end);
|
||||
|
||||
ptr = pool->head;
|
||||
ptr = pool->head;
|
||||
|
||||
pool->head += bytes;
|
||||
pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1));
|
||||
pool->head += bytes;
|
||||
pool->head =
|
||||
(unsigned char *)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1));
|
||||
|
||||
return ptr;
|
||||
} else {
|
||||
struct memory_block * block = malloc(bytes + sizeof(struct memory_block));
|
||||
return ptr;
|
||||
} else {
|
||||
struct memory_block *block = malloc(bytes + sizeof(struct memory_block));
|
||||
|
||||
block->next = pool->blocks;
|
||||
pool->blocks = block;
|
||||
block->next = pool->blocks;
|
||||
pool->blocks = block;
|
||||
|
||||
return (block + 1);
|
||||
}
|
||||
return (block + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -18,17 +18,15 @@ struct memory_block;
|
|||
* reference counting headaches.
|
||||
*/
|
||||
struct memory_pool {
|
||||
unsigned char * head;
|
||||
unsigned char * end;
|
||||
unsigned int total_allocated;
|
||||
struct memory_block * blocks;
|
||||
unsigned char *head;
|
||||
unsigned char *end;
|
||||
unsigned int total_allocated;
|
||||
struct memory_block *blocks;
|
||||
};
|
||||
|
||||
|
||||
void memory_pool_init(struct memory_pool * pool);
|
||||
void memory_pool_destroy(struct memory_pool * pool);
|
||||
void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
|
||||
|
||||
void memory_pool_init(struct memory_pool *pool);
|
||||
void memory_pool_destroy(struct memory_pool *pool);
|
||||
void *memory_pool_malloc(struct memory_pool *pool, unsigned int bytes);
|
||||
|
||||
/**
|
||||
* Generic helper for growing an array that has separate size/count
|
||||
|
|
@ -46,18 +44,19 @@ void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
|
|||
* \warning Array, Size, Reserved have to be lvalues and may be evaluated
|
||||
* several times.
|
||||
*/
|
||||
#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \
|
||||
unsigned int _num = (num); \
|
||||
if ((size) + _num > (reserved)) { \
|
||||
unsigned int newreserve = (reserved) * 2; \
|
||||
type * newarray; \
|
||||
if (newreserve < _num) \
|
||||
newreserve = 4 * _num; /* arbitrary heuristic */ \
|
||||
newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \
|
||||
memcpy(newarray, (array), (size) * sizeof(type)); \
|
||||
(array) = newarray; \
|
||||
(reserved) = newreserve; \
|
||||
} \
|
||||
} while(0)
|
||||
#define memory_pool_array_reserve(pool, type, array, size, reserved, num) \
|
||||
do { \
|
||||
unsigned int _num = (num); \
|
||||
if ((size) + _num > (reserved)) { \
|
||||
unsigned int newreserve = (reserved) * 2; \
|
||||
type *newarray; \
|
||||
if (newreserve < _num) \
|
||||
newreserve = 4 * _num; /* arbitrary heuristic */ \
|
||||
newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \
|
||||
memcpy(newarray, (array), (size) * sizeof(type)); \
|
||||
(array) = newarray; \
|
||||
(reserved) = newreserve; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#endif /* MEMORY_POOL_H */
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -12,7 +12,6 @@
|
|||
struct nir_shader;
|
||||
struct pipe_screen;
|
||||
|
||||
const void *nir_to_rc(struct nir_shader *s,
|
||||
struct pipe_screen *screen);
|
||||
const void *nir_to_rc(struct nir_shader *s, struct pipe_screen *screen);
|
||||
|
||||
#endif /* NIR_TO_RC_H */
|
||||
|
|
|
|||
|
|
@ -9,308 +9,257 @@
|
|||
|
||||
#include "r300_reg.h"
|
||||
|
||||
static void presub_string(char out[10], unsigned int inst)
|
||||
static void
|
||||
presub_string(char out[10], unsigned int inst)
|
||||
{
|
||||
switch(inst & 0x600000){
|
||||
case R300_ALU_SRCP_1_MINUS_2_SRC0:
|
||||
sprintf(out, "bias");
|
||||
break;
|
||||
case R300_ALU_SRCP_SRC1_MINUS_SRC0:
|
||||
sprintf(out, "sub");
|
||||
break;
|
||||
case R300_ALU_SRCP_SRC1_PLUS_SRC0:
|
||||
sprintf(out, "add");
|
||||
break;
|
||||
case R300_ALU_SRCP_1_MINUS_SRC0:
|
||||
sprintf(out, "inv ");
|
||||
break;
|
||||
}
|
||||
switch (inst & 0x600000) {
|
||||
case R300_ALU_SRCP_1_MINUS_2_SRC0:
|
||||
sprintf(out, "bias");
|
||||
break;
|
||||
case R300_ALU_SRCP_SRC1_MINUS_SRC0:
|
||||
sprintf(out, "sub");
|
||||
break;
|
||||
case R300_ALU_SRCP_SRC1_PLUS_SRC0:
|
||||
sprintf(out, "add");
|
||||
break;
|
||||
case R300_ALU_SRCP_1_MINUS_SRC0:
|
||||
sprintf(out, "inv ");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int get_msb(unsigned int bit, unsigned int r400_ext_addr)
|
||||
static int
|
||||
get_msb(unsigned int bit, unsigned int r400_ext_addr)
|
||||
{
|
||||
return (r400_ext_addr & bit) ? 1 << 5 : 0;
|
||||
return (r400_ext_addr & bit) ? 1 << 5 : 0;
|
||||
}
|
||||
|
||||
/* just some random things... */
|
||||
void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
|
||||
void
|
||||
r300FragmentProgramDump(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
|
||||
struct r300_fragment_program_code *code = &compiler->code->code.r300;
|
||||
int n, i, j;
|
||||
static int pc = 0;
|
||||
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler *)c;
|
||||
struct r300_fragment_program_code *code = &compiler->code->code.r300;
|
||||
int n, i, j;
|
||||
static int pc = 0;
|
||||
|
||||
fprintf(stderr, "pc=%d*************************************\n", pc++);
|
||||
fprintf(stderr, "pc=%d*************************************\n", pc++);
|
||||
|
||||
fprintf(stderr, "Hardware program\n");
|
||||
fprintf(stderr, "----------------\n");
|
||||
if (c->is_r400) {
|
||||
fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext);
|
||||
}
|
||||
fprintf(stderr, "Hardware program\n");
|
||||
fprintf(stderr, "----------------\n");
|
||||
if (c->is_r400) {
|
||||
fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext);
|
||||
}
|
||||
|
||||
for (n = 0; n <= (code->config & 3); n++) {
|
||||
uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
|
||||
unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) +
|
||||
(((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6);
|
||||
unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) +
|
||||
(((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6);
|
||||
int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
|
||||
int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
|
||||
for (n = 0; n <= (code->config & 3); n++) {
|
||||
uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
|
||||
unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) +
|
||||
(((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6);
|
||||
unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) +
|
||||
(((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6);
|
||||
int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
|
||||
int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
|
||||
|
||||
fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, "
|
||||
"alu_end: %u, tex_end: %d (code_addr: %08x)\n", n,
|
||||
alu_offset, tex_offset, alu_end, tex_end, code_addr);
|
||||
fprintf(stderr,
|
||||
"NODE %d: alu_offset: %u, tex_offset: %d, "
|
||||
"alu_end: %u, tex_end: %d (code_addr: %08x)\n",
|
||||
n, alu_offset, tex_offset, alu_end, tex_end, code_addr);
|
||||
|
||||
if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
|
||||
fprintf(stderr, " TEX:\n");
|
||||
for (i = tex_offset;
|
||||
i <= tex_offset + tex_end;
|
||||
++i) {
|
||||
const char *instr;
|
||||
if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
|
||||
fprintf(stderr, " TEX:\n");
|
||||
for (i = tex_offset; i <= tex_offset + tex_end; ++i) {
|
||||
const char *instr;
|
||||
|
||||
switch ((code->tex.
|
||||
inst[i] >> R300_TEX_INST_SHIFT) &
|
||||
15) {
|
||||
case R300_TEX_OP_LD:
|
||||
instr = "TEX";
|
||||
break;
|
||||
case R300_TEX_OP_KIL:
|
||||
instr = "KIL";
|
||||
break;
|
||||
case R300_TEX_OP_TXP:
|
||||
instr = "TXP";
|
||||
break;
|
||||
case R300_TEX_OP_TXB:
|
||||
instr = "TXB";
|
||||
break;
|
||||
default:
|
||||
instr = "UNKNOWN";
|
||||
}
|
||||
switch ((code->tex.inst[i] >> R300_TEX_INST_SHIFT) & 15) {
|
||||
case R300_TEX_OP_LD:
|
||||
instr = "TEX";
|
||||
break;
|
||||
case R300_TEX_OP_KIL:
|
||||
instr = "KIL";
|
||||
break;
|
||||
case R300_TEX_OP_TXP:
|
||||
instr = "TXP";
|
||||
break;
|
||||
case R300_TEX_OP_TXB:
|
||||
instr = "TXB";
|
||||
break;
|
||||
default:
|
||||
instr = "UNKNOWN";
|
||||
}
|
||||
|
||||
fprintf(stderr,
|
||||
" %s t%i, %c%i, texture[%i] (%08x)\n",
|
||||
instr,
|
||||
(code->tex.
|
||||
inst[i] >> R300_DST_ADDR_SHIFT) & 31,
|
||||
't',
|
||||
(code->tex.
|
||||
inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
|
||||
(code->tex.
|
||||
inst[i] & R300_TEX_ID_MASK) >>
|
||||
R300_TEX_ID_SHIFT,
|
||||
code->tex.inst[i]);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, " %s t%i, %c%i, texture[%i] (%08x)\n", instr,
|
||||
(code->tex.inst[i] >> R300_DST_ADDR_SHIFT) & 31, 't',
|
||||
(code->tex.inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
|
||||
(code->tex.inst[i] & R300_TEX_ID_MASK) >> R300_TEX_ID_SHIFT, code->tex.inst[i]);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = alu_offset;
|
||||
i <= alu_offset + alu_end; ++i) {
|
||||
char srcc[4][10], dstc[20];
|
||||
char srca[4][10], dsta[20];
|
||||
char argc[3][20];
|
||||
char arga[3][20];
|
||||
char flags[5], tmp[10];
|
||||
for (i = alu_offset; i <= alu_offset + alu_end; ++i) {
|
||||
char srcc[4][10], dstc[20];
|
||||
char srca[4][10], dsta[20];
|
||||
char argc[3][20];
|
||||
char arga[3][20];
|
||||
char flags[5], tmp[10];
|
||||
|
||||
for (j = 0; j < 3; ++j) {
|
||||
int regc = code->alu.inst[i].rgb_addr >> (j * 6);
|
||||
int rega = code->alu.inst[i].alpha_addr >> (j * 6);
|
||||
int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j),
|
||||
code->alu.inst[i].r400_ext_addr);
|
||||
int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j),
|
||||
code->alu.inst[i].r400_ext_addr);
|
||||
for (j = 0; j < 3; ++j) {
|
||||
int regc = code->alu.inst[i].rgb_addr >> (j * 6);
|
||||
int rega = code->alu.inst[i].alpha_addr >> (j * 6);
|
||||
int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j), code->alu.inst[i].r400_ext_addr);
|
||||
int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j), code->alu.inst[i].r400_ext_addr);
|
||||
|
||||
sprintf(srcc[j], "%c%i",
|
||||
(regc & 32) ? 'c' : 't', (regc & 31) | msbc);
|
||||
sprintf(srca[j], "%c%i",
|
||||
(rega & 32) ? 'c' : 't', (rega & 31) | msba);
|
||||
}
|
||||
sprintf(srcc[j], "%c%i", (regc & 32) ? 'c' : 't', (regc & 31) | msbc);
|
||||
sprintf(srca[j], "%c%i", (rega & 32) ? 'c' : 't', (rega & 31) | msba);
|
||||
}
|
||||
|
||||
dstc[0] = 0;
|
||||
sprintf(flags, "%s%s%s",
|
||||
(code->alu.inst[i].
|
||||
rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
|
||||
(code->alu.inst[i].
|
||||
rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
|
||||
(code->alu.inst[i].
|
||||
rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
|
||||
if (flags[0] != 0) {
|
||||
unsigned int msb = get_msb(
|
||||
R400_ADDRD_EXT_RGB_MSB_BIT,
|
||||
code->alu.inst[i].r400_ext_addr);
|
||||
dstc[0] = 0;
|
||||
sprintf(flags, "%s%s%s", (code->alu.inst[i].rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
|
||||
(code->alu.inst[i].rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
|
||||
(code->alu.inst[i].rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
|
||||
if (flags[0] != 0) {
|
||||
unsigned int msb = get_msb(R400_ADDRD_EXT_RGB_MSB_BIT, code->alu.inst[i].r400_ext_addr);
|
||||
|
||||
sprintf(dstc, "t%i.%s ",
|
||||
((code->alu.inst[i].
|
||||
rgb_addr >> R300_ALU_DSTC_SHIFT)
|
||||
& 31) | msb,
|
||||
flags);
|
||||
}
|
||||
sprintf(flags, "%s%s%s",
|
||||
(code->alu.inst[i].
|
||||
rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
|
||||
(code->alu.inst[i].
|
||||
rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
|
||||
(code->alu.inst[i].
|
||||
rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
|
||||
if (flags[0] != 0) {
|
||||
sprintf(tmp, "o%i.%s",
|
||||
(code->alu.inst[i].
|
||||
rgb_addr >> 29) & 3,
|
||||
flags);
|
||||
strcat(dstc, tmp);
|
||||
}
|
||||
/* Presub */
|
||||
presub_string(srcc[3], code->alu.inst[i].rgb_inst);
|
||||
presub_string(srca[3], code->alu.inst[i].alpha_inst);
|
||||
sprintf(dstc, "t%i.%s ",
|
||||
((code->alu.inst[i].rgb_addr >> R300_ALU_DSTC_SHIFT) & 31) | msb, flags);
|
||||
}
|
||||
sprintf(flags, "%s%s%s", (code->alu.inst[i].rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
|
||||
(code->alu.inst[i].rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
|
||||
(code->alu.inst[i].rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
|
||||
if (flags[0] != 0) {
|
||||
sprintf(tmp, "o%i.%s", (code->alu.inst[i].rgb_addr >> 29) & 3, flags);
|
||||
strcat(dstc, tmp);
|
||||
}
|
||||
/* Presub */
|
||||
presub_string(srcc[3], code->alu.inst[i].rgb_inst);
|
||||
presub_string(srca[3], code->alu.inst[i].alpha_inst);
|
||||
|
||||
dsta[0] = 0;
|
||||
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
|
||||
unsigned int msb = get_msb(
|
||||
R400_ADDRD_EXT_A_MSB_BIT,
|
||||
code->alu.inst[i].r400_ext_addr);
|
||||
sprintf(dsta, "t%i.w ",
|
||||
((code->alu.inst[i].
|
||||
alpha_addr >> R300_ALU_DSTA_SHIFT) & 31)
|
||||
| msb);
|
||||
}
|
||||
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
|
||||
sprintf(tmp, "o%i.w ",
|
||||
(code->alu.inst[i].
|
||||
alpha_addr >> 25) & 3);
|
||||
strcat(dsta, tmp);
|
||||
}
|
||||
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
|
||||
strcat(dsta, "Z");
|
||||
}
|
||||
dsta[0] = 0;
|
||||
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
|
||||
unsigned int msb = get_msb(R400_ADDRD_EXT_A_MSB_BIT, code->alu.inst[i].r400_ext_addr);
|
||||
sprintf(dsta, "t%i.w ",
|
||||
((code->alu.inst[i].alpha_addr >> R300_ALU_DSTA_SHIFT) & 31) | msb);
|
||||
}
|
||||
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
|
||||
sprintf(tmp, "o%i.w ", (code->alu.inst[i].alpha_addr >> 25) & 3);
|
||||
strcat(dsta, tmp);
|
||||
}
|
||||
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
|
||||
strcat(dsta, "Z");
|
||||
}
|
||||
|
||||
fprintf(stderr,
|
||||
"%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n"
|
||||
" w: %3s %3s %3s %5s-> %-20s (%08x)\n", i,
|
||||
srcc[0], srcc[1], srcc[2], srcc[3], dstc,
|
||||
code->alu.inst[i].rgb_addr, srca[0], srca[1],
|
||||
srca[2], srca[3], dsta,
|
||||
code->alu.inst[i].alpha_addr);
|
||||
fprintf(stderr,
|
||||
"%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n"
|
||||
" w: %3s %3s %3s %5s-> %-20s (%08x)\n",
|
||||
i, srcc[0], srcc[1], srcc[2], srcc[3], dstc, code->alu.inst[i].rgb_addr, srca[0],
|
||||
srca[1], srca[2], srca[3], dsta, code->alu.inst[i].alpha_addr);
|
||||
|
||||
for (j = 0; j < 3; ++j) {
|
||||
int regc = code->alu.inst[i].rgb_inst >> (j * 7);
|
||||
int rega = code->alu.inst[i].alpha_inst >> (j * 7);
|
||||
int d;
|
||||
char buf[20];
|
||||
for (j = 0; j < 3; ++j) {
|
||||
int regc = code->alu.inst[i].rgb_inst >> (j * 7);
|
||||
int rega = code->alu.inst[i].alpha_inst >> (j * 7);
|
||||
int d;
|
||||
char buf[20];
|
||||
|
||||
d = regc & 31;
|
||||
if (d < 12) {
|
||||
switch (d % 4) {
|
||||
case R300_ALU_ARGC_SRC0C_XYZ:
|
||||
sprintf(buf, "%s.xyz",
|
||||
srcc[d / 4]);
|
||||
break;
|
||||
case R300_ALU_ARGC_SRC0C_XXX:
|
||||
sprintf(buf, "%s.xxx",
|
||||
srcc[d / 4]);
|
||||
break;
|
||||
case R300_ALU_ARGC_SRC0C_YYY:
|
||||
sprintf(buf, "%s.yyy",
|
||||
srcc[d / 4]);
|
||||
break;
|
||||
case R300_ALU_ARGC_SRC0C_ZZZ:
|
||||
sprintf(buf, "%s.zzz",
|
||||
srcc[d / 4]);
|
||||
break;
|
||||
}
|
||||
} else if (d < 15) {
|
||||
sprintf(buf, "%s.www", srca[d - 12]);
|
||||
} else if (d < 20 ) {
|
||||
switch(d) {
|
||||
case R300_ALU_ARGC_SRCP_XYZ:
|
||||
sprintf(buf, "srcp.xyz");
|
||||
break;
|
||||
case R300_ALU_ARGC_SRCP_XXX:
|
||||
sprintf(buf, "srcp.xxx");
|
||||
break;
|
||||
case R300_ALU_ARGC_SRCP_YYY:
|
||||
sprintf(buf, "srcp.yyy");
|
||||
break;
|
||||
case R300_ALU_ARGC_SRCP_ZZZ:
|
||||
sprintf(buf, "srcp.zzz");
|
||||
break;
|
||||
case R300_ALU_ARGC_SRCP_WWW:
|
||||
sprintf(buf, "srcp.www");
|
||||
break;
|
||||
}
|
||||
} else if (d == 20) {
|
||||
sprintf(buf, "0.0");
|
||||
} else if (d == 21) {
|
||||
sprintf(buf, "1.0");
|
||||
} else if (d == 22) {
|
||||
sprintf(buf, "0.5");
|
||||
} else if (d >= 23 && d < 32) {
|
||||
d -= 23;
|
||||
switch (d / 3) {
|
||||
case 0:
|
||||
sprintf(buf, "%s.yzx",
|
||||
srcc[d % 3]);
|
||||
break;
|
||||
case 1:
|
||||
sprintf(buf, "%s.zxy",
|
||||
srcc[d % 3]);
|
||||
break;
|
||||
case 2:
|
||||
sprintf(buf, "%s.Wzy",
|
||||
srcc[d % 3]);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
sprintf(buf, "%i", d);
|
||||
}
|
||||
d = regc & 31;
|
||||
if (d < 12) {
|
||||
switch (d % 4) {
|
||||
case R300_ALU_ARGC_SRC0C_XYZ:
|
||||
sprintf(buf, "%s.xyz", srcc[d / 4]);
|
||||
break;
|
||||
case R300_ALU_ARGC_SRC0C_XXX:
|
||||
sprintf(buf, "%s.xxx", srcc[d / 4]);
|
||||
break;
|
||||
case R300_ALU_ARGC_SRC0C_YYY:
|
||||
sprintf(buf, "%s.yyy", srcc[d / 4]);
|
||||
break;
|
||||
case R300_ALU_ARGC_SRC0C_ZZZ:
|
||||
sprintf(buf, "%s.zzz", srcc[d / 4]);
|
||||
break;
|
||||
}
|
||||
} else if (d < 15) {
|
||||
sprintf(buf, "%s.www", srca[d - 12]);
|
||||
} else if (d < 20) {
|
||||
switch (d) {
|
||||
case R300_ALU_ARGC_SRCP_XYZ:
|
||||
sprintf(buf, "srcp.xyz");
|
||||
break;
|
||||
case R300_ALU_ARGC_SRCP_XXX:
|
||||
sprintf(buf, "srcp.xxx");
|
||||
break;
|
||||
case R300_ALU_ARGC_SRCP_YYY:
|
||||
sprintf(buf, "srcp.yyy");
|
||||
break;
|
||||
case R300_ALU_ARGC_SRCP_ZZZ:
|
||||
sprintf(buf, "srcp.zzz");
|
||||
break;
|
||||
case R300_ALU_ARGC_SRCP_WWW:
|
||||
sprintf(buf, "srcp.www");
|
||||
break;
|
||||
}
|
||||
} else if (d == 20) {
|
||||
sprintf(buf, "0.0");
|
||||
} else if (d == 21) {
|
||||
sprintf(buf, "1.0");
|
||||
} else if (d == 22) {
|
||||
sprintf(buf, "0.5");
|
||||
} else if (d >= 23 && d < 32) {
|
||||
d -= 23;
|
||||
switch (d / 3) {
|
||||
case 0:
|
||||
sprintf(buf, "%s.yzx", srcc[d % 3]);
|
||||
break;
|
||||
case 1:
|
||||
sprintf(buf, "%s.zxy", srcc[d % 3]);
|
||||
break;
|
||||
case 2:
|
||||
sprintf(buf, "%s.Wzy", srcc[d % 3]);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
sprintf(buf, "%i", d);
|
||||
}
|
||||
|
||||
sprintf(argc[j], "%s%s%s%s",
|
||||
(regc & 32) ? "-" : "",
|
||||
(regc & 64) ? "|" : "",
|
||||
buf, (regc & 64) ? "|" : "");
|
||||
sprintf(argc[j], "%s%s%s%s", (regc & 32) ? "-" : "", (regc & 64) ? "|" : "", buf,
|
||||
(regc & 64) ? "|" : "");
|
||||
|
||||
d = rega & 31;
|
||||
if (d < 9) {
|
||||
sprintf(buf, "%s.%c", srcc[d / 3],
|
||||
'x' + (char)(d % 3));
|
||||
} else if (d < 12) {
|
||||
sprintf(buf, "%s.w", srca[d - 9]);
|
||||
} else if (d < 16) {
|
||||
switch(d) {
|
||||
case R300_ALU_ARGA_SRCP_X:
|
||||
sprintf(buf, "srcp.x");
|
||||
break;
|
||||
case R300_ALU_ARGA_SRCP_Y:
|
||||
sprintf(buf, "srcp.y");
|
||||
break;
|
||||
case R300_ALU_ARGA_SRCP_Z:
|
||||
sprintf(buf, "srcp.z");
|
||||
break;
|
||||
case R300_ALU_ARGA_SRCP_W:
|
||||
sprintf(buf, "srcp.w");
|
||||
break;
|
||||
}
|
||||
} else if (d == 16) {
|
||||
sprintf(buf, "0.0");
|
||||
} else if (d == 17) {
|
||||
sprintf(buf, "1.0");
|
||||
} else if (d == 18) {
|
||||
sprintf(buf, "0.5");
|
||||
} else {
|
||||
sprintf(buf, "%i", d);
|
||||
}
|
||||
d = rega & 31;
|
||||
if (d < 9) {
|
||||
sprintf(buf, "%s.%c", srcc[d / 3], 'x' + (char)(d % 3));
|
||||
} else if (d < 12) {
|
||||
sprintf(buf, "%s.w", srca[d - 9]);
|
||||
} else if (d < 16) {
|
||||
switch (d) {
|
||||
case R300_ALU_ARGA_SRCP_X:
|
||||
sprintf(buf, "srcp.x");
|
||||
break;
|
||||
case R300_ALU_ARGA_SRCP_Y:
|
||||
sprintf(buf, "srcp.y");
|
||||
break;
|
||||
case R300_ALU_ARGA_SRCP_Z:
|
||||
sprintf(buf, "srcp.z");
|
||||
break;
|
||||
case R300_ALU_ARGA_SRCP_W:
|
||||
sprintf(buf, "srcp.w");
|
||||
break;
|
||||
}
|
||||
} else if (d == 16) {
|
||||
sprintf(buf, "0.0");
|
||||
} else if (d == 17) {
|
||||
sprintf(buf, "1.0");
|
||||
} else if (d == 18) {
|
||||
sprintf(buf, "0.5");
|
||||
} else {
|
||||
sprintf(buf, "%i", d);
|
||||
}
|
||||
|
||||
sprintf(arga[j], "%s%s%s%s",
|
||||
(rega & 32) ? "-" : "",
|
||||
(rega & 64) ? "|" : "",
|
||||
buf, (rega & 64) ? "|" : "");
|
||||
}
|
||||
sprintf(arga[j], "%s%s%s%s", (rega & 32) ? "-" : "", (rega & 64) ? "|" : "", buf,
|
||||
(rega & 64) ? "|" : "");
|
||||
}
|
||||
|
||||
fprintf(stderr, " xyz: %8s %8s %8s op: %08x %s\n"
|
||||
" w: %8s %8s %8s op: %08x\n",
|
||||
argc[0], argc[1], argc[2],
|
||||
code->alu.inst[i].rgb_inst,
|
||||
code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ?
|
||||
"NOP" : "",
|
||||
arga[0], arga[1],arga[2],
|
||||
code->alu.inst[i].alpha_inst);
|
||||
}
|
||||
}
|
||||
fprintf(stderr,
|
||||
" xyz: %8s %8s %8s op: %08x %s\n"
|
||||
" w: %8s %8s %8s op: %08x\n",
|
||||
argc[0], argc[1], argc[2], code->alu.inst[i].rgb_inst,
|
||||
code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ? "NOP" : "", arga[0], arga[1],
|
||||
arga[2], code->alu.inst[i].alpha_inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@
|
|||
#include "radeon_compiler.h"
|
||||
#include "radeon_program.h"
|
||||
|
||||
|
||||
extern void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user);
|
||||
|
||||
extern void r300FragmentProgramDump(struct radeon_compiler *c, void *user);
|
||||
|
|
|
|||
|
|
@ -18,41 +18,42 @@
|
|||
|
||||
#include "r300_reg.h"
|
||||
|
||||
#include "radeon_program_pair.h"
|
||||
#include "r300_fragprog_swizzle.h"
|
||||
#include "radeon_program_pair.h"
|
||||
|
||||
#include "util/compiler.h"
|
||||
|
||||
|
||||
struct r300_emit_state {
|
||||
struct r300_fragment_program_compiler * compiler;
|
||||
struct r300_fragment_program_compiler *compiler;
|
||||
|
||||
unsigned current_node : 2;
|
||||
unsigned node_first_tex : 8;
|
||||
unsigned node_first_alu : 8;
|
||||
uint32_t node_flags;
|
||||
unsigned current_node : 2;
|
||||
unsigned node_first_tex : 8;
|
||||
unsigned node_first_alu : 8;
|
||||
uint32_t node_flags;
|
||||
};
|
||||
|
||||
#define PROG_CODE \
|
||||
struct r300_fragment_program_compiler *c = emit->compiler; \
|
||||
struct r300_fragment_program_code *code = &c->code->code.r300
|
||||
#define PROG_CODE \
|
||||
struct r300_fragment_program_compiler *c = emit->compiler; \
|
||||
struct r300_fragment_program_code *code = &c->code->code.r300
|
||||
|
||||
#define error(fmt, args...) do { \
|
||||
rc_error(&c->Base, "%s::%s(): " fmt "\n", \
|
||||
__FILE__, __func__, ##args); \
|
||||
} while(0)
|
||||
#define error(fmt, args...) \
|
||||
do { \
|
||||
rc_error(&c->Base, "%s::%s(): " fmt "\n", __FILE__, __func__, ##args); \
|
||||
} while (0)
|
||||
|
||||
static unsigned int get_msbs_alu(unsigned int bits)
|
||||
static unsigned int
|
||||
get_msbs_alu(unsigned int bits)
|
||||
{
|
||||
return (bits >> 6) & 0x7;
|
||||
return (bits >> 6) & 0x7;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param lsbs The number of least significant bits
|
||||
*/
|
||||
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
|
||||
static unsigned int
|
||||
get_msbs_tex(unsigned int bits, unsigned int lsbs)
|
||||
{
|
||||
return (bits >> lsbs) & 0x15;
|
||||
return (bits >> lsbs) & 0x15;
|
||||
}
|
||||
|
||||
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask)
|
||||
|
|
@ -60,477 +61,437 @@ static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
|
|||
/**
|
||||
* Mark a temporary register as used.
|
||||
*/
|
||||
static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
|
||||
static void
|
||||
use_temporary(struct r300_fragment_program_code *code, unsigned int index)
|
||||
{
|
||||
if (index > code->pixsize)
|
||||
code->pixsize = index;
|
||||
if (index > code->pixsize)
|
||||
code->pixsize = index;
|
||||
}
|
||||
|
||||
static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
|
||||
static unsigned int
|
||||
use_source(struct r300_fragment_program_code *code, struct rc_pair_instruction_source src)
|
||||
{
|
||||
if (!src.Used)
|
||||
return 0;
|
||||
if (!src.Used)
|
||||
return 0;
|
||||
|
||||
if (src.File == RC_FILE_CONSTANT) {
|
||||
return src.Index | (1 << 5);
|
||||
} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
|
||||
use_temporary(code, src.Index);
|
||||
return src.Index & 0x1f;
|
||||
}
|
||||
if (src.File == RC_FILE_CONSTANT) {
|
||||
return src.Index | (1 << 5);
|
||||
} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
|
||||
use_temporary(code, src.Index);
|
||||
return src.Index & 0x1f;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
|
||||
static unsigned int
|
||||
translate_rgb_opcode(struct r300_fragment_program_compiler *c, rc_opcode opcode)
|
||||
{
|
||||
switch(opcode) {
|
||||
case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
|
||||
case RC_OPCODE_CND: return R300_ALU_OUTC_CND;
|
||||
case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
|
||||
case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
|
||||
case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
|
||||
default:
|
||||
error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
|
||||
FALLTHROUGH;
|
||||
case RC_OPCODE_NOP:
|
||||
FALLTHROUGH;
|
||||
case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
|
||||
case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
|
||||
case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
|
||||
case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
|
||||
}
|
||||
switch (opcode) {
|
||||
case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
|
||||
case RC_OPCODE_CND: return R300_ALU_OUTC_CND;
|
||||
case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
|
||||
case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
|
||||
case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
|
||||
default:
|
||||
error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
|
||||
FALLTHROUGH;
|
||||
case RC_OPCODE_NOP: FALLTHROUGH;
|
||||
case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
|
||||
case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
|
||||
case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
|
||||
case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
|
||||
static unsigned int
|
||||
translate_alpha_opcode(struct r300_fragment_program_compiler *c, rc_opcode opcode)
|
||||
{
|
||||
switch(opcode) {
|
||||
case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
|
||||
case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
|
||||
case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
|
||||
case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
|
||||
case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
|
||||
case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
|
||||
case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
|
||||
default:
|
||||
error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
|
||||
FALLTHROUGH;
|
||||
case RC_OPCODE_NOP:
|
||||
FALLTHROUGH;
|
||||
case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
|
||||
case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
|
||||
case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
|
||||
case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
|
||||
case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
|
||||
}
|
||||
switch (opcode) {
|
||||
case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
|
||||
case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
|
||||
case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
|
||||
case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
|
||||
case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
|
||||
case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
|
||||
case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
|
||||
default:
|
||||
error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
|
||||
FALLTHROUGH;
|
||||
case RC_OPCODE_NOP: FALLTHROUGH;
|
||||
case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
|
||||
case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
|
||||
case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
|
||||
case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
|
||||
case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit one paired ALU instruction.
|
||||
*/
|
||||
static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
|
||||
static int
|
||||
emit_alu(struct r300_emit_state *emit, struct rc_pair_instruction *inst)
|
||||
{
|
||||
int ip;
|
||||
int j;
|
||||
PROG_CODE;
|
||||
int ip;
|
||||
int j;
|
||||
PROG_CODE;
|
||||
|
||||
if (code->alu.length >= c->Base.max_alu_insts) {
|
||||
/* rc_recompute_ips does not give an exact count, because it counts extra stuff
|
||||
* like BEGINTEX, but here it is intended to be only approximative anyway,
|
||||
* just to give some idea how close to the limit we are. */
|
||||
rc_error(&c->Base, "Too many ALU instructions used: %u, max: %u.\n",
|
||||
rc_recompute_ips(&c->Base), c->Base.max_alu_insts);
|
||||
return 0;
|
||||
}
|
||||
if (code->alu.length >= c->Base.max_alu_insts) {
|
||||
/* rc_recompute_ips does not give an exact count, because it counts extra stuff
|
||||
* like BEGINTEX, but here it is intended to be only approximative anyway,
|
||||
* just to give some idea how close to the limit we are. */
|
||||
rc_error(&c->Base, "Too many ALU instructions used: %u, max: %u.\n",
|
||||
rc_recompute_ips(&c->Base), c->Base.max_alu_insts);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ip = code->alu.length++;
|
||||
ip = code->alu.length++;
|
||||
|
||||
code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
|
||||
code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
|
||||
code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
|
||||
code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
|
||||
|
||||
for(j = 0; j < 3; ++j) {
|
||||
/* Set the RGB address */
|
||||
unsigned int src = use_source(code, inst->RGB.Src[j]);
|
||||
unsigned int arg;
|
||||
if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
|
||||
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
|
||||
for (j = 0; j < 3; ++j) {
|
||||
/* Set the RGB address */
|
||||
unsigned int src = use_source(code, inst->RGB.Src[j]);
|
||||
unsigned int arg;
|
||||
if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
|
||||
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
|
||||
|
||||
code->alu.inst[ip].rgb_addr |= src << (6*j);
|
||||
code->alu.inst[ip].rgb_addr |= src << (6 * j);
|
||||
|
||||
/* Set the Alpha address */
|
||||
src = use_source(code, inst->Alpha.Src[j]);
|
||||
if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
|
||||
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
|
||||
/* Set the Alpha address */
|
||||
src = use_source(code, inst->Alpha.Src[j]);
|
||||
if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
|
||||
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
|
||||
|
||||
code->alu.inst[ip].alpha_addr |= src << (6*j);
|
||||
code->alu.inst[ip].alpha_addr |= src << (6 * j);
|
||||
|
||||
arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
|
||||
arg |= inst->RGB.Arg[j].Abs << 6;
|
||||
arg |= inst->RGB.Arg[j].Negate << 5;
|
||||
code->alu.inst[ip].rgb_inst |= arg << (7*j);
|
||||
arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
|
||||
arg |= inst->RGB.Arg[j].Abs << 6;
|
||||
arg |= inst->RGB.Arg[j].Negate << 5;
|
||||
code->alu.inst[ip].rgb_inst |= arg << (7 * j);
|
||||
|
||||
arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
|
||||
arg |= inst->Alpha.Arg[j].Abs << 6;
|
||||
arg |= inst->Alpha.Arg[j].Negate << 5;
|
||||
code->alu.inst[ip].alpha_inst |= arg << (7*j);
|
||||
}
|
||||
arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
|
||||
arg |= inst->Alpha.Arg[j].Abs << 6;
|
||||
arg |= inst->Alpha.Arg[j].Negate << 5;
|
||||
code->alu.inst[ip].alpha_inst |= arg << (7 * j);
|
||||
}
|
||||
|
||||
/* Presubtract */
|
||||
if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
case RC_PRESUB_BIAS:
|
||||
code->alu.inst[ip].rgb_inst |=
|
||||
R300_ALU_SRCP_1_MINUS_2_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_ADD:
|
||||
code->alu.inst[ip].rgb_inst |=
|
||||
R300_ALU_SRCP_SRC1_PLUS_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_SUB:
|
||||
code->alu.inst[ip].rgb_inst |=
|
||||
R300_ALU_SRCP_SRC1_MINUS_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_INV:
|
||||
code->alu.inst[ip].rgb_inst |=
|
||||
R300_ALU_SRCP_1_MINUS_SRC0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* Presubtract */
|
||||
if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
switch (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
case RC_PRESUB_BIAS:
|
||||
code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_1_MINUS_2_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_ADD:
|
||||
code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_SRC1_PLUS_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_SUB:
|
||||
code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_SRC1_MINUS_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_INV:
|
||||
code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_1_MINUS_SRC0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
case RC_PRESUB_BIAS:
|
||||
code->alu.inst[ip].alpha_inst |=
|
||||
R300_ALU_SRCP_1_MINUS_2_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_ADD:
|
||||
code->alu.inst[ip].alpha_inst |=
|
||||
R300_ALU_SRCP_SRC1_PLUS_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_SUB:
|
||||
code->alu.inst[ip].alpha_inst |=
|
||||
R300_ALU_SRCP_SRC1_MINUS_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_INV:
|
||||
code->alu.inst[ip].alpha_inst |=
|
||||
R300_ALU_SRCP_1_MINUS_SRC0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
switch (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
case RC_PRESUB_BIAS:
|
||||
code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_1_MINUS_2_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_ADD:
|
||||
code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_SRC1_PLUS_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_SUB:
|
||||
code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_SRC1_MINUS_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_INV:
|
||||
code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_1_MINUS_SRC0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->RGB.Saturate)
|
||||
code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
|
||||
if (inst->Alpha.Saturate)
|
||||
code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
|
||||
if (inst->RGB.Saturate)
|
||||
code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
|
||||
if (inst->Alpha.Saturate)
|
||||
code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
|
||||
|
||||
if (inst->RGB.WriteMask) {
|
||||
use_temporary(code, inst->RGB.DestIndex);
|
||||
if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
|
||||
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
|
||||
code->alu.inst[ip].rgb_addr |=
|
||||
((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
|
||||
(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
|
||||
}
|
||||
if (inst->RGB.OutputWriteMask) {
|
||||
code->alu.inst[ip].rgb_addr |=
|
||||
(inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
|
||||
R300_RGB_TARGET(inst->RGB.Target);
|
||||
emit->node_flags |= R300_RGBA_OUT;
|
||||
}
|
||||
if (inst->RGB.WriteMask) {
|
||||
use_temporary(code, inst->RGB.DestIndex);
|
||||
if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
|
||||
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
|
||||
code->alu.inst[ip].rgb_addr |= ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
|
||||
(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
|
||||
}
|
||||
if (inst->RGB.OutputWriteMask) {
|
||||
code->alu.inst[ip].rgb_addr |=
|
||||
(inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
|
||||
R300_RGB_TARGET(inst->RGB.Target);
|
||||
emit->node_flags |= R300_RGBA_OUT;
|
||||
}
|
||||
|
||||
if (inst->Alpha.WriteMask) {
|
||||
use_temporary(code, inst->Alpha.DestIndex);
|
||||
if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
|
||||
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
|
||||
code->alu.inst[ip].alpha_addr |=
|
||||
((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
|
||||
R300_ALU_DSTA_REG;
|
||||
}
|
||||
if (inst->Alpha.OutputWriteMask) {
|
||||
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
|
||||
R300_ALPHA_TARGET(inst->Alpha.Target);
|
||||
emit->node_flags |= R300_RGBA_OUT;
|
||||
}
|
||||
if (inst->Alpha.DepthWriteMask) {
|
||||
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
|
||||
emit->node_flags |= R300_W_OUT;
|
||||
c->code->writes_depth = 1;
|
||||
}
|
||||
if (inst->Nop)
|
||||
code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
|
||||
if (inst->Alpha.WriteMask) {
|
||||
use_temporary(code, inst->Alpha.DestIndex);
|
||||
if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
|
||||
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
|
||||
code->alu.inst[ip].alpha_addr |=
|
||||
((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG;
|
||||
}
|
||||
if (inst->Alpha.OutputWriteMask) {
|
||||
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | R300_ALPHA_TARGET(inst->Alpha.Target);
|
||||
emit->node_flags |= R300_RGBA_OUT;
|
||||
}
|
||||
if (inst->Alpha.DepthWriteMask) {
|
||||
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
|
||||
emit->node_flags |= R300_W_OUT;
|
||||
c->code->writes_depth = 1;
|
||||
}
|
||||
if (inst->Nop)
|
||||
code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
|
||||
|
||||
/* Handle Output Modifier
|
||||
* According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
|
||||
if (inst->RGB.Omod) {
|
||||
if (inst->RGB.Omod == RC_OMOD_DISABLE) {
|
||||
rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
|
||||
}
|
||||
code->alu.inst[ip].rgb_inst |=
|
||||
(inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);
|
||||
}
|
||||
if (inst->Alpha.Omod) {
|
||||
if (inst->Alpha.Omod == RC_OMOD_DISABLE) {
|
||||
rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
|
||||
}
|
||||
code->alu.inst[ip].alpha_inst |=
|
||||
(inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);
|
||||
}
|
||||
return 1;
|
||||
/* Handle Output Modifier
|
||||
* According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
|
||||
if (inst->RGB.Omod) {
|
||||
if (inst->RGB.Omod == RC_OMOD_DISABLE) {
|
||||
rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
|
||||
}
|
||||
code->alu.inst[ip].rgb_inst |= (inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);
|
||||
}
|
||||
if (inst->Alpha.Omod) {
|
||||
if (inst->Alpha.Omod == RC_OMOD_DISABLE) {
|
||||
rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
|
||||
}
|
||||
code->alu.inst[ip].alpha_inst |= (inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finish the current node without advancing to the next one.
|
||||
*/
|
||||
static int finish_node(struct r300_emit_state * emit)
|
||||
static int
|
||||
finish_node(struct r300_emit_state *emit)
|
||||
{
|
||||
struct r300_fragment_program_compiler * c = emit->compiler;
|
||||
struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
|
||||
unsigned alu_offset;
|
||||
unsigned alu_end;
|
||||
unsigned tex_offset;
|
||||
unsigned tex_end;
|
||||
struct r300_fragment_program_compiler *c = emit->compiler;
|
||||
struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
|
||||
unsigned alu_offset;
|
||||
unsigned alu_end;
|
||||
unsigned tex_offset;
|
||||
unsigned tex_end;
|
||||
|
||||
unsigned int alu_offset_msbs, alu_end_msbs;
|
||||
unsigned int alu_offset_msbs, alu_end_msbs;
|
||||
|
||||
if (code->alu.length == emit->node_first_alu) {
|
||||
/* Generate a single NOP for this node */
|
||||
struct rc_pair_instruction inst;
|
||||
memset(&inst, 0, sizeof(inst));
|
||||
if (!emit_alu(emit, &inst))
|
||||
return 0;
|
||||
}
|
||||
if (code->alu.length == emit->node_first_alu) {
|
||||
/* Generate a single NOP for this node */
|
||||
struct rc_pair_instruction inst;
|
||||
memset(&inst, 0, sizeof(inst));
|
||||
if (!emit_alu(emit, &inst))
|
||||
return 0;
|
||||
}
|
||||
|
||||
alu_offset = emit->node_first_alu;
|
||||
alu_end = code->alu.length - alu_offset - 1;
|
||||
tex_offset = emit->node_first_tex;
|
||||
tex_end = code->tex.length - tex_offset - 1;
|
||||
alu_offset = emit->node_first_alu;
|
||||
alu_end = code->alu.length - alu_offset - 1;
|
||||
tex_offset = emit->node_first_tex;
|
||||
tex_end = code->tex.length - tex_offset - 1;
|
||||
|
||||
if (code->tex.length == emit->node_first_tex) {
|
||||
if (emit->current_node > 0) {
|
||||
error("Node %i has no TEX instructions", emit->current_node);
|
||||
return 0;
|
||||
}
|
||||
if (code->tex.length == emit->node_first_tex) {
|
||||
if (emit->current_node > 0) {
|
||||
error("Node %i has no TEX instructions", emit->current_node);
|
||||
return 0;
|
||||
}
|
||||
|
||||
tex_end = 0;
|
||||
} else {
|
||||
if (emit->current_node == 0)
|
||||
code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
|
||||
}
|
||||
tex_end = 0;
|
||||
} else {
|
||||
if (emit->current_node == 0)
|
||||
code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
|
||||
}
|
||||
|
||||
/* Write the config register.
|
||||
* Note: The order in which the words for each node are written
|
||||
* is not correct here and needs to be fixed up once we're entirely
|
||||
* done
|
||||
*
|
||||
* Also note that the register specification from AMD is slightly
|
||||
* incorrect in its description of this register. */
|
||||
code->code_addr[emit->current_node] =
|
||||
((alu_offset << R300_ALU_START_SHIFT)
|
||||
& R300_ALU_START_MASK)
|
||||
| ((alu_end << R300_ALU_SIZE_SHIFT)
|
||||
& R300_ALU_SIZE_MASK)
|
||||
| ((tex_offset << R300_TEX_START_SHIFT)
|
||||
& R300_TEX_START_MASK)
|
||||
| ((tex_end << R300_TEX_SIZE_SHIFT)
|
||||
& R300_TEX_SIZE_MASK)
|
||||
| emit->node_flags
|
||||
| (get_msbs_tex(tex_offset, 5)
|
||||
<< R400_TEX_START_MSB_SHIFT)
|
||||
| (get_msbs_tex(tex_end, 5)
|
||||
<< R400_TEX_SIZE_MSB_SHIFT)
|
||||
;
|
||||
/* Write the config register.
|
||||
* Note: The order in which the words for each node are written
|
||||
* is not correct here and needs to be fixed up once we're entirely
|
||||
* done
|
||||
*
|
||||
* Also note that the register specification from AMD is slightly
|
||||
* incorrect in its description of this register. */
|
||||
code->code_addr[emit->current_node] =
|
||||
((alu_offset << R300_ALU_START_SHIFT) & R300_ALU_START_MASK) |
|
||||
((alu_end << R300_ALU_SIZE_SHIFT) & R300_ALU_SIZE_MASK) |
|
||||
((tex_offset << R300_TEX_START_SHIFT) & R300_TEX_START_MASK) |
|
||||
((tex_end << R300_TEX_SIZE_SHIFT) & R300_TEX_SIZE_MASK) | emit->node_flags |
|
||||
(get_msbs_tex(tex_offset, 5) << R400_TEX_START_MSB_SHIFT) |
|
||||
(get_msbs_tex(tex_end, 5) << R400_TEX_SIZE_MSB_SHIFT);
|
||||
|
||||
/* Write r400 extended instruction fields. These will be ignored on
|
||||
* r300 cards. */
|
||||
alu_offset_msbs = get_msbs_alu(alu_offset);
|
||||
alu_end_msbs = get_msbs_alu(alu_end);
|
||||
switch(emit->current_node) {
|
||||
case 0:
|
||||
code->r400_code_offset_ext |=
|
||||
alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
|
||||
| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
|
||||
break;
|
||||
case 1:
|
||||
code->r400_code_offset_ext |=
|
||||
alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
|
||||
| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
|
||||
break;
|
||||
case 2:
|
||||
code->r400_code_offset_ext |=
|
||||
alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
|
||||
| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
|
||||
break;
|
||||
case 3:
|
||||
code->r400_code_offset_ext |=
|
||||
alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
|
||||
| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
/* Write r400 extended instruction fields. These will be ignored on
|
||||
* r300 cards. */
|
||||
alu_offset_msbs = get_msbs_alu(alu_offset);
|
||||
alu_end_msbs = get_msbs_alu(alu_end);
|
||||
switch (emit->current_node) {
|
||||
case 0:
|
||||
code->r400_code_offset_ext |=
|
||||
alu_offset_msbs << R400_ALU_START3_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
|
||||
break;
|
||||
case 1:
|
||||
code->r400_code_offset_ext |=
|
||||
alu_offset_msbs << R400_ALU_START2_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
|
||||
break;
|
||||
case 2:
|
||||
code->r400_code_offset_ext |=
|
||||
alu_offset_msbs << R400_ALU_START1_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
|
||||
break;
|
||||
case 3:
|
||||
code->r400_code_offset_ext |=
|
||||
alu_offset_msbs << R400_ALU_START0_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Begin a block of texture instructions.
|
||||
* Create the necessary indirection.
|
||||
*/
|
||||
static int begin_tex(struct r300_emit_state * emit)
|
||||
static int
|
||||
begin_tex(struct r300_emit_state *emit)
|
||||
{
|
||||
PROG_CODE;
|
||||
PROG_CODE;
|
||||
|
||||
if (code->alu.length == emit->node_first_alu &&
|
||||
code->tex.length == emit->node_first_tex) {
|
||||
return 1;
|
||||
}
|
||||
if (code->alu.length == emit->node_first_alu && code->tex.length == emit->node_first_tex) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (emit->current_node == 3) {
|
||||
error("Too many texture indirections");
|
||||
return 0;
|
||||
}
|
||||
if (emit->current_node == 3) {
|
||||
error("Too many texture indirections");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!finish_node(emit))
|
||||
return 0;
|
||||
if (!finish_node(emit))
|
||||
return 0;
|
||||
|
||||
emit->current_node++;
|
||||
emit->node_first_tex = code->tex.length;
|
||||
emit->node_first_alu = code->alu.length;
|
||||
emit->node_flags = 0;
|
||||
return 1;
|
||||
emit->current_node++;
|
||||
emit->node_first_tex = code->tex.length;
|
||||
emit->node_first_alu = code->alu.length;
|
||||
emit->node_flags = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
|
||||
static int
|
||||
emit_tex(struct r300_emit_state *emit, struct rc_instruction *inst)
|
||||
{
|
||||
unsigned int unit;
|
||||
unsigned int dest;
|
||||
unsigned int opcode;
|
||||
PROG_CODE;
|
||||
unsigned int unit;
|
||||
unsigned int dest;
|
||||
unsigned int opcode;
|
||||
PROG_CODE;
|
||||
|
||||
if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
|
||||
error("Too many TEX instructions");
|
||||
return 0;
|
||||
}
|
||||
if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
|
||||
error("Too many TEX instructions");
|
||||
return 0;
|
||||
}
|
||||
|
||||
unit = inst->U.I.TexSrcUnit;
|
||||
dest = inst->U.I.DstReg.Index;
|
||||
unit = inst->U.I.TexSrcUnit;
|
||||
dest = inst->U.I.DstReg.Index;
|
||||
|
||||
switch(inst->U.I.Opcode) {
|
||||
case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
|
||||
case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
|
||||
case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
|
||||
case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
|
||||
default:
|
||||
error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
|
||||
return 0;
|
||||
}
|
||||
switch (inst->U.I.Opcode) {
|
||||
case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
|
||||
case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
|
||||
case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
|
||||
case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
|
||||
default:
|
||||
error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (inst->U.I.Opcode == RC_OPCODE_KIL) {
|
||||
unit = 0;
|
||||
dest = 0;
|
||||
} else {
|
||||
use_temporary(code, dest);
|
||||
}
|
||||
if (inst->U.I.Opcode == RC_OPCODE_KIL) {
|
||||
unit = 0;
|
||||
dest = 0;
|
||||
} else {
|
||||
use_temporary(code, dest);
|
||||
}
|
||||
|
||||
use_temporary(code, inst->U.I.SrcReg[0].Index);
|
||||
use_temporary(code, inst->U.I.SrcReg[0].Index);
|
||||
|
||||
code->tex.inst[code->tex.length++] =
|
||||
((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
|
||||
& R300_SRC_ADDR_MASK)
|
||||
| ((dest << R300_DST_ADDR_SHIFT)
|
||||
& R300_DST_ADDR_MASK)
|
||||
| (unit << R300_TEX_ID_SHIFT)
|
||||
| (opcode << R300_TEX_INST_SHIFT)
|
||||
| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
|
||||
R400_SRC_ADDR_EXT_BIT : 0)
|
||||
| (dest >= R300_PFS_NUM_TEMP_REGS ?
|
||||
R400_DST_ADDR_EXT_BIT : 0)
|
||||
;
|
||||
return 1;
|
||||
code->tex.inst[code->tex.length++] =
|
||||
((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) & R300_SRC_ADDR_MASK) |
|
||||
((dest << R300_DST_ADDR_SHIFT) & R300_DST_ADDR_MASK) | (unit << R300_TEX_ID_SHIFT) |
|
||||
(opcode << R300_TEX_INST_SHIFT) |
|
||||
(inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? R400_SRC_ADDR_EXT_BIT : 0) |
|
||||
(dest >= R300_PFS_NUM_TEMP_REGS ? R400_DST_ADDR_EXT_BIT : 0);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Final compilation step: Turn the intermediate radeon_program into
|
||||
* machine-readable instructions.
|
||||
*/
|
||||
void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
|
||||
void
|
||||
r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
|
||||
struct r300_emit_state emit;
|
||||
struct r300_fragment_program_code *code = &compiler->code->code.r300;
|
||||
unsigned int tex_end;
|
||||
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler *)c;
|
||||
struct r300_emit_state emit;
|
||||
struct r300_fragment_program_code *code = &compiler->code->code.r300;
|
||||
unsigned int tex_end;
|
||||
|
||||
memset(&emit, 0, sizeof(emit));
|
||||
emit.compiler = compiler;
|
||||
memset(&emit, 0, sizeof(emit));
|
||||
emit.compiler = compiler;
|
||||
|
||||
memset(code, 0, sizeof(struct r300_fragment_program_code));
|
||||
memset(code, 0, sizeof(struct r300_fragment_program_code));
|
||||
|
||||
for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
|
||||
inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
|
||||
inst = inst->Next) {
|
||||
if (inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
|
||||
begin_tex(&emit);
|
||||
continue;
|
||||
}
|
||||
for (struct rc_instruction *inst = compiler->Base.Program.Instructions.Next;
|
||||
inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; inst = inst->Next) {
|
||||
if (inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
|
||||
begin_tex(&emit);
|
||||
continue;
|
||||
}
|
||||
|
||||
emit_tex(&emit, inst);
|
||||
} else {
|
||||
emit_alu(&emit, &inst->U.P);
|
||||
}
|
||||
}
|
||||
emit_tex(&emit, inst);
|
||||
} else {
|
||||
emit_alu(&emit, &inst->U.P);
|
||||
}
|
||||
}
|
||||
|
||||
if (code->pixsize >= compiler->Base.max_temp_regs)
|
||||
rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
|
||||
if (code->pixsize >= compiler->Base.max_temp_regs)
|
||||
rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
|
||||
|
||||
if (compiler->Base.Error)
|
||||
return;
|
||||
if (compiler->Base.Error)
|
||||
return;
|
||||
|
||||
/* Finish the program */
|
||||
finish_node(&emit);
|
||||
/* Finish the program */
|
||||
finish_node(&emit);
|
||||
|
||||
code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
|
||||
code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
|
||||
|
||||
/* Set r400 extended instruction fields. These values will be ignored
|
||||
* on r300 cards. */
|
||||
code->r400_code_offset_ext |=
|
||||
(get_msbs_alu(0)
|
||||
<< R400_ALU_OFFSET_MSB_SHIFT)
|
||||
| (get_msbs_alu(code->alu.length - 1)
|
||||
<< R400_ALU_SIZE_MSB_SHIFT);
|
||||
/* Set r400 extended instruction fields. These values will be ignored
|
||||
* on r300 cards. */
|
||||
code->r400_code_offset_ext |= (get_msbs_alu(0) << R400_ALU_OFFSET_MSB_SHIFT) |
|
||||
(get_msbs_alu(code->alu.length - 1) << R400_ALU_SIZE_MSB_SHIFT);
|
||||
|
||||
tex_end = code->tex.length ? code->tex.length - 1 : 0;
|
||||
code->code_offset =
|
||||
((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
|
||||
& R300_PFS_CNTL_ALU_OFFSET_MASK)
|
||||
| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
|
||||
& R300_PFS_CNTL_ALU_END_MASK)
|
||||
| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
|
||||
& R300_PFS_CNTL_TEX_OFFSET_MASK)
|
||||
| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
|
||||
& R300_PFS_CNTL_TEX_END_MASK)
|
||||
| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
|
||||
| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
|
||||
;
|
||||
tex_end = code->tex.length ? code->tex.length - 1 : 0;
|
||||
code->code_offset =
|
||||
((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) & R300_PFS_CNTL_ALU_OFFSET_MASK) |
|
||||
(((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) & R300_PFS_CNTL_ALU_END_MASK) |
|
||||
((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) & R300_PFS_CNTL_TEX_OFFSET_MASK) |
|
||||
((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) & R300_PFS_CNTL_TEX_END_MASK) |
|
||||
(get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) |
|
||||
(get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT);
|
||||
|
||||
if (emit.current_node < 3) {
|
||||
int shift = 3 - emit.current_node;
|
||||
int i;
|
||||
for(i = emit.current_node; i >= 0; --i)
|
||||
code->code_addr[shift + i] = code->code_addr[i];
|
||||
for(i = 0; i < shift; ++i)
|
||||
code->code_addr[i] = 0;
|
||||
}
|
||||
if (emit.current_node < 3) {
|
||||
int shift = 3 - emit.current_node;
|
||||
int i;
|
||||
for (i = emit.current_node; i >= 0; --i)
|
||||
code->code_addr[shift + i] = code->code_addr[i];
|
||||
for (i = 0; i < shift; ++i)
|
||||
code->code_addr[i] = 0;
|
||||
}
|
||||
|
||||
if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
|
||||
|| code->alu.length > R300_PFS_MAX_ALU_INST
|
||||
|| code->tex.length > R300_PFS_MAX_TEX_INST) {
|
||||
if (code->pixsize >= R300_PFS_NUM_TEMP_REGS || code->alu.length > R300_PFS_MAX_ALU_INST ||
|
||||
code->tex.length > R300_PFS_MAX_TEX_INST) {
|
||||
|
||||
code->r390_mode = 1;
|
||||
}
|
||||
code->r390_mode = 1;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,28 +17,28 @@
|
|||
#include "r300_reg.h"
|
||||
#include "radeon_compiler.h"
|
||||
|
||||
#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO))
|
||||
#define MAKE_SWZ3(x, y, z) \
|
||||
(RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO))
|
||||
|
||||
struct swizzle_data {
|
||||
unsigned int hash; /**< swizzle value this matches */
|
||||
unsigned int base; /**< base value for hw swizzle */
|
||||
unsigned int stride; /**< difference in base between arg0/1/2 */
|
||||
unsigned int srcp_stride; /**< difference in base between arg0/srcp */
|
||||
unsigned int hash; /**< swizzle value this matches */
|
||||
unsigned int base; /**< base value for hw swizzle */
|
||||
unsigned int stride; /**< difference in base between arg0/1/2 */
|
||||
unsigned int srcp_stride; /**< difference in base between arg0/srcp */
|
||||
};
|
||||
|
||||
static const struct swizzle_data native_swizzles[] = {
|
||||
{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15},
|
||||
{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15},
|
||||
{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15},
|
||||
{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15},
|
||||
{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7},
|
||||
{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0},
|
||||
{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0},
|
||||
{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0},
|
||||
{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0},
|
||||
{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0},
|
||||
{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}
|
||||
};
|
||||
{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15},
|
||||
{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15},
|
||||
{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15},
|
||||
{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15},
|
||||
{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7},
|
||||
{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0},
|
||||
{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0},
|
||||
{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0},
|
||||
{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0},
|
||||
{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0},
|
||||
{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}};
|
||||
|
||||
static const int num_native_swizzles = ARRAY_SIZE(native_swizzles);
|
||||
/* Only swizzles with srcp_stride != 0 can be used for presub, so
|
||||
|
|
@ -49,24 +49,25 @@ static const int num_presub_swizzles = 5;
|
|||
* Find a native RGB swizzle that matches the given swizzle.
|
||||
* Returns NULL if none found.
|
||||
*/
|
||||
static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
|
||||
static const struct swizzle_data *
|
||||
lookup_native_swizzle(unsigned int swizzle)
|
||||
{
|
||||
int i, comp;
|
||||
int i, comp;
|
||||
|
||||
for(i = 0; i < num_native_swizzles; ++i) {
|
||||
const struct swizzle_data* sd = &native_swizzles[i];
|
||||
for(comp = 0; comp < 3; ++comp) {
|
||||
unsigned int swz = GET_SWZ(swizzle, comp);
|
||||
if (swz == RC_SWIZZLE_UNUSED)
|
||||
continue;
|
||||
if (swz != GET_SWZ(sd->hash, comp))
|
||||
break;
|
||||
}
|
||||
if (comp == 3)
|
||||
return sd;
|
||||
}
|
||||
for (i = 0; i < num_native_swizzles; ++i) {
|
||||
const struct swizzle_data *sd = &native_swizzles[i];
|
||||
for (comp = 0; comp < 3; ++comp) {
|
||||
unsigned int swz = GET_SWZ(swizzle, comp);
|
||||
if (swz == RC_SWIZZLE_UNUSED)
|
||||
continue;
|
||||
if (swz != GET_SWZ(sd->hash, comp))
|
||||
break;
|
||||
}
|
||||
if (comp == 3)
|
||||
return sd;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -74,154 +75,156 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
|
|||
* it is better to use r300_swizzle_is_native() which can be accessed via
|
||||
* struct radeon_compiler *c; c->SwizzleCaps->IsNative().
|
||||
*/
|
||||
int r300_swizzle_is_native_basic(unsigned int swizzle)
|
||||
int
|
||||
r300_swizzle_is_native_basic(unsigned int swizzle)
|
||||
{
|
||||
if(lookup_native_swizzle(swizzle))
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
if (lookup_native_swizzle(swizzle))
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether the given instruction supports the swizzle and negate
|
||||
* combinations in the given source register.
|
||||
*/
|
||||
static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
|
||||
static int
|
||||
r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
|
||||
{
|
||||
const struct swizzle_data* sd;
|
||||
unsigned int relevant;
|
||||
int j;
|
||||
const struct swizzle_data *sd;
|
||||
unsigned int relevant;
|
||||
int j;
|
||||
|
||||
if (opcode == RC_OPCODE_KIL ||
|
||||
opcode == RC_OPCODE_TEX ||
|
||||
opcode == RC_OPCODE_TXB ||
|
||||
opcode == RC_OPCODE_TXP) {
|
||||
if (reg.Abs || reg.Negate)
|
||||
return 0;
|
||||
if (opcode == RC_OPCODE_KIL || opcode == RC_OPCODE_TEX || opcode == RC_OPCODE_TXB ||
|
||||
opcode == RC_OPCODE_TXP) {
|
||||
if (reg.Abs || reg.Negate)
|
||||
return 0;
|
||||
|
||||
for(j = 0; j < 4; ++j) {
|
||||
unsigned int swz = GET_SWZ(reg.Swizzle, j);
|
||||
if (swz == RC_SWIZZLE_UNUSED)
|
||||
continue;
|
||||
if (swz != j)
|
||||
return 0;
|
||||
}
|
||||
for (j = 0; j < 4; ++j) {
|
||||
unsigned int swz = GET_SWZ(reg.Swizzle, j);
|
||||
if (swz == RC_SWIZZLE_UNUSED)
|
||||
continue;
|
||||
if (swz != j)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
relevant = 0;
|
||||
relevant = 0;
|
||||
|
||||
for(j = 0; j < 3; ++j)
|
||||
if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED)
|
||||
relevant |= 1 << j;
|
||||
for (j = 0; j < 3; ++j)
|
||||
if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED)
|
||||
relevant |= 1 << j;
|
||||
|
||||
if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
|
||||
return 0;
|
||||
if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
|
||||
return 0;
|
||||
|
||||
sd = lookup_native_swizzle(reg.Swizzle);
|
||||
if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0))
|
||||
return 0;
|
||||
sd = lookup_native_swizzle(reg.Swizzle);
|
||||
if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
static void r300_swizzle_split(
|
||||
struct rc_src_register src, unsigned int mask,
|
||||
struct rc_swizzle_split * split)
|
||||
static void
|
||||
r300_swizzle_split(struct rc_src_register src, unsigned int mask, struct rc_swizzle_split *split)
|
||||
{
|
||||
split->NumPhases = 0;
|
||||
split->NumPhases = 0;
|
||||
|
||||
while(mask) {
|
||||
unsigned int best_matchcount = 0;
|
||||
unsigned int best_matchmask = 0;
|
||||
int i, comp;
|
||||
while (mask) {
|
||||
unsigned int best_matchcount = 0;
|
||||
unsigned int best_matchmask = 0;
|
||||
int i, comp;
|
||||
|
||||
unsigned num_swizzles = src.File == RC_FILE_PRESUB ? num_presub_swizzles : num_native_swizzles;
|
||||
unsigned num_swizzles =
|
||||
src.File == RC_FILE_PRESUB ? num_presub_swizzles : num_native_swizzles;
|
||||
|
||||
for(i = 0; i < num_swizzles; ++i) {
|
||||
const struct swizzle_data *sd = &native_swizzles[i];
|
||||
unsigned int matchcount = 0;
|
||||
unsigned int matchmask = 0;
|
||||
for(comp = 0; comp < 3; ++comp) {
|
||||
unsigned int swz;
|
||||
if (!GET_BIT(mask, comp))
|
||||
continue;
|
||||
swz = GET_SWZ(src.Swizzle, comp);
|
||||
if (swz == RC_SWIZZLE_UNUSED)
|
||||
continue;
|
||||
if (swz == GET_SWZ(sd->hash, comp)) {
|
||||
/* check if the negate bit of current component
|
||||
* is the same for already matched components */
|
||||
if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp))))
|
||||
continue;
|
||||
for (i = 0; i < num_swizzles; ++i) {
|
||||
const struct swizzle_data *sd = &native_swizzles[i];
|
||||
unsigned int matchcount = 0;
|
||||
unsigned int matchmask = 0;
|
||||
for (comp = 0; comp < 3; ++comp) {
|
||||
unsigned int swz;
|
||||
if (!GET_BIT(mask, comp))
|
||||
continue;
|
||||
swz = GET_SWZ(src.Swizzle, comp);
|
||||
if (swz == RC_SWIZZLE_UNUSED)
|
||||
continue;
|
||||
if (swz == GET_SWZ(sd->hash, comp)) {
|
||||
/* check if the negate bit of current component
|
||||
* is the same for already matched components */
|
||||
if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp))))
|
||||
continue;
|
||||
|
||||
matchcount++;
|
||||
matchmask |= 1 << comp;
|
||||
}
|
||||
}
|
||||
if (matchcount > best_matchcount) {
|
||||
best_matchcount = matchcount;
|
||||
best_matchmask = matchmask;
|
||||
if (matchmask == (mask & RC_MASK_XYZ))
|
||||
break;
|
||||
}
|
||||
}
|
||||
matchcount++;
|
||||
matchmask |= 1 << comp;
|
||||
}
|
||||
}
|
||||
if (matchcount > best_matchcount) {
|
||||
best_matchcount = matchcount;
|
||||
best_matchmask = matchmask;
|
||||
if (matchmask == (mask & RC_MASK_XYZ))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (mask & RC_MASK_W)
|
||||
best_matchmask |= RC_MASK_W;
|
||||
if (mask & RC_MASK_W)
|
||||
best_matchmask |= RC_MASK_W;
|
||||
|
||||
split->Phase[split->NumPhases++] = best_matchmask;
|
||||
mask &= ~best_matchmask;
|
||||
}
|
||||
split->Phase[split->NumPhases++] = best_matchmask;
|
||||
mask &= ~best_matchmask;
|
||||
}
|
||||
}
|
||||
|
||||
const struct rc_swizzle_caps r300_swizzle_caps = {
|
||||
.IsNative = r300_swizzle_is_native,
|
||||
.Split = r300_swizzle_split
|
||||
};
|
||||
|
||||
const struct rc_swizzle_caps r300_swizzle_caps = {.IsNative = r300_swizzle_is_native,
|
||||
.Split = r300_swizzle_split};
|
||||
|
||||
/**
|
||||
* Translate an RGB (XYZ) swizzle into the hardware code for the given
|
||||
* instruction source.
|
||||
*/
|
||||
unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
|
||||
unsigned int
|
||||
r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
|
||||
{
|
||||
const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
|
||||
const struct swizzle_data *sd = lookup_native_swizzle(swizzle);
|
||||
|
||||
if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) {
|
||||
fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
|
||||
return 0;
|
||||
}
|
||||
if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) {
|
||||
fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (src == RC_PAIR_PRESUB_SRC) {
|
||||
return sd->base + sd->srcp_stride;
|
||||
} else {
|
||||
return sd->base + src*sd->stride;
|
||||
}
|
||||
if (src == RC_PAIR_PRESUB_SRC) {
|
||||
return sd->base + sd->srcp_stride;
|
||||
} else {
|
||||
return sd->base + src * sd->stride;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Translate an Alpha (W) swizzle into the hardware code for the given
|
||||
* instruction source.
|
||||
*/
|
||||
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
|
||||
unsigned int
|
||||
r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
|
||||
{
|
||||
unsigned int swz = GET_SWZ(swizzle, 0);
|
||||
if (src == RC_PAIR_PRESUB_SRC) {
|
||||
return R300_ALU_ARGA_SRCP_X + swz;
|
||||
}
|
||||
if (swz < 3)
|
||||
return swz + 3*src;
|
||||
unsigned int swz = GET_SWZ(swizzle, 0);
|
||||
if (src == RC_PAIR_PRESUB_SRC) {
|
||||
return R300_ALU_ARGA_SRCP_X + swz;
|
||||
}
|
||||
if (swz < 3)
|
||||
return swz + 3 * src;
|
||||
|
||||
switch(swz) {
|
||||
case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
|
||||
case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
|
||||
case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
|
||||
case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF;
|
||||
default: return R300_ALU_ARGA_ONE;
|
||||
}
|
||||
switch (swz) {
|
||||
case RC_SWIZZLE_W:
|
||||
return R300_ALU_ARGA_SRC0A + src;
|
||||
case RC_SWIZZLE_ONE:
|
||||
return R300_ALU_ARGA_ONE;
|
||||
case RC_SWIZZLE_ZERO:
|
||||
return R300_ALU_ARGA_ZERO;
|
||||
case RC_SWIZZLE_HALF:
|
||||
return R300_ALU_ARGA_HALF;
|
||||
default:
|
||||
return R300_ALU_ARGA_ONE;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
bool
|
||||
r300_is_only_used_as_float(const nir_alu_instr *instr)
|
||||
{
|
||||
nir_foreach_use(src, &instr->def) {
|
||||
nir_foreach_use (src, &instr->def) {
|
||||
if (nir_src_is_if(src))
|
||||
return false;
|
||||
|
||||
|
|
@ -29,7 +29,7 @@ r300_is_only_used_as_float(const nir_alu_instr *instr)
|
|||
return false;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
break;
|
||||
}
|
||||
|
||||
const nir_op_info *info = &nir_op_infos[alu->op];
|
||||
|
|
@ -46,7 +46,7 @@ r300_is_only_used_as_float(const nir_alu_instr *instr)
|
|||
static unsigned char
|
||||
r300_should_vectorize_instr(const nir_instr *instr, const void *data)
|
||||
{
|
||||
bool *too_many_ubos = (bool *) data;
|
||||
bool *too_many_ubos = (bool *)data;
|
||||
|
||||
if (instr->type != nir_instr_type_alu)
|
||||
return 0;
|
||||
|
|
@ -88,7 +88,8 @@ r300_should_vectorize_instr(const nir_instr *instr, const void *data)
|
|||
* the constants later, we need to be extra careful with adding
|
||||
* new constants anyway.
|
||||
*/
|
||||
static bool have_too_many_ubos(nir_shader *s, bool is_r500)
|
||||
static bool
|
||||
have_too_many_ubos(nir_shader *s, bool is_r500)
|
||||
{
|
||||
if (s->info.stage != MESA_SHADER_FRAGMENT)
|
||||
return false;
|
||||
|
|
@ -96,9 +97,9 @@ static bool have_too_many_ubos(nir_shader *s, bool is_r500)
|
|||
if (is_r500)
|
||||
return false;
|
||||
|
||||
nir_foreach_variable_with_modes(var, s, nir_var_mem_ubo) {
|
||||
nir_foreach_variable_with_modes (var, s, nir_var_mem_ubo) {
|
||||
int ubo = var->data.driver_location;
|
||||
assert (ubo == 0);
|
||||
assert(ubo == 0);
|
||||
|
||||
unsigned size = glsl_get_explicit_size(var->interface_type, false);
|
||||
if (DIV_ROUND_UP(size, 16) > 32)
|
||||
|
|
@ -208,8 +209,7 @@ r300_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
|
|||
|
||||
NIR_PASS(progress, s, nir_opt_if, nir_opt_if_optimize_phi_true_false);
|
||||
if (is_r500)
|
||||
nir_shader_intrinsics_pass(s, set_speculate,
|
||||
nir_metadata_control_flow, NULL);
|
||||
nir_shader_intrinsics_pass(s, set_speculate, nir_metadata_control_flow, NULL);
|
||||
NIR_PASS(progress, s, nir_opt_peephole_select, is_r500 ? 8 : ~0, true, true);
|
||||
if (s->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
NIR_PASS(progress, s, r300_nir_lower_bool_to_float_fs);
|
||||
|
|
@ -221,10 +221,9 @@ r300_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
|
|||
NIR_PASS(progress, s, nir_opt_loop);
|
||||
|
||||
bool too_many_ubos = have_too_many_ubos(s, is_r500);
|
||||
NIR_PASS(progress, s, nir_opt_vectorize, r300_should_vectorize_instr,
|
||||
&too_many_ubos);
|
||||
NIR_PASS(progress, s, nir_opt_vectorize, r300_should_vectorize_instr, &too_many_ubos);
|
||||
NIR_PASS(progress, s, nir_opt_undef);
|
||||
if(!progress)
|
||||
if (!progress)
|
||||
NIR_PASS(progress, s, nir_lower_undef_to_zero);
|
||||
NIR_PASS(progress, s, nir_opt_loop_unroll);
|
||||
|
||||
|
|
@ -246,11 +245,11 @@ r300_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
|
|||
} while (progress);
|
||||
|
||||
NIR_PASS_V(s, nir_lower_var_copies);
|
||||
NIR_PASS(progress, s, nir_remove_dead_variables, nir_var_function_temp,
|
||||
NULL);
|
||||
NIR_PASS(progress, s, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||
}
|
||||
|
||||
static char *r300_check_control_flow(nir_shader *s)
|
||||
static char *
|
||||
r300_check_control_flow(nir_shader *s)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(s);
|
||||
nir_block *first = nir_start_block(impl);
|
||||
|
|
@ -258,12 +257,14 @@ static char *r300_check_control_flow(nir_shader *s)
|
|||
|
||||
if (next) {
|
||||
switch (next->type) {
|
||||
case nir_cf_node_if:
|
||||
return "If/then statements not supported by R300/R400 shaders, should have been flattened by peephole_select.";
|
||||
case nir_cf_node_loop:
|
||||
return "Looping not supported R300/R400 shaders, all loops must be statically unrollable.";
|
||||
default:
|
||||
return "Unknown control flow type";
|
||||
case nir_cf_node_if:
|
||||
return "If/then statements not supported by R300/R400 shaders, should have been "
|
||||
"flattened by peephole_select.";
|
||||
case nir_cf_node_loop:
|
||||
return "Looping not supported R300/R400 shaders, all loops must be statically "
|
||||
"unrollable.";
|
||||
default:
|
||||
return "Unknown control flow type";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -283,10 +284,9 @@ r300_finalize_nir(struct pipe_screen *pscreen, void *nir)
|
|||
* because they're needed for YUV variant lowering.
|
||||
*/
|
||||
nir_remove_dead_derefs(s);
|
||||
nir_foreach_uniform_variable_safe(var, s) {
|
||||
nir_foreach_uniform_variable_safe (var, s) {
|
||||
if (var->data.mode == nir_var_uniform &&
|
||||
(glsl_type_get_image_count(var->type) ||
|
||||
glsl_type_get_sampler_count(var->type)))
|
||||
(glsl_type_get_image_count(var->type) || glsl_type_get_sampler_count(var->type)))
|
||||
continue;
|
||||
|
||||
exec_node_remove(&var->node);
|
||||
|
|
|
|||
|
|
@ -8,13 +8,12 @@
|
|||
|
||||
#include <math.h>
|
||||
|
||||
#include "pipe/p_screen.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "pipe/p_screen.h"
|
||||
|
||||
static inline bool
|
||||
is_ubo_or_input(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
|
||||
unsigned src, unsigned num_components,
|
||||
const uint8_t *swizzle)
|
||||
is_ubo_or_input(UNUSED struct hash_table *ht, const nir_alu_instr *instr, unsigned src,
|
||||
unsigned num_components, const uint8_t *swizzle)
|
||||
{
|
||||
nir_instr *parent = instr->src[src].src.ssa->parent_instr;
|
||||
if (parent->type != nir_instr_type_intrinsic)
|
||||
|
|
@ -36,7 +35,7 @@ static inline bool
|
|||
is_not_used_in_single_if(const nir_alu_instr *instr)
|
||||
{
|
||||
unsigned if_uses = 0;
|
||||
nir_foreach_use(src, &instr->def) {
|
||||
nir_foreach_use (src, &instr->def) {
|
||||
if (nir_src_is_if(src))
|
||||
if_uses++;
|
||||
else
|
||||
|
|
@ -87,19 +86,19 @@ check_instr_and_src_value(nir_op op, nir_instr **instr, double value)
|
|||
unsigned i;
|
||||
for (i = 0; i <= 2; i++) {
|
||||
if (i == 2) {
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
nir_alu_src src = alu->src[i];
|
||||
if (nir_src_is_const(src.src)) {
|
||||
/* All components must be reading the same value. */
|
||||
for (unsigned j = 0; j < alu->def.num_components - 1; j++) {
|
||||
if (src.swizzle[j] != src.swizzle[j + 1]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (fabs(nir_src_comp_as_float(src.src, src.swizzle[0]) - value) < 1e-5) {
|
||||
break;
|
||||
/* All components must be reading the same value. */
|
||||
for (unsigned j = 0; j < alu->def.num_components - 1; j++) {
|
||||
if (src.swizzle[j] != src.swizzle[j + 1]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (fabs(nir_src_comp_as_float(src.src, src.swizzle[0]) - value) < 1e-5) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
*instr = alu->src[1 - i].src.ssa->parent_instr;
|
||||
|
|
|
|||
|
|
@ -7,47 +7,48 @@
|
|||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "radeon_compiler_util.h"
|
||||
#include "radeon_dataflow.h"
|
||||
#include "radeon_program_alu.h"
|
||||
#include "radeon_program_tex.h"
|
||||
#include "radeon_rename_regs.h"
|
||||
#include "radeon_remove_constants.h"
|
||||
#include "radeon_variable.h"
|
||||
#include "radeon_list.h"
|
||||
#include "r300_fragprog.h"
|
||||
#include "r300_fragprog_swizzle.h"
|
||||
#include "r500_fragprog.h"
|
||||
#include "radeon_compiler_util.h"
|
||||
#include "radeon_dataflow.h"
|
||||
#include "radeon_list.h"
|
||||
#include "radeon_program_alu.h"
|
||||
#include "radeon_program_tex.h"
|
||||
#include "radeon_remove_constants.h"
|
||||
#include "radeon_rename_regs.h"
|
||||
#include "radeon_variable.h"
|
||||
|
||||
|
||||
static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
|
||||
static void
|
||||
rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
|
||||
{
|
||||
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
|
||||
struct rc_instruction *rci;
|
||||
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler *)cc;
|
||||
struct rc_instruction *rci;
|
||||
|
||||
for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
|
||||
struct rc_sub_instruction * inst = &rci->U.I;
|
||||
unsigned i;
|
||||
const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode);
|
||||
for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions;
|
||||
rci = rci->Next) {
|
||||
struct rc_sub_instruction *inst = &rci->U.I;
|
||||
unsigned i;
|
||||
const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode);
|
||||
|
||||
if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth)
|
||||
continue;
|
||||
if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth)
|
||||
continue;
|
||||
|
||||
if (inst->DstReg.WriteMask & RC_MASK_Z) {
|
||||
inst->DstReg.WriteMask = RC_MASK_W;
|
||||
} else {
|
||||
inst->DstReg.WriteMask = 0;
|
||||
continue;
|
||||
}
|
||||
if (inst->DstReg.WriteMask & RC_MASK_Z) {
|
||||
inst->DstReg.WriteMask = RC_MASK_W;
|
||||
} else {
|
||||
inst->DstReg.WriteMask = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!info->IsComponentwise) {
|
||||
continue;
|
||||
}
|
||||
if (!info->IsComponentwise) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (i = 0; i < info->NumSrcRegs; i++) {
|
||||
inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]);
|
||||
}
|
||||
}
|
||||
for (i = 0; i < info->NumSrcRegs; i++) {
|
||||
inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -66,100 +67,92 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
|
|||
* rc_get_variables can't get properly readers for normal instructions if presubtract
|
||||
* is present (it works fine for pair instructions).
|
||||
*/
|
||||
static void rc_convert_rgb_alpha(struct radeon_compiler *c, void *user)
|
||||
static void
|
||||
rc_convert_rgb_alpha(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct rc_list * variables;
|
||||
struct rc_list * var_ptr;
|
||||
struct rc_list *variables;
|
||||
struct rc_list *var_ptr;
|
||||
|
||||
variables = rc_get_variables(c);
|
||||
variables = rc_get_variables(c);
|
||||
|
||||
for (var_ptr = variables; var_ptr; var_ptr = var_ptr->Next) {
|
||||
struct rc_variable * var = var_ptr->Item;
|
||||
for (var_ptr = variables; var_ptr; var_ptr = var_ptr->Next) {
|
||||
struct rc_variable *var = var_ptr->Item;
|
||||
|
||||
if (var->Inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
|
||||
continue;
|
||||
}
|
||||
if (var->Inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Only rewrite scalar opcodes that are used separately for now. */
|
||||
if (var->Friend)
|
||||
continue;
|
||||
/* Only rewrite scalar opcodes that are used separately for now. */
|
||||
if (var->Friend)
|
||||
continue;
|
||||
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(var->Inst->U.I.Opcode);
|
||||
if (opcode->IsStandardScalar && var->Dst.WriteMask != RC_MASK_W) {
|
||||
unsigned index = rc_find_free_temporary(c);
|
||||
rc_variable_change_dst(var, index, RC_MASK_W);
|
||||
}
|
||||
}
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(var->Inst->U.I.Opcode);
|
||||
if (opcode->IsStandardScalar && var->Dst.WriteMask != RC_MASK_W) {
|
||||
unsigned index = rc_find_free_temporary(c);
|
||||
rc_variable_change_dst(var, index, RC_MASK_W);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
|
||||
void
|
||||
r3xx_compile_fragment_program(struct r300_fragment_program_compiler *c)
|
||||
{
|
||||
int is_r500 = c->Base.is_r500;
|
||||
int opt = !c->Base.disable_optimizations;
|
||||
int alpha2one = c->state.alpha_to_one;
|
||||
int is_r500 = c->Base.is_r500;
|
||||
int opt = !c->Base.disable_optimizations;
|
||||
int alpha2one = c->state.alpha_to_one;
|
||||
bool dbg = c->Base.Debug & RC_DBG_LOG;
|
||||
|
||||
/* Lists of instruction transformations. */
|
||||
struct radeon_program_transformation force_alpha_to_one[] = {
|
||||
{ &rc_force_output_alpha_to_one, c },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
/* Lists of instruction transformations. */
|
||||
struct radeon_program_transformation force_alpha_to_one[] = {{&rc_force_output_alpha_to_one, c},
|
||||
{NULL, NULL}};
|
||||
|
||||
struct radeon_program_transformation rewrite_tex[] = {
|
||||
{ &radeonTransformTEX, c },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
struct radeon_program_transformation rewrite_tex[] = {{&radeonTransformTEX, c}, {NULL, NULL}};
|
||||
|
||||
struct radeon_program_transformation native_rewrite_r500[] = {
|
||||
{ &radeonTransformALU, NULL },
|
||||
{ &radeonTransformDeriv, NULL },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
struct radeon_program_transformation native_rewrite_r500[] = {{&radeonTransformALU, NULL},
|
||||
{&radeonTransformDeriv, NULL},
|
||||
{NULL, NULL}};
|
||||
|
||||
struct radeon_program_transformation native_rewrite_r300[] = {
|
||||
{ &radeonTransformALU, NULL },
|
||||
{ &radeonStubDeriv, NULL },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
struct radeon_program_transformation native_rewrite_r300[] = {{&radeonTransformALU, NULL},
|
||||
{&radeonStubDeriv, NULL},
|
||||
{NULL, NULL}};
|
||||
|
||||
struct radeon_program_transformation opt_presubtract[] = {
|
||||
{ &rc_opt_presubtract, NULL },
|
||||
{ NULL, NULL }
|
||||
};
|
||||
struct radeon_program_transformation opt_presubtract[] = {{&rc_opt_presubtract, NULL},
|
||||
{NULL, NULL}};
|
||||
|
||||
/* List of compiler passes. */
|
||||
/* clang-format off */
|
||||
struct radeon_compiler_pass fs_list[] = {
|
||||
/* NAME DUMP PREDICATE FUNCTION PARAM */
|
||||
{"rewrite depth out", 1, 1, rc_rewrite_depth_out, NULL},
|
||||
{"force alpha to one", 1, alpha2one, rc_local_transform, force_alpha_to_one},
|
||||
{"transform TEX", 1, 1, rc_local_transform, rewrite_tex},
|
||||
{"transform IF", 1, is_r500, r500_transform_IF, NULL},
|
||||
{"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500},
|
||||
{"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300},
|
||||
{"deadcode", 1, opt, rc_dataflow_deadcode, NULL},
|
||||
{"convert rgb<->alpha", 1, opt, rc_convert_rgb_alpha, NULL},
|
||||
{"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL},
|
||||
{"dataflow optimize", 1, opt, rc_optimize, NULL},
|
||||
{"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL},
|
||||
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
|
||||
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
|
||||
{"dataflow presubtract", 1, opt, rc_local_transform, opt_presubtract},
|
||||
{"pair translate", 1, 1, rc_pair_translate, NULL},
|
||||
{"pair scheduling", 1, 1, rc_pair_schedule, &opt},
|
||||
{"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL},
|
||||
{"register allocation", 1, 1, rc_pair_regalloc, &opt},
|
||||
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
|
||||
{"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL},
|
||||
{"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL},
|
||||
{"dump machine code", 0, is_r500 && dbg, r500FragmentProgramDump, NULL},
|
||||
{"dump machine code", 0, !is_r500 && dbg, r300FragmentProgramDump, NULL},
|
||||
{NULL, 0, 0, NULL, NULL}};
|
||||
/* clang-format on */
|
||||
|
||||
/* List of compiler passes. */
|
||||
struct radeon_compiler_pass fs_list[] = {
|
||||
/* NAME DUMP PREDICATE FUNCTION PARAM */
|
||||
{"rewrite depth out", 1, 1, rc_rewrite_depth_out, NULL},
|
||||
{"force alpha to one", 1, alpha2one, rc_local_transform, force_alpha_to_one},
|
||||
{"transform TEX", 1, 1, rc_local_transform, rewrite_tex},
|
||||
{"transform IF", 1, is_r500, r500_transform_IF, NULL},
|
||||
{"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500},
|
||||
{"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300},
|
||||
{"deadcode", 1, opt, rc_dataflow_deadcode, NULL},
|
||||
{"convert rgb<->alpha", 1, opt, rc_convert_rgb_alpha, NULL},
|
||||
{"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL},
|
||||
{"dataflow optimize", 1, opt, rc_optimize, NULL},
|
||||
{"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL},
|
||||
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
|
||||
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
|
||||
{"dataflow presubtract", 1, opt, rc_local_transform, opt_presubtract},
|
||||
{"pair translate", 1, 1, rc_pair_translate, NULL},
|
||||
{"pair scheduling", 1, 1, rc_pair_schedule, &opt},
|
||||
{"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL},
|
||||
{"register allocation", 1, 1, rc_pair_regalloc, &opt},
|
||||
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
|
||||
{"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL},
|
||||
{"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL},
|
||||
{"dump machine code", 0, is_r500 && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL},
|
||||
{"dump machine code", 0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL},
|
||||
{NULL, 0, 0, NULL, NULL}
|
||||
};
|
||||
c->Base.type = RC_FRAGMENT_PROGRAM;
|
||||
c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps;
|
||||
|
||||
c->Base.type = RC_FRAGMENT_PROGRAM;
|
||||
c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps;
|
||||
rc_run_compiler(&c->Base, fs_list);
|
||||
|
||||
rc_run_compiler(&c->Base, fs_list);
|
||||
|
||||
rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
|
||||
rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -3,197 +3,200 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "radeon_compiler.h"
|
||||
#include "radeon_code.h"
|
||||
#include "r300_reg.h"
|
||||
#include "radeon_code.h"
|
||||
#include "radeon_compiler.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
static const char* r300_vs_ve_ops[] = {
|
||||
/* R300 vector ops */
|
||||
" VE_NO_OP",
|
||||
" VE_DOT_PRODUCT",
|
||||
" VE_MULTIPLY",
|
||||
" VE_ADD",
|
||||
" VE_MULTIPLY_ADD",
|
||||
" VE_DISTANCE_FACTOR",
|
||||
" VE_FRACTION",
|
||||
" VE_MAXIMUM",
|
||||
" VE_MINIMUM",
|
||||
"VE_SET_GREATER_THAN_EQUAL",
|
||||
" VE_SET_LESS_THAN",
|
||||
" VE_MULTIPLYX2_ADD",
|
||||
" VE_MULTIPLY_CLAMP",
|
||||
" VE_FLT2FIX_DX",
|
||||
" VE_FLT2FIX_DX_RND",
|
||||
/* R500 vector ops */
|
||||
" VE_PRED_SET_EQ_PUSH",
|
||||
" VE_PRED_SET_GT_PUSH",
|
||||
" VE_PRED_SET_GTE_PUSH",
|
||||
" VE_PRED_SET_NEQ_PUSH",
|
||||
" VE_COND_WRITE_EQ",
|
||||
" VE_COND_WRITE_GT",
|
||||
" VE_COND_WRITE_GTE",
|
||||
" VE_COND_WRITE_NEQ",
|
||||
" VE_COND_MUX_EQ",
|
||||
" VE_COND_MUX_GT",
|
||||
" VE_COND_MUX_GTE",
|
||||
" VE_SET_GREATER_THAN",
|
||||
" VE_SET_EQUAL",
|
||||
" VE_SET_NOT_EQUAL",
|
||||
" (reserved)",
|
||||
" (reserved)",
|
||||
" (reserved)",
|
||||
static const char *r300_vs_ve_ops[] = {
|
||||
/* R300 vector ops */
|
||||
" VE_NO_OP",
|
||||
" VE_DOT_PRODUCT",
|
||||
" VE_MULTIPLY",
|
||||
" VE_ADD",
|
||||
" VE_MULTIPLY_ADD",
|
||||
" VE_DISTANCE_FACTOR",
|
||||
" VE_FRACTION",
|
||||
" VE_MAXIMUM",
|
||||
" VE_MINIMUM",
|
||||
"VE_SET_GREATER_THAN_EQUAL",
|
||||
" VE_SET_LESS_THAN",
|
||||
" VE_MULTIPLYX2_ADD",
|
||||
" VE_MULTIPLY_CLAMP",
|
||||
" VE_FLT2FIX_DX",
|
||||
" VE_FLT2FIX_DX_RND",
|
||||
/* R500 vector ops */
|
||||
" VE_PRED_SET_EQ_PUSH",
|
||||
" VE_PRED_SET_GT_PUSH",
|
||||
" VE_PRED_SET_GTE_PUSH",
|
||||
" VE_PRED_SET_NEQ_PUSH",
|
||||
" VE_COND_WRITE_EQ",
|
||||
" VE_COND_WRITE_GT",
|
||||
" VE_COND_WRITE_GTE",
|
||||
" VE_COND_WRITE_NEQ",
|
||||
" VE_COND_MUX_EQ",
|
||||
" VE_COND_MUX_GT",
|
||||
" VE_COND_MUX_GTE",
|
||||
" VE_SET_GREATER_THAN",
|
||||
" VE_SET_EQUAL",
|
||||
" VE_SET_NOT_EQUAL",
|
||||
" (reserved)",
|
||||
" (reserved)",
|
||||
" (reserved)",
|
||||
};
|
||||
|
||||
static const char* r300_vs_me_ops[] = {
|
||||
/* R300 math ops */
|
||||
" ME_NO_OP",
|
||||
" ME_EXP_BASE2_DX",
|
||||
" ME_LOG_BASE2_DX",
|
||||
" ME_EXP_BASEE_FF",
|
||||
" ME_LIGHT_COEFF_DX",
|
||||
" ME_POWER_FUNC_FF",
|
||||
" ME_RECIP_DX",
|
||||
" ME_RECIP_FF",
|
||||
" ME_RECIP_SQRT_DX",
|
||||
" ME_RECIP_SQRT_FF",
|
||||
" ME_MULTIPLY",
|
||||
" ME_EXP_BASE2_FULL_DX",
|
||||
" ME_LOG_BASE2_FULL_DX",
|
||||
" ME_POWER_FUNC_FF_CLAMP_B",
|
||||
"ME_POWER_FUNC_FF_CLAMP_B1",
|
||||
"ME_POWER_FUNC_FF_CLAMP_01",
|
||||
" ME_SIN",
|
||||
" ME_COS",
|
||||
/* R500 math ops */
|
||||
" ME_LOG_BASE2_IEEE",
|
||||
" ME_RECIP_IEEE",
|
||||
" ME_RECIP_SQRT_IEEE",
|
||||
" ME_PRED_SET_EQ",
|
||||
" ME_PRED_SET_GT",
|
||||
" ME_PRED_SET_GTE",
|
||||
" ME_PRED_SET_NEQ",
|
||||
" ME_PRED_SET_CLR",
|
||||
" ME_PRED_SET_INV",
|
||||
" ME_PRED_SET_POP",
|
||||
" ME_PRED_SET_RESTORE",
|
||||
" (reserved)",
|
||||
" (reserved)",
|
||||
" (reserved)",
|
||||
static const char *r300_vs_me_ops[] = {
|
||||
/* R300 math ops */
|
||||
" ME_NO_OP",
|
||||
" ME_EXP_BASE2_DX",
|
||||
" ME_LOG_BASE2_DX",
|
||||
" ME_EXP_BASEE_FF",
|
||||
" ME_LIGHT_COEFF_DX",
|
||||
" ME_POWER_FUNC_FF",
|
||||
" ME_RECIP_DX",
|
||||
" ME_RECIP_FF",
|
||||
" ME_RECIP_SQRT_DX",
|
||||
" ME_RECIP_SQRT_FF",
|
||||
" ME_MULTIPLY",
|
||||
" ME_EXP_BASE2_FULL_DX",
|
||||
" ME_LOG_BASE2_FULL_DX",
|
||||
" ME_POWER_FUNC_FF_CLAMP_B",
|
||||
"ME_POWER_FUNC_FF_CLAMP_B1",
|
||||
"ME_POWER_FUNC_FF_CLAMP_01",
|
||||
" ME_SIN",
|
||||
" ME_COS",
|
||||
/* R500 math ops */
|
||||
" ME_LOG_BASE2_IEEE",
|
||||
" ME_RECIP_IEEE",
|
||||
" ME_RECIP_SQRT_IEEE",
|
||||
" ME_PRED_SET_EQ",
|
||||
" ME_PRED_SET_GT",
|
||||
" ME_PRED_SET_GTE",
|
||||
" ME_PRED_SET_NEQ",
|
||||
" ME_PRED_SET_CLR",
|
||||
" ME_PRED_SET_INV",
|
||||
" ME_PRED_SET_POP",
|
||||
" ME_PRED_SET_RESTORE",
|
||||
" (reserved)",
|
||||
" (reserved)",
|
||||
" (reserved)",
|
||||
};
|
||||
|
||||
/* XXX refactor to avoid clashing symbols */
|
||||
static const char* r300_vs_src_debug[] = {
|
||||
"t",
|
||||
"i",
|
||||
"c",
|
||||
"a",
|
||||
static const char *r300_vs_src_debug[] = {
|
||||
"t",
|
||||
"i",
|
||||
"c",
|
||||
"a",
|
||||
};
|
||||
|
||||
static const char* r300_vs_dst_debug[] = {
|
||||
"t",
|
||||
"a0",
|
||||
"o",
|
||||
"ox",
|
||||
"a",
|
||||
"i",
|
||||
"u",
|
||||
"u",
|
||||
static const char *r300_vs_dst_debug[] = {
|
||||
"t",
|
||||
"a0",
|
||||
"o",
|
||||
"ox",
|
||||
"a",
|
||||
"i",
|
||||
"u",
|
||||
"u",
|
||||
};
|
||||
|
||||
static const char* r300_vs_swiz_debug[] = {
|
||||
"X",
|
||||
"Y",
|
||||
"Z",
|
||||
"W",
|
||||
"0",
|
||||
"1",
|
||||
"U",
|
||||
"U",
|
||||
static const char *r300_vs_swiz_debug[] = {
|
||||
"X",
|
||||
"Y",
|
||||
"Z",
|
||||
"W",
|
||||
"0",
|
||||
"1",
|
||||
"U",
|
||||
"U",
|
||||
};
|
||||
|
||||
|
||||
static void r300_vs_op_dump(uint32_t op)
|
||||
static void
|
||||
r300_vs_op_dump(uint32_t op)
|
||||
{
|
||||
fprintf(stderr, " dst: %d%s op: ",
|
||||
(op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
|
||||
if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) {
|
||||
fprintf(stderr, "PRED %u",
|
||||
(op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1);
|
||||
}
|
||||
if (op & 0x80) {
|
||||
if (op & 0x1) {
|
||||
fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n");
|
||||
} else {
|
||||
fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n");
|
||||
}
|
||||
} else if (op & 0x40) {
|
||||
fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]);
|
||||
} else {
|
||||
fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]);
|
||||
}
|
||||
fprintf(stderr, " dst: %d%s op: ", (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
|
||||
if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) {
|
||||
fprintf(stderr, "PRED %u", (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1);
|
||||
}
|
||||
if (op & 0x80) {
|
||||
if (op & 0x1) {
|
||||
fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n");
|
||||
} else {
|
||||
fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n");
|
||||
}
|
||||
} else if (op & 0x40) {
|
||||
fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]);
|
||||
} else {
|
||||
fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]);
|
||||
}
|
||||
}
|
||||
|
||||
static void r300_vs_src_dump(uint32_t src)
|
||||
static void
|
||||
r300_vs_src_dump(uint32_t src)
|
||||
{
|
||||
fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
|
||||
(src >> 5) & 0xff, r300_vs_src_debug[src & 0x3],
|
||||
src & (1 << 25) ? "-" : " ",
|
||||
r300_vs_swiz_debug[(src >> 13) & 0x7],
|
||||
src & (1 << 26) ? "-" : " ",
|
||||
r300_vs_swiz_debug[(src >> 16) & 0x7],
|
||||
src & (1 << 27) ? "-" : " ",
|
||||
r300_vs_swiz_debug[(src >> 19) & 0x7],
|
||||
src & (1 << 28) ? "-" : " ",
|
||||
r300_vs_swiz_debug[(src >> 22) & 0x7]);
|
||||
fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", (src >> 5) & 0xff,
|
||||
r300_vs_src_debug[src & 0x3], src & (1 << 25) ? "-" : " ",
|
||||
r300_vs_swiz_debug[(src >> 13) & 0x7], src & (1 << 26) ? "-" : " ",
|
||||
r300_vs_swiz_debug[(src >> 16) & 0x7], src & (1 << 27) ? "-" : " ",
|
||||
r300_vs_swiz_debug[(src >> 19) & 0x7], src & (1 << 28) ? "-" : " ",
|
||||
r300_vs_swiz_debug[(src >> 22) & 0x7]);
|
||||
}
|
||||
|
||||
void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user)
|
||||
void
|
||||
r300_vertex_program_dump(struct radeon_compiler *compiler, void *user)
|
||||
{
|
||||
struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler*)compiler;
|
||||
struct r300_vertex_program_code * vs = c->code;
|
||||
unsigned instrcount = vs->length / 4;
|
||||
unsigned i;
|
||||
struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler *)compiler;
|
||||
struct r300_vertex_program_code *vs = c->code;
|
||||
unsigned instrcount = vs->length / 4;
|
||||
unsigned i;
|
||||
|
||||
fprintf(stderr, "Final vertex program code:\n");
|
||||
fprintf(stderr, "Final vertex program code:\n");
|
||||
|
||||
for(i = 0; i < instrcount; i++) {
|
||||
unsigned offset = i*4;
|
||||
unsigned src;
|
||||
for (i = 0; i < instrcount; i++) {
|
||||
unsigned offset = i * 4;
|
||||
unsigned src;
|
||||
|
||||
fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]);
|
||||
r300_vs_op_dump(vs->body.d[offset]);
|
||||
fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]);
|
||||
r300_vs_op_dump(vs->body.d[offset]);
|
||||
|
||||
for(src = 0; src < 3; ++src) {
|
||||
fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]);
|
||||
r300_vs_src_dump(vs->body.d[offset+1+src]);
|
||||
}
|
||||
}
|
||||
for (src = 0; src < 3; ++src) {
|
||||
fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset + 1 + src]);
|
||||
r300_vs_src_dump(vs->body.d[offset + 1 + src]);
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
|
||||
for(i = 0; i < vs->num_fc_ops; i++) {
|
||||
unsigned is_loop = 0;
|
||||
switch((vs->fc_ops >> (i * 2)) & 0x3 ) {
|
||||
case 0: fprintf(stderr, "NOP"); break;
|
||||
case 1: fprintf(stderr, "JUMP"); break;
|
||||
case 2: fprintf(stderr, "LOOP"); is_loop = 1; break;
|
||||
case 3: fprintf(stderr, "JSR"); break;
|
||||
}
|
||||
if (c->Base.is_r500) {
|
||||
fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x "
|
||||
"loop data->0x%08x\n",
|
||||
vs->fc_op_addrs.r500[i].uw,
|
||||
vs->fc_op_addrs.r500[i].lw,
|
||||
vs->fc_loop_index[i]);
|
||||
if (is_loop) {
|
||||
fprintf(stderr, "Before = %u First = %u Last = %u\n",
|
||||
vs->fc_op_addrs.r500[i].lw & 0xffff,
|
||||
(vs->fc_op_addrs.r500[i].uw >> 16) & 0xffff,
|
||||
vs->fc_op_addrs.r500[i].uw & 0xffff);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "Flow Control Ops: 0x%08x\n", vs->fc_ops);
|
||||
for (i = 0; i < vs->num_fc_ops; i++) {
|
||||
unsigned is_loop = 0;
|
||||
switch ((vs->fc_ops >> (i * 2)) & 0x3) {
|
||||
case 0:
|
||||
fprintf(stderr, "NOP");
|
||||
break;
|
||||
case 1:
|
||||
fprintf(stderr, "JUMP");
|
||||
break;
|
||||
case 2:
|
||||
fprintf(stderr, "LOOP");
|
||||
is_loop = 1;
|
||||
break;
|
||||
case 3:
|
||||
fprintf(stderr, "JSR");
|
||||
break;
|
||||
}
|
||||
if (c->Base.is_r500) {
|
||||
fprintf(stderr,
|
||||
": uw-> 0x%08x lw-> 0x%08x "
|
||||
"loop data->0x%08x\n",
|
||||
vs->fc_op_addrs.r500[i].uw, vs->fc_op_addrs.r500[i].lw, vs->fc_loop_index[i]);
|
||||
if (is_loop) {
|
||||
fprintf(
|
||||
stderr, "Before = %u First = %u Last = %u\n", vs->fc_op_addrs.r500[i].lw & 0xffff,
|
||||
(vs->fc_op_addrs.r500[i].uw >> 16) & 0xffff, vs->fc_op_addrs.r500[i].uw & 0xffff);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, ": 0x%08x\n", vs->fc_op_addrs.r300[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,208 +7,194 @@
|
|||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "r300_reg.h"
|
||||
#include "radeon_compiler_util.h"
|
||||
#include "radeon_list.h"
|
||||
#include "radeon_variable.h"
|
||||
#include "r300_reg.h"
|
||||
|
||||
#include "util/compiler.h"
|
||||
|
||||
/**
|
||||
* Rewrite IF instructions to use the ALU result special register.
|
||||
*/
|
||||
static void r500_transform_IF_instr(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst_if,
|
||||
struct rc_list * var_list)
|
||||
static void
|
||||
r500_transform_IF_instr(struct radeon_compiler *c, struct rc_instruction *inst_if,
|
||||
struct rc_list *var_list)
|
||||
{
|
||||
|
||||
struct rc_variable * writer;
|
||||
struct rc_list * writer_list, * list_ptr;
|
||||
unsigned int generic_if = 0;
|
||||
unsigned int alu_chan;
|
||||
struct rc_variable *writer;
|
||||
struct rc_list *writer_list, *list_ptr;
|
||||
unsigned int generic_if = 0;
|
||||
unsigned int alu_chan;
|
||||
|
||||
writer_list = rc_variable_list_get_writers(
|
||||
var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]);
|
||||
if (!writer_list) {
|
||||
generic_if = 1;
|
||||
} else {
|
||||
writer_list = rc_variable_list_get_writers(var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]);
|
||||
if (!writer_list) {
|
||||
generic_if = 1;
|
||||
} else {
|
||||
|
||||
/* Make sure it is safe for the writers to write to
|
||||
* ALU Result */
|
||||
for (list_ptr = writer_list; list_ptr;
|
||||
list_ptr = list_ptr->Next) {
|
||||
struct rc_instruction * inst;
|
||||
writer = list_ptr->Item;
|
||||
/* We are going to modify the destination register
|
||||
* of writer, so if it has a reader other than
|
||||
* inst_if (aka ReaderCount > 1) we must fall back to
|
||||
* our generic IF.
|
||||
* If the writer has a lower IP than inst_if, this
|
||||
* means that inst_if is above the writer in a loop.
|
||||
* I'm not sure why this would ever happen, but
|
||||
* if it does we want to make sure we fall back
|
||||
* to our generic IF. */
|
||||
if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) {
|
||||
generic_if = 1;
|
||||
break;
|
||||
}
|
||||
/* Make sure it is safe for the writers to write to
|
||||
* ALU Result */
|
||||
for (list_ptr = writer_list; list_ptr; list_ptr = list_ptr->Next) {
|
||||
struct rc_instruction *inst;
|
||||
writer = list_ptr->Item;
|
||||
/* We are going to modify the destination register
|
||||
* of writer, so if it has a reader other than
|
||||
* inst_if (aka ReaderCount > 1) we must fall back to
|
||||
* our generic IF.
|
||||
* If the writer has a lower IP than inst_if, this
|
||||
* means that inst_if is above the writer in a loop.
|
||||
* I'm not sure why this would ever happen, but
|
||||
* if it does we want to make sure we fall back
|
||||
* to our generic IF. */
|
||||
if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) {
|
||||
generic_if = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* The ALU Result is not preserved across IF
|
||||
* instructions, so if there is another IF
|
||||
* instruction between writer and inst_if, then
|
||||
* we need to fall back to generic IF. */
|
||||
for (inst = writer->Inst; inst != inst_if; inst = inst->Next) {
|
||||
const struct rc_opcode_info * info =
|
||||
rc_get_opcode_info(inst->U.I.Opcode);
|
||||
if (info->IsFlowControl) {
|
||||
generic_if = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (generic_if) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* The ALU Result is not preserved across IF
|
||||
* instructions, so if there is another IF
|
||||
* instruction between writer and inst_if, then
|
||||
* we need to fall back to generic IF. */
|
||||
for (inst = writer->Inst; inst != inst_if; inst = inst->Next) {
|
||||
const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
if (info->IsFlowControl) {
|
||||
generic_if = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (generic_if) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) {
|
||||
alu_chan = RC_ALURESULT_X;
|
||||
} else {
|
||||
alu_chan = RC_ALURESULT_W;
|
||||
}
|
||||
if (generic_if) {
|
||||
struct rc_instruction * inst_mov =
|
||||
rc_insert_new_instruction(c, inst_if->Prev);
|
||||
if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) {
|
||||
alu_chan = RC_ALURESULT_X;
|
||||
} else {
|
||||
alu_chan = RC_ALURESULT_W;
|
||||
}
|
||||
if (generic_if) {
|
||||
struct rc_instruction *inst_mov = rc_insert_new_instruction(c, inst_if->Prev);
|
||||
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.DstReg.WriteMask = 0;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_NONE;
|
||||
inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
|
||||
inst_mov->U.I.WriteALUResult = alu_chan;
|
||||
inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
|
||||
if (alu_chan == RC_ALURESULT_X) {
|
||||
inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
|
||||
inst_mov->U.I.SrcReg[0].Swizzle,
|
||||
RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
|
||||
RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
|
||||
} else {
|
||||
inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
|
||||
inst_mov->U.I.SrcReg[0].Swizzle,
|
||||
RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
|
||||
RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z);
|
||||
}
|
||||
} else {
|
||||
rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER;
|
||||
unsigned int preserve_opcode = 0;
|
||||
for (list_ptr = writer_list; list_ptr;
|
||||
list_ptr = list_ptr->Next) {
|
||||
writer = list_ptr->Item;
|
||||
switch(writer->Inst->U.I.Opcode) {
|
||||
case RC_OPCODE_SEQ:
|
||||
compare_func = RC_COMPARE_FUNC_EQUAL;
|
||||
break;
|
||||
case RC_OPCODE_SNE:
|
||||
compare_func = RC_COMPARE_FUNC_NOTEQUAL;
|
||||
break;
|
||||
case RC_OPCODE_SGE:
|
||||
compare_func = RC_COMPARE_FUNC_GEQUAL;
|
||||
break;
|
||||
case RC_OPCODE_SLT:
|
||||
compare_func = RC_COMPARE_FUNC_LESS;
|
||||
break;
|
||||
default:
|
||||
compare_func = RC_COMPARE_FUNC_NOTEQUAL;
|
||||
preserve_opcode = 1;
|
||||
break;
|
||||
}
|
||||
if (!preserve_opcode) {
|
||||
writer->Inst->U.I.Opcode = RC_OPCODE_ADD;
|
||||
writer->Inst->U.I.SrcReg[1].Negate =
|
||||
~writer->Inst->U.I.SrcReg[1].Negate;
|
||||
}
|
||||
writer->Inst->U.I.DstReg.WriteMask = 0;
|
||||
writer->Inst->U.I.DstReg.File = RC_FILE_NONE;
|
||||
writer->Inst->U.I.WriteALUResult = alu_chan;
|
||||
writer->Inst->U.I.ALUResultCompare = compare_func;
|
||||
}
|
||||
}
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.DstReg.WriteMask = 0;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_NONE;
|
||||
inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
|
||||
inst_mov->U.I.WriteALUResult = alu_chan;
|
||||
inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
|
||||
if (alu_chan == RC_ALURESULT_X) {
|
||||
inst_mov->U.I.SrcReg[0].Swizzle =
|
||||
combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle, RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
|
||||
RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
|
||||
} else {
|
||||
inst_mov->U.I.SrcReg[0].Swizzle =
|
||||
combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
|
||||
RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z);
|
||||
}
|
||||
} else {
|
||||
rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER;
|
||||
unsigned int preserve_opcode = 0;
|
||||
for (list_ptr = writer_list; list_ptr; list_ptr = list_ptr->Next) {
|
||||
writer = list_ptr->Item;
|
||||
switch (writer->Inst->U.I.Opcode) {
|
||||
case RC_OPCODE_SEQ:
|
||||
compare_func = RC_COMPARE_FUNC_EQUAL;
|
||||
break;
|
||||
case RC_OPCODE_SNE:
|
||||
compare_func = RC_COMPARE_FUNC_NOTEQUAL;
|
||||
break;
|
||||
case RC_OPCODE_SGE:
|
||||
compare_func = RC_COMPARE_FUNC_GEQUAL;
|
||||
break;
|
||||
case RC_OPCODE_SLT:
|
||||
compare_func = RC_COMPARE_FUNC_LESS;
|
||||
break;
|
||||
default:
|
||||
compare_func = RC_COMPARE_FUNC_NOTEQUAL;
|
||||
preserve_opcode = 1;
|
||||
break;
|
||||
}
|
||||
if (!preserve_opcode) {
|
||||
writer->Inst->U.I.Opcode = RC_OPCODE_ADD;
|
||||
writer->Inst->U.I.SrcReg[1].Negate = ~writer->Inst->U.I.SrcReg[1].Negate;
|
||||
}
|
||||
writer->Inst->U.I.DstReg.WriteMask = 0;
|
||||
writer->Inst->U.I.DstReg.File = RC_FILE_NONE;
|
||||
writer->Inst->U.I.WriteALUResult = alu_chan;
|
||||
writer->Inst->U.I.ALUResultCompare = compare_func;
|
||||
}
|
||||
}
|
||||
|
||||
inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
|
||||
inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
|
||||
inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(
|
||||
RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
|
||||
RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
|
||||
inst_if->U.I.SrcReg[0].Negate = 0;
|
||||
inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
|
||||
inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
|
||||
inst_if->U.I.SrcReg[0].Swizzle =
|
||||
RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
|
||||
inst_if->U.I.SrcReg[0].Negate = 0;
|
||||
}
|
||||
|
||||
void r500_transform_IF(
|
||||
struct radeon_compiler * c,
|
||||
void *user)
|
||||
void
|
||||
r500_transform_IF(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct rc_list * var_list = rc_get_variables(c);
|
||||
struct rc_list *var_list = rc_get_variables(c);
|
||||
|
||||
struct rc_instruction * inst = c->Program.Instructions.Next;
|
||||
while(inst != &c->Program.Instructions) {
|
||||
struct rc_instruction * current = inst;
|
||||
inst = inst->Next;
|
||||
struct rc_instruction *inst = c->Program.Instructions.Next;
|
||||
while (inst != &c->Program.Instructions) {
|
||||
struct rc_instruction *current = inst;
|
||||
inst = inst->Next;
|
||||
|
||||
if (current->U.I.Opcode == RC_OPCODE_IF)
|
||||
r500_transform_IF_instr(c, current, var_list);
|
||||
}
|
||||
if (current->U.I.Opcode == RC_OPCODE_IF)
|
||||
r500_transform_IF_instr(c, current, var_list);
|
||||
}
|
||||
}
|
||||
|
||||
static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
|
||||
static int
|
||||
r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
|
||||
{
|
||||
unsigned int relevant;
|
||||
int i;
|
||||
unsigned int relevant;
|
||||
int i;
|
||||
|
||||
if (opcode == RC_OPCODE_TEX ||
|
||||
opcode == RC_OPCODE_TXB ||
|
||||
opcode == RC_OPCODE_TXP ||
|
||||
opcode == RC_OPCODE_TXD ||
|
||||
opcode == RC_OPCODE_TXL ||
|
||||
opcode == RC_OPCODE_KIL) {
|
||||
if (reg.Abs)
|
||||
return 0;
|
||||
if (opcode == RC_OPCODE_TEX || opcode == RC_OPCODE_TXB || opcode == RC_OPCODE_TXP ||
|
||||
opcode == RC_OPCODE_TXD || opcode == RC_OPCODE_TXL || opcode == RC_OPCODE_KIL) {
|
||||
if (reg.Abs)
|
||||
return 0;
|
||||
|
||||
if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))
|
||||
return 0;
|
||||
if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))
|
||||
return 0;
|
||||
|
||||
for(i = 0; i < 4; ++i) {
|
||||
unsigned int swz = GET_SWZ(reg.Swizzle, i);
|
||||
if (swz == RC_SWIZZLE_UNUSED) {
|
||||
reg.Negate &= ~(1 << i);
|
||||
continue;
|
||||
}
|
||||
if (swz >= 4)
|
||||
return 0;
|
||||
}
|
||||
for (i = 0; i < 4; ++i) {
|
||||
unsigned int swz = GET_SWZ(reg.Swizzle, i);
|
||||
if (swz == RC_SWIZZLE_UNUSED) {
|
||||
reg.Negate &= ~(1 << i);
|
||||
continue;
|
||||
}
|
||||
if (swz >= 4)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (reg.Negate)
|
||||
return 0;
|
||||
if (reg.Negate)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
} else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) {
|
||||
/* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
|
||||
* if it doesn't fit perfectly into a .xyzw case... */
|
||||
if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate)
|
||||
return 1;
|
||||
return 1;
|
||||
} else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) {
|
||||
/* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
|
||||
* if it doesn't fit perfectly into a .xyzw case... */
|
||||
if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
} else {
|
||||
/* ALU instructions support almost everything */
|
||||
relevant = 0;
|
||||
for(i = 0; i < 3; ++i) {
|
||||
unsigned int swz = GET_SWZ(reg.Swizzle, i);
|
||||
if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
|
||||
relevant |= 1 << i;
|
||||
}
|
||||
if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
|
||||
return 0;
|
||||
return 0;
|
||||
} else {
|
||||
/* ALU instructions support almost everything */
|
||||
relevant = 0;
|
||||
for (i = 0; i < 3; ++i) {
|
||||
unsigned int swz = GET_SWZ(reg.Swizzle, i);
|
||||
if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
|
||||
relevant |= 1 << i;
|
||||
}
|
||||
if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -217,301 +203,273 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
|
|||
* The only thing we *cannot* do in an ALU instruction is per-component
|
||||
* negation.
|
||||
*/
|
||||
static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
|
||||
struct rc_swizzle_split * split)
|
||||
static void
|
||||
r500_swizzle_split(struct rc_src_register src, unsigned int usemask, struct rc_swizzle_split *split)
|
||||
{
|
||||
unsigned int negatebase[2] = { 0, 0 };
|
||||
int i;
|
||||
unsigned int negatebase[2] = {0, 0};
|
||||
int i;
|
||||
|
||||
for(i = 0; i < 4; ++i) {
|
||||
unsigned int swz = GET_SWZ(src.Swizzle, i);
|
||||
if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i))
|
||||
continue;
|
||||
negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
|
||||
}
|
||||
for (i = 0; i < 4; ++i) {
|
||||
unsigned int swz = GET_SWZ(src.Swizzle, i);
|
||||
if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i))
|
||||
continue;
|
||||
negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
|
||||
}
|
||||
|
||||
split->NumPhases = 0;
|
||||
split->NumPhases = 0;
|
||||
|
||||
for(i = 0; i <= 1; ++i) {
|
||||
if (!negatebase[i])
|
||||
continue;
|
||||
for (i = 0; i <= 1; ++i) {
|
||||
if (!negatebase[i])
|
||||
continue;
|
||||
|
||||
split->Phase[split->NumPhases++] = negatebase[i];
|
||||
}
|
||||
split->Phase[split->NumPhases++] = negatebase[i];
|
||||
}
|
||||
}
|
||||
|
||||
const struct rc_swizzle_caps r500_swizzle_caps = {
|
||||
.IsNative = r500_swizzle_is_native,
|
||||
.Split = r500_swizzle_split
|
||||
};
|
||||
const struct rc_swizzle_caps r500_swizzle_caps = {.IsNative = r500_swizzle_is_native,
|
||||
.Split = r500_swizzle_split};
|
||||
|
||||
static char *toswiz(int swiz_val) {
|
||||
switch(swiz_val) {
|
||||
case 0: return "R";
|
||||
case 1: return "G";
|
||||
case 2: return "B";
|
||||
case 3: return "A";
|
||||
case 4: return "0";
|
||||
case 5: return "H";
|
||||
case 6: return "1";
|
||||
case 7: return "U";
|
||||
}
|
||||
return NULL;
|
||||
static char *
|
||||
toswiz(int swiz_val)
|
||||
{
|
||||
switch (swiz_val) {
|
||||
case 0: return "R";
|
||||
case 1: return "G";
|
||||
case 2: return "B";
|
||||
case 3: return "A";
|
||||
case 4: return "0";
|
||||
case 5: return "H";
|
||||
case 6: return "1";
|
||||
case 7: return "U";
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static char *toop(int op_val)
|
||||
static char *
|
||||
toop(int op_val)
|
||||
{
|
||||
char *str = NULL;
|
||||
switch (op_val) {
|
||||
case 0: str = "MAD"; break;
|
||||
case 1: str = "DP3"; break;
|
||||
case 2: str = "DP4"; break;
|
||||
case 3: str = "D2A"; break;
|
||||
case 4: str = "MIN"; break;
|
||||
case 5: str = "MAX"; break;
|
||||
case 6: str = "Reserved"; break;
|
||||
case 7: str = "CND"; break;
|
||||
case 8: str = "CMP"; break;
|
||||
case 9: str = "FRC"; break;
|
||||
case 10: str = "SOP"; break;
|
||||
case 11: str = "MDH"; break;
|
||||
case 12: str = "MDV"; break;
|
||||
}
|
||||
return str;
|
||||
char *str = NULL;
|
||||
switch (op_val) {
|
||||
case 0: str = "MAD"; break;
|
||||
case 1: str = "DP3"; break;
|
||||
case 2: str = "DP4"; break;
|
||||
case 3: str = "D2A"; break;
|
||||
case 4: str = "MIN"; break;
|
||||
case 5: str = "MAX"; break;
|
||||
case 6: str = "Reserved"; break;
|
||||
case 7: str = "CND"; break;
|
||||
case 8: str = "CMP"; break;
|
||||
case 9: str = "FRC"; break;
|
||||
case 10: str = "SOP"; break;
|
||||
case 11: str = "MDH"; break;
|
||||
case 12: str = "MDV"; break;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
static char *to_alpha_op(int op_val)
|
||||
static char *
|
||||
to_alpha_op(int op_val)
|
||||
{
|
||||
char *str = NULL;
|
||||
switch (op_val) {
|
||||
case 0: str = "MAD"; break;
|
||||
case 1: str = "DP"; break;
|
||||
case 2: str = "MIN"; break;
|
||||
case 3: str = "MAX"; break;
|
||||
case 4: str = "Reserved"; break;
|
||||
case 5: str = "CND"; break;
|
||||
case 6: str = "CMP"; break;
|
||||
case 7: str = "FRC"; break;
|
||||
case 8: str = "EX2"; break;
|
||||
case 9: str = "LN2"; break;
|
||||
case 10: str = "RCP"; break;
|
||||
case 11: str = "RSQ"; break;
|
||||
case 12: str = "SIN"; break;
|
||||
case 13: str = "COS"; break;
|
||||
case 14: str = "MDH"; break;
|
||||
case 15: str = "MDV"; break;
|
||||
}
|
||||
return str;
|
||||
char *str = NULL;
|
||||
switch (op_val) {
|
||||
case 0: str = "MAD"; break;
|
||||
case 1: str = "DP"; break;
|
||||
case 2: str = "MIN"; break;
|
||||
case 3: str = "MAX"; break;
|
||||
case 4: str = "Reserved"; break;
|
||||
case 5: str = "CND"; break;
|
||||
case 6: str = "CMP"; break;
|
||||
case 7: str = "FRC"; break;
|
||||
case 8: str = "EX2"; break;
|
||||
case 9: str = "LN2"; break;
|
||||
case 10: str = "RCP"; break;
|
||||
case 11: str = "RSQ"; break;
|
||||
case 12: str = "SIN"; break;
|
||||
case 13: str = "COS"; break;
|
||||
case 14: str = "MDH"; break;
|
||||
case 15: str = "MDV"; break;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
static char *to_mask(int val)
|
||||
static char *
|
||||
to_mask(int val)
|
||||
{
|
||||
char *str = NULL;
|
||||
switch(val) {
|
||||
case 0: str = "NONE"; break;
|
||||
case 1: str = "R"; break;
|
||||
case 2: str = "G"; break;
|
||||
case 3: str = "RG"; break;
|
||||
case 4: str = "B"; break;
|
||||
case 5: str = "RB"; break;
|
||||
case 6: str = "GB"; break;
|
||||
case 7: str = "RGB"; break;
|
||||
case 8: str = "A"; break;
|
||||
case 9: str = "AR"; break;
|
||||
case 10: str = "AG"; break;
|
||||
case 11: str = "ARG"; break;
|
||||
case 12: str = "AB"; break;
|
||||
case 13: str = "ARB"; break;
|
||||
case 14: str = "AGB"; break;
|
||||
case 15: str = "ARGB"; break;
|
||||
}
|
||||
return str;
|
||||
char *str = NULL;
|
||||
switch (val) {
|
||||
case 0: str = "NONE"; break;
|
||||
case 1: str = "R"; break;
|
||||
case 2: str = "G"; break;
|
||||
case 3: str = "RG"; break;
|
||||
case 4: str = "B"; break;
|
||||
case 5: str = "RB"; break;
|
||||
case 6: str = "GB"; break;
|
||||
case 7: str = "RGB"; break;
|
||||
case 8: str = "A"; break;
|
||||
case 9: str = "AR"; break;
|
||||
case 10: str = "AG"; break;
|
||||
case 11: str = "ARG"; break;
|
||||
case 12: str = "AB"; break;
|
||||
case 13: str = "ARB"; break;
|
||||
case 14: str = "AGB"; break;
|
||||
case 15: str = "ARGB"; break;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
static char *to_texop(int val)
|
||||
static char *
|
||||
to_texop(int val)
|
||||
{
|
||||
switch(val) {
|
||||
case 0: return "NOP";
|
||||
case 1: return "LD";
|
||||
case 2: return "TEXKILL";
|
||||
case 3: return "PROJ";
|
||||
case 4: return "LODBIAS";
|
||||
case 5: return "LOD";
|
||||
case 6: return "DXDY";
|
||||
}
|
||||
return NULL;
|
||||
switch (val) {
|
||||
case 0: return "NOP";
|
||||
case 1: return "LD";
|
||||
case 2: return "TEXKILL";
|
||||
case 3: return "PROJ";
|
||||
case 4: return "LODBIAS";
|
||||
case 5: return "LOD";
|
||||
case 6: return "DXDY";
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void r500FragmentProgramDump(struct radeon_compiler *c, void *user)
|
||||
void
|
||||
r500FragmentProgramDump(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
|
||||
struct r500_fragment_program_code *code = &compiler->code->code.r500;
|
||||
int n, i;
|
||||
uint32_t inst;
|
||||
uint32_t inst0;
|
||||
char *str = NULL;
|
||||
fprintf(stderr, "R500 Fragment Program:\n--------\n");
|
||||
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler *)c;
|
||||
struct r500_fragment_program_code *code = &compiler->code->code.r500;
|
||||
int n, i;
|
||||
uint32_t inst;
|
||||
uint32_t inst0;
|
||||
char *str = NULL;
|
||||
fprintf(stderr, "R500 Fragment Program:\n--------\n");
|
||||
|
||||
for (n = 0; n < code->inst_end+1; n++) {
|
||||
inst0 = inst = code->inst[n].inst0;
|
||||
fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
|
||||
switch(inst & 0x3) {
|
||||
case R500_INST_TYPE_ALU: str = "ALU"; break;
|
||||
case R500_INST_TYPE_OUT: str = "OUT"; break;
|
||||
case R500_INST_TYPE_FC: str = "FC"; break;
|
||||
case R500_INST_TYPE_TEX: str = "TEX"; break;
|
||||
}
|
||||
fprintf(stderr,"%s %s %s %s %s ", str,
|
||||
inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
|
||||
inst & R500_INST_LAST ? "LAST" : "",
|
||||
inst & R500_INST_NOP ? "NOP" : "",
|
||||
inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
|
||||
fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
|
||||
to_mask((inst >> 15) & 0xf));
|
||||
|
||||
switch(inst0 & 0x3) {
|
||||
case R500_INST_TYPE_ALU:
|
||||
case R500_INST_TYPE_OUT:
|
||||
fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);
|
||||
inst = code->inst[n].inst1;
|
||||
|
||||
fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
|
||||
inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
|
||||
(inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
|
||||
(inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
|
||||
(inst >> 30));
|
||||
|
||||
fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
|
||||
inst = code->inst[n].inst2;
|
||||
fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
|
||||
inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
|
||||
(inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
|
||||
(inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
|
||||
(inst >> 30));
|
||||
fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
|
||||
inst = code->inst[n].inst3;
|
||||
fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",
|
||||
(inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
|
||||
(inst >> 11) & 0x3,
|
||||
(inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
|
||||
(inst >> 24) & 0x3, (inst >> 29) & 0x3);
|
||||
|
||||
|
||||
fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
|
||||
inst = code->inst[n].inst4;
|
||||
fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf),
|
||||
(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
|
||||
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
|
||||
(inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
|
||||
(inst >> 29) & 0x3,
|
||||
(inst >> 31) & 0x1);
|
||||
|
||||
fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
|
||||
inst = code->inst[n].inst5;
|
||||
fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
|
||||
(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
|
||||
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
|
||||
(inst >> 23) & 0x3,
|
||||
(inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
|
||||
break;
|
||||
case R500_INST_TYPE_FC:
|
||||
fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2);
|
||||
inst = code->inst[n].inst2;
|
||||
/* JUMP_FUNC JUMP_ANY*/
|
||||
fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff,
|
||||
(inst & R500_FC_JUMP_ANY) >> 5);
|
||||
|
||||
/* OP */
|
||||
switch(inst & 0x7){
|
||||
case R500_FC_OP_JUMP:
|
||||
fprintf(stderr, "JUMP");
|
||||
break;
|
||||
case R500_FC_OP_LOOP:
|
||||
fprintf(stderr, "LOOP");
|
||||
break;
|
||||
case R500_FC_OP_ENDLOOP:
|
||||
fprintf(stderr, "ENDLOOP");
|
||||
break;
|
||||
case R500_FC_OP_REP:
|
||||
fprintf(stderr, "REP");
|
||||
break;
|
||||
case R500_FC_OP_ENDREP:
|
||||
fprintf(stderr, "ENDREP");
|
||||
break;
|
||||
case R500_FC_OP_BREAKLOOP:
|
||||
fprintf(stderr, "BREAKLOOP");
|
||||
break;
|
||||
case R500_FC_OP_BREAKREP:
|
||||
fprintf(stderr, "BREAKREP");
|
||||
break;
|
||||
case R500_FC_OP_CONTINUE:
|
||||
fprintf(stderr, "CONTINUE");
|
||||
break;
|
||||
for (n = 0; n < code->inst_end + 1; n++) {
|
||||
inst0 = inst = code->inst[n].inst0;
|
||||
fprintf(stderr, "%d\t0:CMN_INST 0x%08x:", n, inst);
|
||||
switch (inst & 0x3) {
|
||||
case R500_INST_TYPE_ALU: str = "ALU"; break;
|
||||
case R500_INST_TYPE_OUT: str = "OUT"; break;
|
||||
case R500_INST_TYPE_FC: str = "FC"; break;
|
||||
case R500_INST_TYPE_TEX: str = "TEX"; break;
|
||||
}
|
||||
fprintf(stderr," ");
|
||||
/* A_OP */
|
||||
switch(inst & (0x3 << 6)){
|
||||
case R500_FC_A_OP_NONE:
|
||||
fprintf(stderr, "NONE");
|
||||
break;
|
||||
case R500_FC_A_OP_POP:
|
||||
fprintf(stderr, "POP");
|
||||
break;
|
||||
case R500_FC_A_OP_PUSH:
|
||||
fprintf(stderr, "PUSH");
|
||||
break;
|
||||
}
|
||||
/* B_OP0 B_OP1 */
|
||||
for(i=0; i<2; i++){
|
||||
fprintf(stderr, " ");
|
||||
switch(inst & (0x3 << (24 + (i * 2)))){
|
||||
/* R500_FC_B_OP0_NONE
|
||||
* R500_FC_B_OP1_NONE */
|
||||
case 0:
|
||||
fprintf(stderr, "NONE");
|
||||
break;
|
||||
case R500_FC_B_OP0_DECR:
|
||||
case R500_FC_B_OP1_DECR:
|
||||
fprintf(stderr, "DECR");
|
||||
break;
|
||||
case R500_FC_B_OP0_INCR:
|
||||
case R500_FC_B_OP1_INCR:
|
||||
fprintf(stderr, "INCR");
|
||||
break;
|
||||
}
|
||||
}
|
||||
/*POP_CNT B_ELSE */
|
||||
fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4);
|
||||
inst = code->inst[n].inst3;
|
||||
/* JUMP_ADDR */
|
||||
fprintf(stderr, " %d", inst >> 16);
|
||||
|
||||
if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){
|
||||
fprintf(stderr, " IGN_UNC");
|
||||
}
|
||||
inst = code->inst[n].inst3;
|
||||
fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst);
|
||||
fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n",
|
||||
inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31);
|
||||
break;
|
||||
case R500_INST_TYPE_TEX:
|
||||
inst = code->inst[n].inst1;
|
||||
fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
|
||||
to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
|
||||
(inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
|
||||
inst = code->inst[n].inst2;
|
||||
fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
|
||||
inst & 127, inst & (1<<7) ? "(rel)" : "",
|
||||
toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
|
||||
toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
|
||||
(inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
|
||||
toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
|
||||
toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
|
||||
fprintf(stderr, "%s %s %s %s %s ", str, inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
|
||||
inst & R500_INST_LAST ? "LAST" : "", inst & R500_INST_NOP ? "NOP" : "",
|
||||
inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
|
||||
fprintf(stderr, "wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
|
||||
to_mask((inst >> 15) & 0xf));
|
||||
|
||||
fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3);
|
||||
break;
|
||||
}
|
||||
fprintf(stderr,"\n");
|
||||
}
|
||||
switch (inst0 & 0x3) {
|
||||
case R500_INST_TYPE_ALU:
|
||||
case R500_INST_TYPE_OUT:
|
||||
fprintf(stderr, "\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);
|
||||
inst = code->inst[n].inst1;
|
||||
|
||||
fprintf(stderr, "Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", inst & 0xff,
|
||||
(inst & (1 << 8)) ? 'c' : 't', (inst >> 10) & 0xff, (inst & (1 << 18)) ? 'c' : 't',
|
||||
(inst >> 20) & 0xff, (inst & (1 << 28)) ? 'c' : 't', (inst >> 30));
|
||||
|
||||
fprintf(stderr, "\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
|
||||
inst = code->inst[n].inst2;
|
||||
fprintf(stderr, "Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", inst & 0xff,
|
||||
(inst & (1 << 8)) ? 'c' : 't', (inst >> 10) & 0xff, (inst & (1 << 18)) ? 'c' : 't',
|
||||
(inst >> 20) & 0xff, (inst & (1 << 28)) ? 'c' : 't', (inst >> 30));
|
||||
fprintf(stderr, "\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
|
||||
inst = code->inst[n].inst3;
|
||||
fprintf(stderr, "rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",
|
||||
(inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7),
|
||||
toswiz((inst >> 8) & 0x7), (inst >> 11) & 0x3, (inst >> 13) & 0x3,
|
||||
toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
|
||||
(inst >> 24) & 0x3, (inst >> 29) & 0x3);
|
||||
|
||||
fprintf(stderr, "\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
|
||||
inst = code->inst[n].inst4;
|
||||
fprintf(stderr, "%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n",
|
||||
to_alpha_op(inst & 0xf), (inst >> 4) & 0x7f, inst & (1 << 11) ? "(rel)" : "",
|
||||
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
|
||||
(inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
|
||||
(inst >> 29) & 0x3, (inst >> 31) & 0x1);
|
||||
|
||||
fprintf(stderr, "\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
|
||||
inst = code->inst[n].inst5;
|
||||
fprintf(stderr, "%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n",
|
||||
toop(inst & 0xf), (inst >> 4) & 0x7f, inst & (1 << 11) ? "(rel)" : "",
|
||||
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7),
|
||||
toswiz((inst >> 20) & 0x7), (inst >> 23) & 0x3, (inst >> 25) & 0x3,
|
||||
toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
|
||||
break;
|
||||
case R500_INST_TYPE_FC:
|
||||
fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2);
|
||||
inst = code->inst[n].inst2;
|
||||
/* JUMP_FUNC JUMP_ANY*/
|
||||
fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff, (inst & R500_FC_JUMP_ANY) >> 5);
|
||||
|
||||
/* OP */
|
||||
switch (inst & 0x7) {
|
||||
case R500_FC_OP_JUMP: fprintf(stderr, "JUMP"); break;
|
||||
case R500_FC_OP_LOOP: fprintf(stderr, "LOOP"); break;
|
||||
case R500_FC_OP_ENDLOOP: fprintf(stderr, "ENDLOOP"); break;
|
||||
case R500_FC_OP_REP: fprintf(stderr, "REP"); break;
|
||||
case R500_FC_OP_ENDREP: fprintf(stderr, "ENDREP"); break;
|
||||
case R500_FC_OP_BREAKLOOP: fprintf(stderr, "BREAKLOOP"); break;
|
||||
case R500_FC_OP_BREAKREP: fprintf(stderr, "BREAKREP"); break;
|
||||
case R500_FC_OP_CONTINUE: fprintf(stderr, "CONTINUE"); break;
|
||||
}
|
||||
fprintf(stderr, " ");
|
||||
/* A_OP */
|
||||
switch (inst & (0x3 << 6)) {
|
||||
case R500_FC_A_OP_NONE: fprintf(stderr, "NONE"); break;
|
||||
case R500_FC_A_OP_POP: fprintf(stderr, "POP"); break;
|
||||
case R500_FC_A_OP_PUSH: fprintf(stderr, "PUSH"); break;
|
||||
}
|
||||
/* B_OP0 B_OP1 */
|
||||
for (i = 0; i < 2; i++) {
|
||||
fprintf(stderr, " ");
|
||||
switch (inst & (0x3 << (24 + (i * 2)))) {
|
||||
/* R500_FC_B_OP0_NONE
|
||||
* R500_FC_B_OP1_NONE */
|
||||
case 0:
|
||||
fprintf(stderr, "NONE");
|
||||
break;
|
||||
case R500_FC_B_OP0_DECR:
|
||||
case R500_FC_B_OP1_DECR:
|
||||
fprintf(stderr, "DECR");
|
||||
break;
|
||||
case R500_FC_B_OP0_INCR:
|
||||
case R500_FC_B_OP1_INCR:
|
||||
fprintf(stderr, "INCR");
|
||||
break;
|
||||
}
|
||||
}
|
||||
/*POP_CNT B_ELSE */
|
||||
fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4);
|
||||
inst = code->inst[n].inst3;
|
||||
/* JUMP_ADDR */
|
||||
fprintf(stderr, " %d", inst >> 16);
|
||||
|
||||
if (code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED) {
|
||||
fprintf(stderr, " IGN_UNC");
|
||||
}
|
||||
inst = code->inst[n].inst3;
|
||||
fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst);
|
||||
fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n", inst & 0x1f,
|
||||
(inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31);
|
||||
break;
|
||||
case R500_INST_TYPE_TEX:
|
||||
inst = code->inst[n].inst1;
|
||||
fprintf(stderr, "\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst,
|
||||
(inst >> 16) & 0xf, to_texop((inst >> 22) & 0x7), (inst & (1 << 25)) ? "ACQ" : "",
|
||||
(inst & (1 << 26)) ? "IGNUNC" : "", (inst & (1 << 27)) ? "UNSCALED" : "SCALED");
|
||||
inst = code->inst[n].inst2;
|
||||
fprintf(stderr, "\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n",
|
||||
inst, inst & 127, inst & (1 << 7) ? "(rel)" : "", toswiz((inst >> 8) & 0x3),
|
||||
toswiz((inst >> 10) & 0x3), toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
|
||||
(inst >> 16) & 127, inst & (1 << 23) ? "(rel)" : "", toswiz((inst >> 24) & 0x3),
|
||||
toswiz((inst >> 26) & 0x3), toswiz((inst >> 28) & 0x3),
|
||||
toswiz((inst >> 30) & 0x3));
|
||||
|
||||
fprintf(stderr, "\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3);
|
||||
break;
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,8 +18,6 @@ extern void r500FragmentProgramDump(struct radeon_compiler *c, void *user);
|
|||
|
||||
extern const struct rc_swizzle_caps r500_swizzle_caps;
|
||||
|
||||
extern void r500_transform_IF(
|
||||
struct radeon_compiler * c,
|
||||
void* data);
|
||||
extern void r500_transform_IF(struct radeon_compiler *c, void *data);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -4,8 +4,8 @@
|
|||
*/
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "r300_nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "r300_nir.h"
|
||||
|
||||
static int
|
||||
follow_modifiers(nir_instr *instr)
|
||||
|
|
@ -22,14 +22,13 @@ follow_modifiers(nir_instr *instr)
|
|||
if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 ||
|
||||
intrin->intrinsic == nir_intrinsic_load_constant ||
|
||||
intrin->intrinsic == nir_intrinsic_load_input) {
|
||||
nir_foreach_use(use, &intrin->def) {
|
||||
if (nir_src_parent_instr(use)->type == nir_instr_type_phi)
|
||||
return intrin->def.index;
|
||||
}
|
||||
nir_foreach_use (use, &intrin->def) {
|
||||
if (nir_src_parent_instr(use)->type == nir_instr_type_phi)
|
||||
return intrin->def.index;
|
||||
}
|
||||
}
|
||||
if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 &&
|
||||
!nir_src_is_const(intrin->src[1]))
|
||||
return intrin->def.index;
|
||||
if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 && !nir_src_is_const(intrin->src[1]))
|
||||
return intrin->def.index;
|
||||
}
|
||||
/* Assume the worst when we see a phi. */
|
||||
if (instr->type == nir_instr_type_phi)
|
||||
|
|
@ -55,10 +54,9 @@ has_three_different_tmp_sources(nir_alu_instr *fcsel)
|
|||
if (index == -1)
|
||||
return false;
|
||||
else
|
||||
src_def_index[i] = index;
|
||||
src_def_index[i] = index;
|
||||
}
|
||||
return src_def_index[0] != src_def_index[1] &&
|
||||
src_def_index[0] != src_def_index[2] &&
|
||||
return src_def_index[0] != src_def_index[1] && src_def_index[0] != src_def_index[2] &&
|
||||
src_def_index[1] != src_def_index[2];
|
||||
}
|
||||
|
||||
|
|
@ -96,20 +94,16 @@ r300_nir_lower_fcsel_instr(nir_builder *b, nir_alu_instr *alu, void *data)
|
|||
* even for nir_op_fcsel_gt if the source is 0 or 1 anyway.
|
||||
*/
|
||||
nir_instr *src0_instr = alu->src[0].src.ssa->parent_instr;
|
||||
if (alu->op == nir_op_fcsel ||
|
||||
(alu->op == nir_op_fcsel_gt && is_comparison(src0_instr))) {
|
||||
lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
|
||||
nir_ssa_for_alu_src(b, alu, 1),
|
||||
if (alu->op == nir_op_fcsel || (alu->op == nir_op_fcsel_gt && is_comparison(src0_instr))) {
|
||||
lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), nir_ssa_for_alu_src(b, alu, 1),
|
||||
nir_ssa_for_alu_src(b, alu, 0));
|
||||
} else if (alu->op == nir_op_fcsel_ge) {
|
||||
nir_def *sge = nir_sge(b, nir_ssa_for_alu_src(b, alu, 0), nir_imm_float(b, 0.0));
|
||||
lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
|
||||
nir_ssa_for_alu_src(b, alu, 1), sge);
|
||||
lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), nir_ssa_for_alu_src(b, alu, 1), sge);
|
||||
} else {
|
||||
nir_def *slt = nir_slt(b, nir_fneg(b, nir_ssa_for_alu_src(b, alu, 0)),
|
||||
nir_imm_float(b, 0.0));
|
||||
lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
|
||||
nir_ssa_for_alu_src(b, alu, 1), slt);
|
||||
nir_def *slt =
|
||||
nir_slt(b, nir_fneg(b, nir_ssa_for_alu_src(b, alu, 0)), nir_imm_float(b, 0.0));
|
||||
lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), nir_ssa_for_alu_src(b, alu, 1), slt);
|
||||
}
|
||||
|
||||
nir_def_replace(&alu->def, lrp);
|
||||
|
|
@ -121,6 +115,5 @@ r300_nir_lower_fcsel_instr(nir_builder *b, nir_alu_instr *alu, void *data)
|
|||
bool
|
||||
r300_nir_lower_fcsel_r500(nir_shader *shader)
|
||||
{
|
||||
return nir_shader_alu_pass(shader, r300_nir_lower_fcsel_instr,
|
||||
nir_metadata_control_flow, NULL);
|
||||
return nir_shader_alu_pass(shader, r300_nir_lower_fcsel_instr, nir_metadata_control_flow, NULL);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,193 +5,197 @@
|
|||
|
||||
#include "radeon_code.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "radeon_program.h"
|
||||
|
||||
void rc_constants_init(struct rc_constant_list * c)
|
||||
void
|
||||
rc_constants_init(struct rc_constant_list *c)
|
||||
{
|
||||
memset(c, 0, sizeof(*c));
|
||||
memset(c, 0, sizeof(*c));
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy a constants structure, assuming that the destination structure
|
||||
* is not initialized.
|
||||
*/
|
||||
void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src)
|
||||
void
|
||||
rc_constants_copy(struct rc_constant_list *dst, struct rc_constant_list *src)
|
||||
{
|
||||
dst->Constants = malloc(sizeof(struct rc_constant) * src->Count);
|
||||
memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count);
|
||||
dst->Count = src->Count;
|
||||
dst->_Reserved = src->Count;
|
||||
dst->Constants = malloc(sizeof(struct rc_constant) * src->Count);
|
||||
memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count);
|
||||
dst->Count = src->Count;
|
||||
dst->_Reserved = src->Count;
|
||||
}
|
||||
|
||||
void rc_constants_destroy(struct rc_constant_list * c)
|
||||
void
|
||||
rc_constants_destroy(struct rc_constant_list *c)
|
||||
{
|
||||
free(c->Constants);
|
||||
memset(c, 0, sizeof(*c));
|
||||
free(c->Constants);
|
||||
memset(c, 0, sizeof(*c));
|
||||
}
|
||||
|
||||
unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant)
|
||||
unsigned
|
||||
rc_constants_add(struct rc_constant_list *c, struct rc_constant *constant)
|
||||
{
|
||||
unsigned index = c->Count;
|
||||
unsigned index = c->Count;
|
||||
|
||||
if (c->Count >= c->_Reserved) {
|
||||
struct rc_constant * newlist;
|
||||
if (c->Count >= c->_Reserved) {
|
||||
struct rc_constant *newlist;
|
||||
|
||||
c->_Reserved = c->_Reserved * 2;
|
||||
if (!c->_Reserved)
|
||||
c->_Reserved = 16;
|
||||
c->_Reserved = c->_Reserved * 2;
|
||||
if (!c->_Reserved)
|
||||
c->_Reserved = 16;
|
||||
|
||||
newlist = malloc(sizeof(struct rc_constant) * c->_Reserved);
|
||||
memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count);
|
||||
newlist = malloc(sizeof(struct rc_constant) * c->_Reserved);
|
||||
memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count);
|
||||
|
||||
free(c->Constants);
|
||||
c->Constants = newlist;
|
||||
}
|
||||
free(c->Constants);
|
||||
c->Constants = newlist;
|
||||
}
|
||||
|
||||
c->Constants[index] = *constant;
|
||||
c->Count++;
|
||||
c->Constants[index] = *constant;
|
||||
c->Count++;
|
||||
|
||||
return index;
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add a state vector to the constant list, while trying to avoid duplicates.
|
||||
*/
|
||||
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
|
||||
unsigned
|
||||
rc_constants_add_state(struct rc_constant_list *c, unsigned state0, unsigned state1)
|
||||
{
|
||||
unsigned index;
|
||||
struct rc_constant constant;
|
||||
unsigned index;
|
||||
struct rc_constant constant;
|
||||
|
||||
for(index = 0; index < c->Count; ++index) {
|
||||
if (c->Constants[index].Type == RC_CONSTANT_STATE) {
|
||||
if (c->Constants[index].u.State[0] == state0 &&
|
||||
c->Constants[index].u.State[1] == state1)
|
||||
return index;
|
||||
}
|
||||
}
|
||||
for (index = 0; index < c->Count; ++index) {
|
||||
if (c->Constants[index].Type == RC_CONSTANT_STATE) {
|
||||
if (c->Constants[index].u.State[0] == state0 && c->Constants[index].u.State[1] == state1)
|
||||
return index;
|
||||
}
|
||||
}
|
||||
|
||||
memset(&constant, 0, sizeof(constant));
|
||||
constant.Type = RC_CONSTANT_STATE;
|
||||
constant.UseMask = RC_MASK_XYZW;
|
||||
constant.u.State[0] = state0;
|
||||
constant.u.State[1] = state1;
|
||||
memset(&constant, 0, sizeof(constant));
|
||||
constant.Type = RC_CONSTANT_STATE;
|
||||
constant.UseMask = RC_MASK_XYZW;
|
||||
constant.u.State[0] = state0;
|
||||
constant.u.State[1] = state1;
|
||||
|
||||
return rc_constants_add(c, &constant);
|
||||
return rc_constants_add(c, &constant);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add an immediate vector to the constant list, while trying to avoid
|
||||
* duplicates.
|
||||
*/
|
||||
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
|
||||
unsigned
|
||||
rc_constants_add_immediate_vec4(struct rc_constant_list *c, const float *data)
|
||||
{
|
||||
unsigned index;
|
||||
struct rc_constant constant;
|
||||
unsigned index;
|
||||
struct rc_constant constant;
|
||||
|
||||
for(index = 0; index < c->Count; ++index) {
|
||||
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
|
||||
if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4))
|
||||
return index;
|
||||
}
|
||||
}
|
||||
for (index = 0; index < c->Count; ++index) {
|
||||
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
|
||||
if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float) * 4))
|
||||
return index;
|
||||
}
|
||||
}
|
||||
|
||||
memset(&constant, 0, sizeof(constant));
|
||||
constant.Type = RC_CONSTANT_IMMEDIATE;
|
||||
constant.UseMask = RC_MASK_XYZW;
|
||||
memcpy(constant.u.Immediate, data, sizeof(float) * 4);
|
||||
memset(&constant, 0, sizeof(constant));
|
||||
constant.Type = RC_CONSTANT_IMMEDIATE;
|
||||
constant.UseMask = RC_MASK_XYZW;
|
||||
memcpy(constant.u.Immediate, data, sizeof(float) * 4);
|
||||
|
||||
return rc_constants_add(c, &constant);
|
||||
return rc_constants_add(c, &constant);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add an immediate scalar to the constant list, while trying to avoid
|
||||
* duplicates.
|
||||
*/
|
||||
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
|
||||
unsigned
|
||||
rc_constants_add_immediate_scalar(struct rc_constant_list *c, float data, unsigned *swizzle)
|
||||
{
|
||||
unsigned index, free_comp;
|
||||
int free_index = -1;
|
||||
struct rc_constant constant;
|
||||
unsigned index, free_comp;
|
||||
int free_index = -1;
|
||||
struct rc_constant constant;
|
||||
|
||||
for(index = 0; index < c->Count; ++index) {
|
||||
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
|
||||
unsigned comp;
|
||||
for(comp = 0; comp < 4; ++comp) {
|
||||
if (c->Constants[index].UseMask & 1 << comp) {
|
||||
if (c->Constants[index].u.Immediate[comp] == data) {
|
||||
*swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
|
||||
return index;
|
||||
}
|
||||
} else {
|
||||
if (free_index == -1) {
|
||||
free_index = index;
|
||||
free_comp = comp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (index = 0; index < c->Count; ++index) {
|
||||
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
|
||||
unsigned comp;
|
||||
for (comp = 0; comp < 4; ++comp) {
|
||||
if (c->Constants[index].UseMask & 1 << comp) {
|
||||
if (c->Constants[index].u.Immediate[comp] == data) {
|
||||
*swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
|
||||
return index;
|
||||
}
|
||||
} else {
|
||||
if (free_index == -1) {
|
||||
free_index = index;
|
||||
free_comp = comp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (free_index >= 0) {
|
||||
c->Constants[free_index].u.Immediate[free_comp] = data;
|
||||
c->Constants[free_index].UseMask |= 1 << free_comp;
|
||||
*swizzle = RC_MAKE_SWIZZLE_SMEAR(free_comp);
|
||||
return free_index;
|
||||
}
|
||||
if (free_index >= 0) {
|
||||
c->Constants[free_index].u.Immediate[free_comp] = data;
|
||||
c->Constants[free_index].UseMask |= 1 << free_comp;
|
||||
*swizzle = RC_MAKE_SWIZZLE_SMEAR(free_comp);
|
||||
return free_index;
|
||||
}
|
||||
|
||||
memset(&constant, 0, sizeof(constant));
|
||||
constant.Type = RC_CONSTANT_IMMEDIATE;
|
||||
constant.UseMask = RC_MASK_X;
|
||||
constant.u.Immediate[0] = data;
|
||||
*swizzle = RC_SWIZZLE_XXXX;
|
||||
memset(&constant, 0, sizeof(constant));
|
||||
constant.Type = RC_CONSTANT_IMMEDIATE;
|
||||
constant.UseMask = RC_MASK_X;
|
||||
constant.u.Immediate[0] = data;
|
||||
*swizzle = RC_SWIZZLE_XXXX;
|
||||
|
||||
return rc_constants_add(c, &constant);
|
||||
return rc_constants_add(c, &constant);
|
||||
}
|
||||
|
||||
static char swizzle_char(unsigned swz)
|
||||
static char
|
||||
swizzle_char(unsigned swz)
|
||||
{
|
||||
switch (swz) {
|
||||
case RC_SWIZZLE_X:
|
||||
return 'x';
|
||||
case RC_SWIZZLE_Y:
|
||||
return 'y';
|
||||
case RC_SWIZZLE_Z:
|
||||
return 'z';
|
||||
case RC_SWIZZLE_W:
|
||||
return 'w';
|
||||
default:
|
||||
return 'u';
|
||||
}
|
||||
switch (swz) {
|
||||
case RC_SWIZZLE_X:
|
||||
return 'x';
|
||||
case RC_SWIZZLE_Y:
|
||||
return 'y';
|
||||
case RC_SWIZZLE_Z:
|
||||
return 'z';
|
||||
case RC_SWIZZLE_W:
|
||||
return 'w';
|
||||
default:
|
||||
return 'u';
|
||||
}
|
||||
}
|
||||
|
||||
void rc_constants_print(struct rc_constant_list *c, struct const_remap *r)
|
||||
void
|
||||
rc_constants_print(struct rc_constant_list *c, struct const_remap *r)
|
||||
{
|
||||
for (unsigned i = 0; i < c->Count; i++) {
|
||||
if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) {
|
||||
float *values = c->Constants[i].u.Immediate;
|
||||
fprintf(stderr, "CONST[%u] = {", i);
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
if (c->Constants[i].UseMask & 1 << chan)
|
||||
fprintf(stderr, "%11.6f ", values[chan]);
|
||||
else
|
||||
fprintf(stderr, " unused ");
|
||||
}
|
||||
fprintf(stderr, "}\n");
|
||||
}
|
||||
if (r && c->Constants[i].Type == RC_CONSTANT_EXTERNAL) {
|
||||
fprintf(stderr, "CONST[%u] = {", i);
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
fprintf(stderr, "CONST[%i].%c ", r[i].index[chan],
|
||||
swizzle_char(r[i].swizzle[chan]));
|
||||
}
|
||||
fprintf(stderr, " }\n");
|
||||
}
|
||||
}
|
||||
for (unsigned i = 0; i < c->Count; i++) {
|
||||
if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) {
|
||||
float *values = c->Constants[i].u.Immediate;
|
||||
fprintf(stderr, "CONST[%u] = {", i);
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
if (c->Constants[i].UseMask & 1 << chan)
|
||||
fprintf(stderr, "%11.6f ", values[chan]);
|
||||
else
|
||||
fprintf(stderr, " unused ");
|
||||
}
|
||||
fprintf(stderr, "}\n");
|
||||
}
|
||||
if (r && c->Constants[i].Type == RC_CONSTANT_EXTERNAL) {
|
||||
fprintf(stderr, "CONST[%u] = {", i);
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
fprintf(stderr, "CONST[%i].%c ", r[i].index[chan], swizzle_char(r[i].swizzle[chan]));
|
||||
}
|
||||
fprintf(stderr, " }\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,78 +14,79 @@
|
|||
#define R300_PFS_NUM_TEMP_REGS 32
|
||||
#define R300_PFS_NUM_CONST_REGS 32
|
||||
|
||||
#define R400_PFS_MAX_ALU_INST 512
|
||||
#define R400_PFS_MAX_TEX_INST 512
|
||||
#define R400_PFS_MAX_ALU_INST 512
|
||||
#define R400_PFS_MAX_TEX_INST 512
|
||||
|
||||
#define R500_PFS_MAX_INST 512
|
||||
#define R500_PFS_NUM_TEMP_REGS 128
|
||||
#define R500_PFS_NUM_CONST_REGS 256
|
||||
#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
|
||||
#define R500_PFS_MAX_INST 512
|
||||
#define R500_PFS_NUM_TEMP_REGS 128
|
||||
#define R500_PFS_NUM_CONST_REGS 256
|
||||
#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
|
||||
#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4
|
||||
|
||||
/* The r500 maximum depth is not just for loops, but any combination of loops
|
||||
* and subroutine jumps. */
|
||||
#define R500_PVS_MAX_LOOP_DEPTH 8
|
||||
|
||||
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
|
||||
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER + 0)
|
||||
|
||||
enum {
|
||||
/**
|
||||
* External constants are constants whose meaning is unknown to this
|
||||
* compiler. For example, a Mesa gl_program's constants are turned
|
||||
* into external constants.
|
||||
*/
|
||||
RC_CONSTANT_EXTERNAL = 0,
|
||||
/**
|
||||
* External constants are constants whose meaning is unknown to this
|
||||
* compiler. For example, a Mesa gl_program's constants are turned
|
||||
* into external constants.
|
||||
*/
|
||||
RC_CONSTANT_EXTERNAL = 0,
|
||||
|
||||
RC_CONSTANT_IMMEDIATE,
|
||||
RC_CONSTANT_IMMEDIATE,
|
||||
|
||||
/**
|
||||
* Constant referring to state that is known by this compiler,
|
||||
* see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
|
||||
*/
|
||||
RC_CONSTANT_STATE
|
||||
/**
|
||||
* Constant referring to state that is known by this compiler,
|
||||
* see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
|
||||
*/
|
||||
RC_CONSTANT_STATE
|
||||
};
|
||||
|
||||
enum {
|
||||
RC_STATE_SHADOW_AMBIENT = 0,
|
||||
RC_STATE_SHADOW_AMBIENT = 0,
|
||||
|
||||
RC_STATE_R300_WINDOW_DIMENSION,
|
||||
RC_STATE_R300_TEXRECT_FACTOR,
|
||||
RC_STATE_R300_TEXSCALE_FACTOR,
|
||||
RC_STATE_R300_VIEWPORT_SCALE,
|
||||
RC_STATE_R300_VIEWPORT_OFFSET
|
||||
RC_STATE_R300_WINDOW_DIMENSION,
|
||||
RC_STATE_R300_TEXRECT_FACTOR,
|
||||
RC_STATE_R300_TEXSCALE_FACTOR,
|
||||
RC_STATE_R300_VIEWPORT_SCALE,
|
||||
RC_STATE_R300_VIEWPORT_OFFSET
|
||||
};
|
||||
|
||||
struct rc_constant {
|
||||
unsigned Type:2; /**< RC_CONSTANT_xxx */
|
||||
unsigned UseMask:4;
|
||||
unsigned Type : 2; /**< RC_CONSTANT_xxx */
|
||||
unsigned UseMask : 4;
|
||||
|
||||
union {
|
||||
unsigned External;
|
||||
float Immediate[4];
|
||||
unsigned State[2];
|
||||
} u;
|
||||
union {
|
||||
unsigned External;
|
||||
float Immediate[4];
|
||||
unsigned State[2];
|
||||
} u;
|
||||
};
|
||||
|
||||
struct rc_constant_list {
|
||||
struct rc_constant * Constants;
|
||||
unsigned Count;
|
||||
struct rc_constant *Constants;
|
||||
unsigned Count;
|
||||
|
||||
unsigned _Reserved;
|
||||
unsigned _Reserved;
|
||||
};
|
||||
|
||||
struct const_remap {
|
||||
int index[4];
|
||||
uint8_t swizzle[4];
|
||||
int index[4];
|
||||
uint8_t swizzle[4];
|
||||
};
|
||||
|
||||
void rc_constants_init(struct rc_constant_list * c);
|
||||
void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
|
||||
void rc_constants_destroy(struct rc_constant_list * c);
|
||||
unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
|
||||
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2);
|
||||
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
|
||||
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
|
||||
void rc_constants_init(struct rc_constant_list *c);
|
||||
void rc_constants_copy(struct rc_constant_list *dst, struct rc_constant_list *src);
|
||||
void rc_constants_destroy(struct rc_constant_list *c);
|
||||
unsigned rc_constants_add(struct rc_constant_list *c, struct rc_constant *constant);
|
||||
unsigned rc_constants_add_state(struct rc_constant_list *c, unsigned state1, unsigned state2);
|
||||
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list *c, const float *data);
|
||||
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list *c, float data,
|
||||
unsigned *swizzle);
|
||||
void rc_constants_print(struct rc_constant_list *c, struct const_remap *r);
|
||||
|
||||
/**
|
||||
|
|
@ -95,14 +96,14 @@ void rc_constants_print(struct rc_constant_list *c, struct const_remap *r);
|
|||
* the correct GL compare function.
|
||||
*/
|
||||
typedef enum {
|
||||
RC_COMPARE_FUNC_NEVER = 0,
|
||||
RC_COMPARE_FUNC_LESS,
|
||||
RC_COMPARE_FUNC_EQUAL,
|
||||
RC_COMPARE_FUNC_LEQUAL,
|
||||
RC_COMPARE_FUNC_GREATER,
|
||||
RC_COMPARE_FUNC_NOTEQUAL,
|
||||
RC_COMPARE_FUNC_GEQUAL,
|
||||
RC_COMPARE_FUNC_ALWAYS
|
||||
RC_COMPARE_FUNC_NEVER = 0,
|
||||
RC_COMPARE_FUNC_LESS,
|
||||
RC_COMPARE_FUNC_EQUAL,
|
||||
RC_COMPARE_FUNC_LEQUAL,
|
||||
RC_COMPARE_FUNC_GREATER,
|
||||
RC_COMPARE_FUNC_NOTEQUAL,
|
||||
RC_COMPARE_FUNC_GEQUAL,
|
||||
RC_COMPARE_FUNC_ALWAYS
|
||||
} rc_compare_func;
|
||||
|
||||
/**
|
||||
|
|
@ -111,173 +112,168 @@ typedef enum {
|
|||
* These are not quite the same as their GL counterparts yet.
|
||||
*/
|
||||
typedef enum {
|
||||
RC_WRAP_NONE = 0,
|
||||
RC_WRAP_REPEAT,
|
||||
RC_WRAP_MIRRORED_REPEAT,
|
||||
RC_WRAP_MIRRORED_CLAMP
|
||||
RC_WRAP_NONE = 0,
|
||||
RC_WRAP_REPEAT,
|
||||
RC_WRAP_MIRRORED_REPEAT,
|
||||
RC_WRAP_MIRRORED_CLAMP
|
||||
} rc_wrap_mode;
|
||||
|
||||
/**
|
||||
* Stores state that influences the compilation of a fragment program.
|
||||
*/
|
||||
struct r300_fragment_program_external_state {
|
||||
struct {
|
||||
/**
|
||||
* This field contains swizzle for some lowering passes
|
||||
* (shadow comparison, unorm->snorm conversion)
|
||||
*/
|
||||
unsigned texture_swizzle:12;
|
||||
struct {
|
||||
/**
|
||||
* This field contains swizzle for some lowering passes
|
||||
* (shadow comparison, unorm->snorm conversion)
|
||||
*/
|
||||
unsigned texture_swizzle : 12;
|
||||
|
||||
/**
|
||||
* If the sampler is used as a shadow sampler,
|
||||
* this field specifies the compare function.
|
||||
*
|
||||
* Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
|
||||
* \sa rc_compare_func
|
||||
*/
|
||||
unsigned texture_compare_func : 3;
|
||||
/**
|
||||
* If the sampler is used as a shadow sampler,
|
||||
* this field specifies the compare function.
|
||||
*
|
||||
* Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
|
||||
* \sa rc_compare_func
|
||||
*/
|
||||
unsigned texture_compare_func : 3;
|
||||
|
||||
/**
|
||||
* No matter what the sampler type is,
|
||||
* this field turns it into a shadow sampler.
|
||||
*/
|
||||
unsigned compare_mode_enabled : 1;
|
||||
/**
|
||||
* No matter what the sampler type is,
|
||||
* this field turns it into a shadow sampler.
|
||||
*/
|
||||
unsigned compare_mode_enabled : 1;
|
||||
|
||||
/**
|
||||
* This field specifies wrapping modes for the sampler.
|
||||
*
|
||||
* If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
|
||||
* will be performed on the coordinates.
|
||||
*/
|
||||
unsigned wrap_mode : 3;
|
||||
/**
|
||||
* This field specifies wrapping modes for the sampler.
|
||||
*
|
||||
* If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
|
||||
* will be performed on the coordinates.
|
||||
*/
|
||||
unsigned wrap_mode : 3;
|
||||
|
||||
/**
|
||||
* The coords are scaled after applying the wrap mode emulation
|
||||
* and right before texture fetch. The scaling factor is given by
|
||||
* RC_STATE_R300_TEXSCALE_FACTOR. */
|
||||
unsigned clamp_and_scale_before_fetch : 1;
|
||||
} unit[16];
|
||||
/**
|
||||
* The coords are scaled after applying the wrap mode emulation
|
||||
* and right before texture fetch. The scaling factor is given by
|
||||
* RC_STATE_R300_TEXSCALE_FACTOR. */
|
||||
unsigned clamp_and_scale_before_fetch : 1;
|
||||
} unit[16];
|
||||
|
||||
unsigned alpha_to_one:1;
|
||||
unsigned alpha_to_one : 1;
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct r300_fragment_program_node {
|
||||
int tex_offset; /**< first tex instruction */
|
||||
int tex_end; /**< last tex instruction, relative to tex_offset */
|
||||
int alu_offset; /**< first ALU instruction */
|
||||
int alu_end; /**< last ALU instruction, relative to alu_offset */
|
||||
int flags;
|
||||
int tex_offset; /**< first tex instruction */
|
||||
int tex_end; /**< last tex instruction, relative to tex_offset */
|
||||
int alu_offset; /**< first ALU instruction */
|
||||
int alu_end; /**< last ALU instruction, relative to alu_offset */
|
||||
int flags;
|
||||
};
|
||||
|
||||
/**
|
||||
* Stores an R300 fragment program in its compiled-to-hardware form.
|
||||
*/
|
||||
struct r300_fragment_program_code {
|
||||
struct {
|
||||
unsigned int length; /**< total # of texture instructions used */
|
||||
uint32_t inst[R400_PFS_MAX_TEX_INST];
|
||||
} tex;
|
||||
struct {
|
||||
unsigned int length; /**< total # of texture instructions used */
|
||||
uint32_t inst[R400_PFS_MAX_TEX_INST];
|
||||
} tex;
|
||||
|
||||
struct {
|
||||
unsigned int length; /**< total # of ALU instructions used */
|
||||
struct {
|
||||
uint32_t rgb_inst;
|
||||
uint32_t rgb_addr;
|
||||
uint32_t alpha_inst;
|
||||
uint32_t alpha_addr;
|
||||
uint32_t r400_ext_addr;
|
||||
} inst[R400_PFS_MAX_ALU_INST];
|
||||
} alu;
|
||||
struct {
|
||||
unsigned int length; /**< total # of ALU instructions used */
|
||||
struct {
|
||||
uint32_t rgb_inst;
|
||||
uint32_t rgb_addr;
|
||||
uint32_t alpha_inst;
|
||||
uint32_t alpha_addr;
|
||||
uint32_t r400_ext_addr;
|
||||
} inst[R400_PFS_MAX_ALU_INST];
|
||||
} alu;
|
||||
|
||||
uint32_t config; /* US_CONFIG */
|
||||
uint32_t pixsize; /* US_PIXSIZE */
|
||||
uint32_t code_offset; /* US_CODE_OFFSET */
|
||||
uint32_t r400_code_offset_ext; /* US_CODE_EXT */
|
||||
uint32_t code_addr[4]; /* US_CODE_ADDR */
|
||||
/*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
|
||||
* for r400 cards */
|
||||
unsigned int r390_mode:1;
|
||||
uint32_t config; /* US_CONFIG */
|
||||
uint32_t pixsize; /* US_PIXSIZE */
|
||||
uint32_t code_offset; /* US_CODE_OFFSET */
|
||||
uint32_t r400_code_offset_ext; /* US_CODE_EXT */
|
||||
uint32_t code_addr[4]; /* US_CODE_ADDR */
|
||||
/*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
|
||||
* for r400 cards */
|
||||
unsigned int r390_mode : 1;
|
||||
};
|
||||
|
||||
|
||||
struct r500_fragment_program_code {
|
||||
struct {
|
||||
uint32_t inst0;
|
||||
uint32_t inst1;
|
||||
uint32_t inst2;
|
||||
uint32_t inst3;
|
||||
uint32_t inst4;
|
||||
uint32_t inst5;
|
||||
} inst[R500_PFS_MAX_INST];
|
||||
struct {
|
||||
uint32_t inst0;
|
||||
uint32_t inst1;
|
||||
uint32_t inst2;
|
||||
uint32_t inst3;
|
||||
uint32_t inst4;
|
||||
uint32_t inst5;
|
||||
} inst[R500_PFS_MAX_INST];
|
||||
|
||||
int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
|
||||
int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
|
||||
|
||||
int max_temp_idx;
|
||||
int max_temp_idx;
|
||||
|
||||
uint32_t us_fc_ctrl;
|
||||
uint32_t us_fc_ctrl;
|
||||
|
||||
uint32_t int_constants[32];
|
||||
uint32_t int_constant_count;
|
||||
uint32_t int_constants[32];
|
||||
uint32_t int_constant_count;
|
||||
};
|
||||
|
||||
struct rX00_fragment_program_code {
|
||||
union {
|
||||
struct r300_fragment_program_code r300;
|
||||
struct r500_fragment_program_code r500;
|
||||
} code;
|
||||
union {
|
||||
struct r300_fragment_program_code r300;
|
||||
struct r500_fragment_program_code r500;
|
||||
} code;
|
||||
|
||||
unsigned writes_depth:1;
|
||||
unsigned writes_depth : 1;
|
||||
|
||||
struct rc_constant_list constants;
|
||||
struct const_remap *constants_remap_table;
|
||||
struct rc_constant_list constants;
|
||||
struct const_remap *constants_remap_table;
|
||||
};
|
||||
|
||||
|
||||
#define R300_VS_MAX_ALU 256
|
||||
#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4)
|
||||
#define R500_VS_MAX_ALU 1024
|
||||
#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4)
|
||||
#define R300_VS_MAX_TEMPS 32
|
||||
#define R300_VS_MAX_ALU 256
|
||||
#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4)
|
||||
#define R500_VS_MAX_ALU 1024
|
||||
#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4)
|
||||
#define R300_VS_MAX_TEMPS 32
|
||||
/* This is the max for all chipsets (r300-r500) */
|
||||
#define R300_VS_MAX_FC_OPS 16
|
||||
#define R300_VS_MAX_FC_OPS 16
|
||||
#define R300_VS_MAX_LOOP_DEPTH 1
|
||||
|
||||
#define VSF_MAX_INPUTS 32
|
||||
#define VSF_MAX_INPUTS 32
|
||||
#define VSF_MAX_OUTPUTS 32
|
||||
|
||||
struct r300_vertex_program_code {
|
||||
int length;
|
||||
union {
|
||||
uint32_t d[R500_VS_MAX_ALU_DWORDS];
|
||||
float f[R500_VS_MAX_ALU_DWORDS];
|
||||
} body;
|
||||
int length;
|
||||
union {
|
||||
uint32_t d[R500_VS_MAX_ALU_DWORDS];
|
||||
float f[R500_VS_MAX_ALU_DWORDS];
|
||||
} body;
|
||||
|
||||
int pos_end;
|
||||
int num_temporaries; /* Number of temp vars used by program */
|
||||
int inputs[VSF_MAX_INPUTS];
|
||||
int outputs[VSF_MAX_OUTPUTS];
|
||||
unsigned last_input_read;
|
||||
unsigned last_pos_write;
|
||||
int pos_end;
|
||||
int num_temporaries; /* Number of temp vars used by program */
|
||||
int inputs[VSF_MAX_INPUTS];
|
||||
int outputs[VSF_MAX_OUTPUTS];
|
||||
unsigned last_input_read;
|
||||
unsigned last_pos_write;
|
||||
|
||||
struct rc_constant_list constants;
|
||||
struct const_remap *constants_remap_table;
|
||||
struct rc_constant_list constants;
|
||||
struct const_remap *constants_remap_table;
|
||||
|
||||
uint32_t InputsRead;
|
||||
uint32_t OutputsWritten;
|
||||
uint32_t InputsRead;
|
||||
uint32_t OutputsWritten;
|
||||
|
||||
unsigned int num_fc_ops;
|
||||
uint32_t fc_ops;
|
||||
union {
|
||||
uint32_t r300[R300_VS_MAX_FC_OPS];
|
||||
struct {
|
||||
uint32_t lw;
|
||||
uint32_t uw;
|
||||
} r500[R300_VS_MAX_FC_OPS];
|
||||
} fc_op_addrs;
|
||||
int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
|
||||
unsigned int num_fc_ops;
|
||||
uint32_t fc_ops;
|
||||
union {
|
||||
uint32_t r300[R300_VS_MAX_FC_OPS];
|
||||
struct {
|
||||
uint32_t lw;
|
||||
uint32_t uw;
|
||||
} r500[R300_VS_MAX_FC_OPS];
|
||||
} fc_op_addrs;
|
||||
int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
|
||||
};
|
||||
|
||||
#endif /* RADEON_CODE_H */
|
||||
|
||||
|
|
|
|||
|
|
@ -10,104 +10,108 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "util/u_debug.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "radeon_compiler_util.h"
|
||||
#include "radeon_dataflow.h"
|
||||
#include "radeon_program.h"
|
||||
#include "radeon_program_pair.h"
|
||||
#include "radeon_regalloc.h"
|
||||
#include "radeon_compiler_util.h"
|
||||
|
||||
|
||||
void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs)
|
||||
void
|
||||
rc_init(struct radeon_compiler *c, const struct rc_regalloc_state *rs)
|
||||
{
|
||||
memset(c, 0, sizeof(*c));
|
||||
memset(c, 0, sizeof(*c));
|
||||
|
||||
memory_pool_init(&c->Pool);
|
||||
c->Program.Instructions.Prev = &c->Program.Instructions;
|
||||
c->Program.Instructions.Next = &c->Program.Instructions;
|
||||
c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
|
||||
c->regalloc_state = rs;
|
||||
c->max_temp_index = -1;
|
||||
memory_pool_init(&c->Pool);
|
||||
c->Program.Instructions.Prev = &c->Program.Instructions;
|
||||
c->Program.Instructions.Next = &c->Program.Instructions;
|
||||
c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
|
||||
c->regalloc_state = rs;
|
||||
c->max_temp_index = -1;
|
||||
}
|
||||
|
||||
void rc_destroy(struct radeon_compiler * c)
|
||||
void
|
||||
rc_destroy(struct radeon_compiler *c)
|
||||
{
|
||||
rc_constants_destroy(&c->Program.Constants);
|
||||
memory_pool_destroy(&c->Pool);
|
||||
free(c->ErrorMsg);
|
||||
rc_constants_destroy(&c->Program.Constants);
|
||||
memory_pool_destroy(&c->Pool);
|
||||
free(c->ErrorMsg);
|
||||
}
|
||||
|
||||
void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
|
||||
void
|
||||
rc_debug(struct radeon_compiler *c, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
va_list ap;
|
||||
|
||||
if (!(c->Debug & RC_DBG_LOG))
|
||||
return;
|
||||
if (!(c->Debug & RC_DBG_LOG))
|
||||
return;
|
||||
|
||||
va_start(ap, fmt);
|
||||
vfprintf(stderr, fmt, ap);
|
||||
va_end(ap);
|
||||
va_start(ap, fmt);
|
||||
vfprintf(stderr, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
void rc_error(struct radeon_compiler * c, const char * fmt, ...)
|
||||
void
|
||||
rc_error(struct radeon_compiler *c, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
va_list ap;
|
||||
|
||||
c->Error = 1;
|
||||
c->Error = 1;
|
||||
|
||||
if (!c->ErrorMsg) {
|
||||
/* Only remember the first error */
|
||||
char buf[1024];
|
||||
int written;
|
||||
if (!c->ErrorMsg) {
|
||||
/* Only remember the first error */
|
||||
char buf[1024];
|
||||
int written;
|
||||
|
||||
va_start(ap, fmt);
|
||||
written = vsnprintf(buf, sizeof(buf), fmt, ap);
|
||||
va_end(ap);
|
||||
va_start(ap, fmt);
|
||||
written = vsnprintf(buf, sizeof(buf), fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (written < sizeof(buf)) {
|
||||
c->ErrorMsg = strdup(buf);
|
||||
} else {
|
||||
c->ErrorMsg = malloc(written + 1);
|
||||
if (written < sizeof(buf)) {
|
||||
c->ErrorMsg = strdup(buf);
|
||||
} else {
|
||||
c->ErrorMsg = malloc(written + 1);
|
||||
|
||||
va_start(ap, fmt);
|
||||
vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
}
|
||||
va_start(ap, fmt);
|
||||
vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
}
|
||||
|
||||
if (c->Debug & RC_DBG_LOG) {
|
||||
fprintf(stderr, "r300compiler error: ");
|
||||
if (c->Debug & RC_DBG_LOG) {
|
||||
fprintf(stderr, "r300compiler error: ");
|
||||
|
||||
va_start(ap, fmt);
|
||||
vfprintf(stderr, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
va_start(ap, fmt);
|
||||
vfprintf(stderr, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
}
|
||||
|
||||
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
|
||||
int
|
||||
rc_if_fail_helper(struct radeon_compiler *c, const char *file, int line, const char *assertion)
|
||||
{
|
||||
rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
|
||||
return 1;
|
||||
rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void rc_mark_unused_channels(struct radeon_compiler * c, void *user)
|
||||
void
|
||||
rc_mark_unused_channels(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
unsigned int srcmasks[3];
|
||||
unsigned int srcmasks[3];
|
||||
|
||||
for(struct rc_instruction * inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
for (struct rc_instruction *inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
|
||||
rc_compute_sources_for_writemask(inst, inst->U.I.DstReg.WriteMask, srcmasks);
|
||||
rc_compute_sources_for_writemask(inst, inst->U.I.DstReg.WriteMask, srcmasks);
|
||||
|
||||
for(unsigned int src = 0; src < 3; ++src) {
|
||||
for(unsigned int chan = 0; chan < 4; ++chan) {
|
||||
if (!GET_BIT(srcmasks[src], chan))
|
||||
SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (unsigned int src = 0; src < 3; ++src) {
|
||||
for (unsigned int chan = 0; chan < 4; ++chan) {
|
||||
if (!GET_BIT(srcmasks[src], chan))
|
||||
SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -115,191 +119,192 @@ void rc_mark_unused_channels(struct radeon_compiler * c, void *user)
|
|||
* based on which inputs and outputs are actually referenced
|
||||
* in program instructions.
|
||||
*/
|
||||
void rc_calculate_inputs_outputs(struct radeon_compiler * c)
|
||||
void
|
||||
rc_calculate_inputs_outputs(struct radeon_compiler *c)
|
||||
{
|
||||
struct rc_instruction *inst;
|
||||
struct rc_instruction *inst;
|
||||
|
||||
c->Program.InputsRead = 0;
|
||||
c->Program.OutputsWritten = 0;
|
||||
c->Program.InputsRead = 0;
|
||||
c->Program.OutputsWritten = 0;
|
||||
|
||||
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
|
||||
{
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
int i;
|
||||
for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < opcode->NumSrcRegs; ++i) {
|
||||
if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
|
||||
c->Program.InputsRead |= 1U << inst->U.I.SrcReg[i].Index;
|
||||
}
|
||||
for (i = 0; i < opcode->NumSrcRegs; ++i) {
|
||||
if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
|
||||
c->Program.InputsRead |= 1U << inst->U.I.SrcReg[i].Index;
|
||||
}
|
||||
|
||||
if (opcode->HasDstReg) {
|
||||
if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
|
||||
c->Program.OutputsWritten |= 1U << inst->U.I.DstReg.Index;
|
||||
}
|
||||
}
|
||||
if (opcode->HasDstReg) {
|
||||
if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
|
||||
c->Program.OutputsWritten |= 1U << inst->U.I.DstReg.Index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Rewrite the program such that a given output is duplicated.
|
||||
*/
|
||||
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
|
||||
void
|
||||
rc_copy_output(struct radeon_compiler *c, unsigned output, unsigned dup_output)
|
||||
{
|
||||
unsigned tempreg = rc_find_free_temporary(c);
|
||||
struct rc_instruction * inst;
|
||||
struct rc_instruction * insert_pos = c->Program.Instructions.Prev;
|
||||
struct rc_instruction * last_write_inst = NULL;
|
||||
unsigned branch_depth = 0;
|
||||
unsigned loop_depth = 0;
|
||||
bool emit_after_control_flow = false;
|
||||
unsigned num_writes = 0;
|
||||
unsigned tempreg = rc_find_free_temporary(c);
|
||||
struct rc_instruction *inst;
|
||||
struct rc_instruction *insert_pos = c->Program.Instructions.Prev;
|
||||
struct rc_instruction *last_write_inst = NULL;
|
||||
unsigned branch_depth = 0;
|
||||
unsigned loop_depth = 0;
|
||||
bool emit_after_control_flow = false;
|
||||
unsigned num_writes = 0;
|
||||
|
||||
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
|
||||
if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
|
||||
loop_depth++;
|
||||
if (inst->U.I.Opcode == RC_OPCODE_IF)
|
||||
branch_depth++;
|
||||
if ((inst->U.I.Opcode == RC_OPCODE_ENDLOOP && loop_depth--) ||
|
||||
(inst->U.I.Opcode == RC_OPCODE_ENDIF && branch_depth--))
|
||||
if (emit_after_control_flow && loop_depth == 0 && branch_depth == 0) {
|
||||
insert_pos = inst;
|
||||
emit_after_control_flow = false;
|
||||
}
|
||||
if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
|
||||
loop_depth++;
|
||||
if (inst->U.I.Opcode == RC_OPCODE_IF)
|
||||
branch_depth++;
|
||||
if ((inst->U.I.Opcode == RC_OPCODE_ENDLOOP && loop_depth--) ||
|
||||
(inst->U.I.Opcode == RC_OPCODE_ENDIF && branch_depth--))
|
||||
if (emit_after_control_flow && loop_depth == 0 && branch_depth == 0) {
|
||||
insert_pos = inst;
|
||||
emit_after_control_flow = false;
|
||||
}
|
||||
|
||||
if (opcode->HasDstReg) {
|
||||
if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
|
||||
num_writes++;
|
||||
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.DstReg.Index = tempreg;
|
||||
insert_pos = inst;
|
||||
last_write_inst = inst;
|
||||
if (loop_depth != 0 && branch_depth != 0)
|
||||
emit_after_control_flow = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (opcode->HasDstReg) {
|
||||
if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
|
||||
num_writes++;
|
||||
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.DstReg.Index = tempreg;
|
||||
insert_pos = inst;
|
||||
last_write_inst = inst;
|
||||
if (loop_depth != 0 && branch_depth != 0)
|
||||
emit_after_control_flow = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If there is only a single write, just duplicate the whole instruction instead.
|
||||
* We can do this even when the single write is inside control flow.
|
||||
*/
|
||||
if (num_writes == 1) {
|
||||
last_write_inst->U.I.DstReg.File = RC_FILE_OUTPUT;
|
||||
last_write_inst->U.I.DstReg.Index = output;
|
||||
/* If there is only a single write, just duplicate the whole instruction instead.
|
||||
* We can do this even when the single write is inside control flow.
|
||||
*/
|
||||
if (num_writes == 1) {
|
||||
last_write_inst->U.I.DstReg.File = RC_FILE_OUTPUT;
|
||||
last_write_inst->U.I.DstReg.Index = output;
|
||||
|
||||
inst = rc_insert_new_instruction(c, last_write_inst);
|
||||
struct rc_instruction * prev = inst->Prev;
|
||||
struct rc_instruction * next = inst->Next;
|
||||
memcpy(inst, last_write_inst, sizeof(struct rc_instruction));
|
||||
inst->Prev = prev;
|
||||
inst->Next = next;
|
||||
inst->U.I.DstReg.Index = dup_output;
|
||||
} else {
|
||||
inst = rc_insert_new_instruction(c, insert_pos);
|
||||
inst->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst->U.I.DstReg.File = RC_FILE_OUTPUT;
|
||||
inst->U.I.DstReg.Index = output;
|
||||
inst = rc_insert_new_instruction(c, last_write_inst);
|
||||
struct rc_instruction *prev = inst->Prev;
|
||||
struct rc_instruction *next = inst->Next;
|
||||
memcpy(inst, last_write_inst, sizeof(struct rc_instruction));
|
||||
inst->Prev = prev;
|
||||
inst->Next = next;
|
||||
inst->U.I.DstReg.Index = dup_output;
|
||||
} else {
|
||||
inst = rc_insert_new_instruction(c, insert_pos);
|
||||
inst->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst->U.I.DstReg.File = RC_FILE_OUTPUT;
|
||||
inst->U.I.DstReg.Index = output;
|
||||
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = tempreg;
|
||||
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = tempreg;
|
||||
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
|
||||
|
||||
inst = rc_insert_new_instruction(c, inst);
|
||||
inst->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst->U.I.DstReg.File = RC_FILE_OUTPUT;
|
||||
inst->U.I.DstReg.Index = dup_output;
|
||||
inst = rc_insert_new_instruction(c, inst);
|
||||
inst->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst->U.I.DstReg.File = RC_FILE_OUTPUT;
|
||||
inst->U.I.DstReg.Index = dup_output;
|
||||
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = tempreg;
|
||||
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
|
||||
}
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = tempreg;
|
||||
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
|
||||
}
|
||||
|
||||
c->Program.OutputsWritten |= 1U << dup_output;
|
||||
c->Program.OutputsWritten |= 1U << dup_output;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Introduce standard code fragment to deal with fragment.position.
|
||||
*/
|
||||
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
|
||||
int full_vtransform)
|
||||
void
|
||||
rc_transform_fragment_wpos(struct radeon_compiler *c, unsigned wpos, unsigned new_input,
|
||||
int full_vtransform)
|
||||
{
|
||||
unsigned tempregi = rc_find_free_temporary(c);
|
||||
struct rc_instruction * inst_rcp;
|
||||
struct rc_instruction * inst_mul;
|
||||
struct rc_instruction * inst_mad;
|
||||
struct rc_instruction * inst;
|
||||
unsigned tempregi = rc_find_free_temporary(c);
|
||||
struct rc_instruction *inst_rcp;
|
||||
struct rc_instruction *inst_mul;
|
||||
struct rc_instruction *inst_mad;
|
||||
struct rc_instruction *inst;
|
||||
|
||||
c->Program.InputsRead &= ~(1U << wpos);
|
||||
c->Program.InputsRead |= 1U << new_input;
|
||||
c->Program.InputsRead &= ~(1U << wpos);
|
||||
c->Program.InputsRead |= 1U << new_input;
|
||||
|
||||
/* perspective divide */
|
||||
inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
|
||||
inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
|
||||
/* perspective divide */
|
||||
inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
|
||||
inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
|
||||
|
||||
inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_rcp->U.I.DstReg.Index = tempregi;
|
||||
inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_rcp->U.I.DstReg.Index = tempregi;
|
||||
inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
|
||||
inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
|
||||
inst_rcp->U.I.SrcReg[0].Index = new_input;
|
||||
inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
|
||||
inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
|
||||
inst_rcp->U.I.SrcReg[0].Index = new_input;
|
||||
inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
|
||||
|
||||
inst_mul = rc_insert_new_instruction(c, inst_rcp);
|
||||
inst_mul->U.I.Opcode = RC_OPCODE_MUL;
|
||||
inst_mul = rc_insert_new_instruction(c, inst_rcp);
|
||||
inst_mul->U.I.Opcode = RC_OPCODE_MUL;
|
||||
|
||||
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.DstReg.Index = tempregi;
|
||||
inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.DstReg.Index = tempregi;
|
||||
inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
|
||||
inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
|
||||
inst_mul->U.I.SrcReg[0].Index = new_input;
|
||||
inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
|
||||
inst_mul->U.I.SrcReg[0].Index = new_input;
|
||||
|
||||
inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.SrcReg[1].Index = tempregi;
|
||||
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
|
||||
inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.SrcReg[1].Index = tempregi;
|
||||
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
|
||||
|
||||
/* viewport transformation */
|
||||
inst_mad = rc_insert_new_instruction(c, inst_mul);
|
||||
inst_mad->U.I.Opcode = RC_OPCODE_MAD;
|
||||
/* viewport transformation */
|
||||
inst_mad = rc_insert_new_instruction(c, inst_mul);
|
||||
inst_mad->U.I.Opcode = RC_OPCODE_MAD;
|
||||
|
||||
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mad->U.I.DstReg.Index = tempregi;
|
||||
inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mad->U.I.DstReg.Index = tempregi;
|
||||
inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
|
||||
inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst_mad->U.I.SrcReg[0].Index = tempregi;
|
||||
inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
|
||||
inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst_mad->U.I.SrcReg[0].Index = tempregi;
|
||||
inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
|
||||
|
||||
inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
|
||||
inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
|
||||
inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
|
||||
inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
|
||||
|
||||
inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
|
||||
inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
|
||||
inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
|
||||
inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
|
||||
|
||||
if (full_vtransform) {
|
||||
inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
|
||||
inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
|
||||
} else {
|
||||
inst_mad->U.I.SrcReg[1].Index =
|
||||
inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
|
||||
}
|
||||
if (full_vtransform) {
|
||||
inst_mad->U.I.SrcReg[1].Index =
|
||||
rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
|
||||
inst_mad->U.I.SrcReg[2].Index =
|
||||
rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
|
||||
} else {
|
||||
inst_mad->U.I.SrcReg[1].Index = inst_mad->U.I.SrcReg[2].Index =
|
||||
rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
|
||||
}
|
||||
|
||||
for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
unsigned i;
|
||||
for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
unsigned i;
|
||||
|
||||
for(i = 0; i < opcode->NumSrcRegs; i++) {
|
||||
if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
|
||||
inst->U.I.SrcReg[i].Index == wpos) {
|
||||
inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[i].Index = tempregi;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < opcode->NumSrcRegs; i++) {
|
||||
if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == wpos) {
|
||||
inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[i].Index = tempregi;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
|
||||
* Gallium and OpenGL define it the other way around.
|
||||
|
|
@ -307,203 +312,200 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig
|
|||
* So let's just negate FACE at the beginning of the shader and rewrite the rest
|
||||
* of the shader to read from the newly allocated temporary.
|
||||
*/
|
||||
void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
|
||||
void
|
||||
rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
|
||||
{
|
||||
unsigned tempregi = rc_find_free_temporary(c);
|
||||
struct rc_instruction *inst_add;
|
||||
struct rc_instruction *inst;
|
||||
unsigned tempregi = rc_find_free_temporary(c);
|
||||
struct rc_instruction *inst_add;
|
||||
struct rc_instruction *inst;
|
||||
|
||||
/* invert FACE: temp.x = 1 - FACE.x */
|
||||
inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
|
||||
inst_add->U.I.Opcode = RC_OPCODE_ADD;
|
||||
/* invert FACE: temp.x = 1 - FACE.x */
|
||||
inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
|
||||
inst_add->U.I.Opcode = RC_OPCODE_ADD;
|
||||
|
||||
inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_add->U.I.DstReg.Index = tempregi;
|
||||
inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
|
||||
inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_add->U.I.DstReg.Index = tempregi;
|
||||
inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
|
||||
|
||||
inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
|
||||
inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
|
||||
inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
|
||||
inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
|
||||
|
||||
inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
|
||||
inst_add->U.I.SrcReg[1].Index = face;
|
||||
inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
|
||||
inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
|
||||
inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
|
||||
inst_add->U.I.SrcReg[1].Index = face;
|
||||
inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
|
||||
inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
|
||||
|
||||
for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
unsigned i;
|
||||
for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
unsigned i;
|
||||
|
||||
for(i = 0; i < opcode->NumSrcRegs; i++) {
|
||||
if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
|
||||
inst->U.I.SrcReg[i].Index == face) {
|
||||
inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[i].Index = tempregi;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < opcode->NumSrcRegs; i++) {
|
||||
if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == face) {
|
||||
inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[i].Index = tempregi;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
|
||||
rc_register_file file, unsigned int index, unsigned int mask)
|
||||
static void
|
||||
reg_count_callback(void *userdata, struct rc_instruction *inst, rc_register_file file,
|
||||
unsigned int index, unsigned int mask)
|
||||
{
|
||||
struct rc_program_stats *s = userdata;
|
||||
if (file == RC_FILE_TEMPORARY)
|
||||
(int)index > s->num_temp_regs ? s->num_temp_regs = index : 0;
|
||||
if (file == RC_FILE_INLINE)
|
||||
s->num_inline_literals++;
|
||||
if (file == RC_FILE_CONSTANT)
|
||||
s->num_consts = MAX2(s->num_consts, index + 1);
|
||||
struct rc_program_stats *s = userdata;
|
||||
if (file == RC_FILE_TEMPORARY)
|
||||
(int)index > s->num_temp_regs ? s->num_temp_regs = index : 0;
|
||||
if (file == RC_FILE_INLINE)
|
||||
s->num_inline_literals++;
|
||||
if (file == RC_FILE_CONSTANT)
|
||||
s->num_consts = MAX2(s->num_consts, index + 1);
|
||||
}
|
||||
|
||||
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
|
||||
void
|
||||
rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
|
||||
{
|
||||
struct rc_instruction * tmp;
|
||||
memset(s, 0, sizeof(*s));
|
||||
unsigned ip = 0;
|
||||
int last_begintex = -1;
|
||||
struct rc_instruction *tmp;
|
||||
memset(s, 0, sizeof(*s));
|
||||
unsigned ip = 0;
|
||||
int last_begintex = -1;
|
||||
|
||||
for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
|
||||
tmp = tmp->Next, ip++){
|
||||
const struct rc_opcode_info * info;
|
||||
rc_for_all_reads_mask(tmp, reg_count_callback, s);
|
||||
if (tmp->Type == RC_INSTRUCTION_NORMAL) {
|
||||
info = rc_get_opcode_info(tmp->U.I.Opcode);
|
||||
if (info->Opcode == RC_OPCODE_BEGIN_TEX) {
|
||||
/* The R5xx docs mention ~30 cycles in section 8.3.1
|
||||
* The only case when we don't want to add the cycles
|
||||
* penalty is when the texblock contains only kil.
|
||||
*/
|
||||
const struct rc_opcode_info *next_op
|
||||
= rc_get_opcode_info(tmp->Next->U.I.Opcode);
|
||||
struct rc_instruction *second_next_instr = tmp->Next->Next;
|
||||
const struct rc_opcode_info *second_next_op;
|
||||
if (second_next_instr->Type == RC_INSTRUCTION_NORMAL) {
|
||||
second_next_op = rc_get_opcode_info(second_next_instr->U.I.Opcode);
|
||||
} else {
|
||||
second_next_op = rc_get_opcode_info(second_next_instr->U.P.RGB.Opcode);
|
||||
}
|
||||
if (next_op->Opcode != RC_OPCODE_KIL ||
|
||||
(second_next_instr->Type == RC_INSTRUCTION_NORMAL &&
|
||||
second_next_op->HasTexture)) {
|
||||
s->num_cycles += 30;
|
||||
last_begintex = ip;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (info->Opcode == RC_OPCODE_MAD &&
|
||||
rc_inst_has_three_diff_temp_srcs(tmp))
|
||||
s->num_cycles++;
|
||||
} else {
|
||||
if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
|
||||
s->num_presub_ops++;
|
||||
if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
|
||||
s->num_presub_ops++;
|
||||
/* Assuming alpha will never be a flow control or
|
||||
* a tex instruction. */
|
||||
if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
|
||||
s->num_alpha_insts++;
|
||||
if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
|
||||
s->num_rgb_insts++;
|
||||
if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
|
||||
tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
|
||||
s->num_omod_ops++;
|
||||
}
|
||||
if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
|
||||
tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
|
||||
s->num_omod_ops++;
|
||||
}
|
||||
if (tmp->U.P.Nop)
|
||||
s->num_cycles++;
|
||||
/* SemWait has effect only on R500, the more instructions we can put
|
||||
* between the tex block and the first texture semaphore, the better.
|
||||
*/
|
||||
if (tmp->U.P.SemWait && c->is_r500 && last_begintex != -1) {
|
||||
s->num_cycles -= MIN2(30, ip - last_begintex);
|
||||
last_begintex = -1;
|
||||
}
|
||||
info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
|
||||
}
|
||||
if (info->IsFlowControl) {
|
||||
s->num_fc_insts++;
|
||||
if (info->Opcode == RC_OPCODE_BGNLOOP)
|
||||
s->num_loops++;
|
||||
}
|
||||
/* VS flow control was already translated to the predicate instructions */
|
||||
if (c->type == RC_VERTEX_PROGRAM)
|
||||
if (strstr(info->Name, "PRED") != NULL)
|
||||
s->num_pred_insts++;
|
||||
for (tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
|
||||
tmp = tmp->Next, ip++) {
|
||||
const struct rc_opcode_info *info;
|
||||
rc_for_all_reads_mask(tmp, reg_count_callback, s);
|
||||
if (tmp->Type == RC_INSTRUCTION_NORMAL) {
|
||||
info = rc_get_opcode_info(tmp->U.I.Opcode);
|
||||
if (info->Opcode == RC_OPCODE_BEGIN_TEX) {
|
||||
/* The R5xx docs mention ~30 cycles in section 8.3.1
|
||||
* The only case when we don't want to add the cycles
|
||||
* penalty is when the texblock contains only kil.
|
||||
*/
|
||||
const struct rc_opcode_info *next_op = rc_get_opcode_info(tmp->Next->U.I.Opcode);
|
||||
struct rc_instruction *second_next_instr = tmp->Next->Next;
|
||||
const struct rc_opcode_info *second_next_op;
|
||||
if (second_next_instr->Type == RC_INSTRUCTION_NORMAL) {
|
||||
second_next_op = rc_get_opcode_info(second_next_instr->U.I.Opcode);
|
||||
} else {
|
||||
second_next_op = rc_get_opcode_info(second_next_instr->U.P.RGB.Opcode);
|
||||
}
|
||||
if (next_op->Opcode != RC_OPCODE_KIL ||
|
||||
(second_next_instr->Type == RC_INSTRUCTION_NORMAL && second_next_op->HasTexture)) {
|
||||
s->num_cycles += 30;
|
||||
last_begintex = ip;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (info->Opcode == RC_OPCODE_MAD && rc_inst_has_three_diff_temp_srcs(tmp))
|
||||
s->num_cycles++;
|
||||
} else {
|
||||
if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
|
||||
s->num_presub_ops++;
|
||||
if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
|
||||
s->num_presub_ops++;
|
||||
/* Assuming alpha will never be a flow control or
|
||||
* a tex instruction. */
|
||||
if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
|
||||
s->num_alpha_insts++;
|
||||
if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
|
||||
s->num_rgb_insts++;
|
||||
if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 && tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
|
||||
s->num_omod_ops++;
|
||||
}
|
||||
if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 && tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
|
||||
s->num_omod_ops++;
|
||||
}
|
||||
if (tmp->U.P.Nop)
|
||||
s->num_cycles++;
|
||||
/* SemWait has effect only on R500, the more instructions we can put
|
||||
* between the tex block and the first texture semaphore, the better.
|
||||
*/
|
||||
if (tmp->U.P.SemWait && c->is_r500 && last_begintex != -1) {
|
||||
s->num_cycles -= MIN2(30, ip - last_begintex);
|
||||
last_begintex = -1;
|
||||
}
|
||||
info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
|
||||
}
|
||||
if (info->IsFlowControl) {
|
||||
s->num_fc_insts++;
|
||||
if (info->Opcode == RC_OPCODE_BGNLOOP)
|
||||
s->num_loops++;
|
||||
}
|
||||
/* VS flow control was already translated to the predicate instructions */
|
||||
if (c->type == RC_VERTEX_PROGRAM)
|
||||
if (strstr(info->Name, "PRED") != NULL)
|
||||
s->num_pred_insts++;
|
||||
|
||||
if (info->HasTexture)
|
||||
s->num_tex_insts++;
|
||||
s->num_insts++;
|
||||
s->num_cycles++;
|
||||
}
|
||||
/* Increment here because reg_count_callback stores the max
|
||||
* temporary reg index in s->num_temp_regs. */
|
||||
s->num_temp_regs++;
|
||||
if (info->HasTexture)
|
||||
s->num_tex_insts++;
|
||||
s->num_insts++;
|
||||
s->num_cycles++;
|
||||
}
|
||||
/* Increment here because reg_count_callback stores the max
|
||||
* temporary reg index in s->num_temp_regs. */
|
||||
s->num_temp_regs++;
|
||||
}
|
||||
|
||||
static void print_stats(struct radeon_compiler * c)
|
||||
static void
|
||||
print_stats(struct radeon_compiler *c)
|
||||
{
|
||||
struct rc_program_stats s;
|
||||
struct rc_program_stats s;
|
||||
|
||||
rc_get_stats(c, &s);
|
||||
rc_get_stats(c, &s);
|
||||
|
||||
/* Note that we print some dummy values for instruction categories that
|
||||
* only the FS has, because shader-db's report.py wants all shaders to
|
||||
* have the same set.
|
||||
*/
|
||||
util_debug_message(c->debug, SHADER_INFO,
|
||||
"%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol, "
|
||||
"%u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits, %u cycles",
|
||||
c->type == RC_VERTEX_PROGRAM ? "VS" : "FS",
|
||||
s.num_insts, s.num_rgb_insts, s.num_alpha_insts, s.num_pred_insts,
|
||||
s.num_fc_insts, s.num_loops, s.num_tex_insts, s.num_presub_ops,
|
||||
s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals,
|
||||
s.num_cycles);
|
||||
/* Note that we print some dummy values for instruction categories that
|
||||
* only the FS has, because shader-db's report.py wants all shaders to
|
||||
* have the same set.
|
||||
*/
|
||||
util_debug_message(
|
||||
c->debug, SHADER_INFO,
|
||||
"%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol, "
|
||||
"%u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits, %u cycles",
|
||||
c->type == RC_VERTEX_PROGRAM ? "VS" : "FS", s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
|
||||
s.num_pred_insts, s.num_fc_insts, s.num_loops, s.num_tex_insts, s.num_presub_ops,
|
||||
s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals, s.num_cycles);
|
||||
}
|
||||
|
||||
static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
|
||||
"Vertex Program",
|
||||
"Fragment Program"
|
||||
};
|
||||
static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {"Vertex Program", "Fragment Program"};
|
||||
|
||||
bool rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
|
||||
bool
|
||||
rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
|
||||
{
|
||||
for (unsigned i = 0; list[i].name; i++) {
|
||||
if (list[i].predicate) {
|
||||
list[i].run(c, list[i].user);
|
||||
for (unsigned i = 0; list[i].name; i++) {
|
||||
if (list[i].predicate) {
|
||||
list[i].run(c, list[i].user);
|
||||
|
||||
if (c->Error)
|
||||
return false;
|
||||
if (c->Error)
|
||||
return false;
|
||||
|
||||
if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
|
||||
fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
|
||||
rc_print_program(&c->Program);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
|
||||
fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
|
||||
rc_print_program(&c->Program);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Executes a list of compiler passes given in the parameter 'list'. */
|
||||
void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
|
||||
void
|
||||
rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
|
||||
{
|
||||
if (c->Debug & RC_DBG_LOG) {
|
||||
fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
|
||||
rc_print_program(&c->Program);
|
||||
}
|
||||
if (c->Debug & RC_DBG_LOG) {
|
||||
fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
|
||||
rc_print_program(&c->Program);
|
||||
}
|
||||
|
||||
if(rc_run_compiler_passes(c, list)) {
|
||||
print_stats(c);
|
||||
}
|
||||
if (rc_run_compiler_passes(c, list)) {
|
||||
print_stats(c);
|
||||
}
|
||||
}
|
||||
|
||||
void rc_validate_final_shader(struct radeon_compiler *c, void *user)
|
||||
void
|
||||
rc_validate_final_shader(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
/* Check the number of constants. */
|
||||
if (c->Program.Constants.Count > c->max_constants) {
|
||||
rc_error(c, "Too many constants. Max: %i, Got: %i\n",
|
||||
c->max_constants, c->Program.Constants.Count);
|
||||
}
|
||||
/* Check the number of constants. */
|
||||
if (c->Program.Constants.Count > c->max_constants) {
|
||||
rc_error(c, "Too many constants. Max: %i, Got: %i\n", c->max_constants,
|
||||
c->Program.Constants.Count);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,59 +12,55 @@
|
|||
#include "radeon_code.h"
|
||||
#include "radeon_program.h"
|
||||
|
||||
#define RC_DBG_LOG (1 << 0)
|
||||
#define RC_DBG_LOG (1 << 0)
|
||||
|
||||
struct rc_swizzle_caps;
|
||||
|
||||
enum rc_program_type {
|
||||
RC_VERTEX_PROGRAM,
|
||||
RC_FRAGMENT_PROGRAM,
|
||||
RC_NUM_PROGRAM_TYPES
|
||||
};
|
||||
enum rc_program_type { RC_VERTEX_PROGRAM, RC_FRAGMENT_PROGRAM, RC_NUM_PROGRAM_TYPES };
|
||||
|
||||
struct radeon_compiler {
|
||||
struct memory_pool Pool;
|
||||
struct rc_program Program;
|
||||
const struct rc_regalloc_state *regalloc_state;
|
||||
struct util_debug_callback *debug;
|
||||
enum rc_program_type type;
|
||||
unsigned Debug:2;
|
||||
unsigned Error:1;
|
||||
char * ErrorMsg;
|
||||
struct memory_pool Pool;
|
||||
struct rc_program Program;
|
||||
const struct rc_regalloc_state *regalloc_state;
|
||||
struct util_debug_callback *debug;
|
||||
enum rc_program_type type;
|
||||
unsigned Debug : 2;
|
||||
unsigned Error : 1;
|
||||
char *ErrorMsg;
|
||||
|
||||
/* Hardware specification. */
|
||||
unsigned is_r400:1;
|
||||
unsigned is_r500:1;
|
||||
unsigned has_half_swizzles:1;
|
||||
unsigned has_presub:1;
|
||||
unsigned has_omod:1;
|
||||
unsigned disable_optimizations:1;
|
||||
unsigned max_temp_regs;
|
||||
unsigned max_constants;
|
||||
int max_alu_insts;
|
||||
unsigned max_tex_insts;
|
||||
/* Hardware specification. */
|
||||
unsigned is_r400 : 1;
|
||||
unsigned is_r500 : 1;
|
||||
unsigned has_half_swizzles : 1;
|
||||
unsigned has_presub : 1;
|
||||
unsigned has_omod : 1;
|
||||
unsigned disable_optimizations : 1;
|
||||
unsigned max_temp_regs;
|
||||
unsigned max_constants;
|
||||
int max_alu_insts;
|
||||
unsigned max_tex_insts;
|
||||
|
||||
int max_temp_index;
|
||||
int max_temp_index;
|
||||
|
||||
/* Whether to remove unused constants and empty holes in constant space. */
|
||||
unsigned remove_unused_constants:1;
|
||||
/* Whether to remove unused constants and empty holes in constant space. */
|
||||
unsigned remove_unused_constants : 1;
|
||||
|
||||
/**
|
||||
* Variables used internally, not be touched by callers
|
||||
* of the compiler
|
||||
*/
|
||||
/*@{*/
|
||||
const struct rc_swizzle_caps * SwizzleCaps;
|
||||
/*@}*/
|
||||
/**
|
||||
* Variables used internally, not be touched by callers
|
||||
* of the compiler
|
||||
*/
|
||||
/*@{*/
|
||||
const struct rc_swizzle_caps *SwizzleCaps;
|
||||
/*@}*/
|
||||
};
|
||||
|
||||
void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs);
|
||||
void rc_destroy(struct radeon_compiler * c);
|
||||
void rc_init(struct radeon_compiler *c, const struct rc_regalloc_state *rs);
|
||||
void rc_destroy(struct radeon_compiler *c);
|
||||
|
||||
void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
|
||||
void rc_error(struct radeon_compiler * c, const char * fmt, ...);
|
||||
void rc_debug(struct radeon_compiler *c, const char *fmt, ...);
|
||||
void rc_error(struct radeon_compiler *c, const char *fmt, ...);
|
||||
|
||||
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);
|
||||
int rc_if_fail_helper(struct radeon_compiler *c, const char *file, int line, const char *assertion);
|
||||
|
||||
/**
|
||||
* This macro acts like an if-statement that can be used to implement
|
||||
|
|
@ -78,71 +74,68 @@ int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, c
|
|||
* if (rc_assert(c, condition-that-must-be-true))
|
||||
* return;
|
||||
*/
|
||||
#define rc_assert(c, cond) \
|
||||
(!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
|
||||
#define rc_assert(c, cond) (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
|
||||
|
||||
void rc_mark_unused_channels(struct radeon_compiler * c, void *user);
|
||||
void rc_calculate_inputs_outputs(struct radeon_compiler * c);
|
||||
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
|
||||
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
|
||||
void rc_mark_unused_channels(struct radeon_compiler *c, void *user);
|
||||
void rc_calculate_inputs_outputs(struct radeon_compiler *c);
|
||||
void rc_copy_output(struct radeon_compiler *c, unsigned output, unsigned dup_output);
|
||||
void rc_transform_fragment_wpos(struct radeon_compiler *c, unsigned wpos, unsigned new_input,
|
||||
int full_vtransform);
|
||||
void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face);
|
||||
|
||||
struct r300_fragment_program_compiler {
|
||||
struct radeon_compiler Base;
|
||||
struct rX00_fragment_program_code *code;
|
||||
/* Optional transformations and features. */
|
||||
struct r300_fragment_program_external_state state;
|
||||
/* Register corresponding to the depthbuffer. */
|
||||
unsigned OutputDepth;
|
||||
/* Registers corresponding to the four colorbuffers. */
|
||||
unsigned OutputColor[4];
|
||||
struct radeon_compiler Base;
|
||||
struct rX00_fragment_program_code *code;
|
||||
/* Optional transformations and features. */
|
||||
struct r300_fragment_program_external_state state;
|
||||
/* Register corresponding to the depthbuffer. */
|
||||
unsigned OutputDepth;
|
||||
/* Registers corresponding to the four colorbuffers. */
|
||||
unsigned OutputColor[4];
|
||||
|
||||
void * UserData;
|
||||
void (*AllocateHwInputs)(
|
||||
struct r300_fragment_program_compiler * c,
|
||||
void (*allocate)(void * data, unsigned input, unsigned hwreg),
|
||||
void * mydata);
|
||||
void *UserData;
|
||||
void (*AllocateHwInputs)(struct r300_fragment_program_compiler *c,
|
||||
void (*allocate)(void *data, unsigned input, unsigned hwreg),
|
||||
void *mydata);
|
||||
};
|
||||
|
||||
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
|
||||
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler *c);
|
||||
|
||||
struct r300_vertex_program_compiler {
|
||||
struct radeon_compiler Base;
|
||||
struct r300_vertex_program_code *code;
|
||||
uint32_t RequiredOutputs;
|
||||
|
||||
void * UserData;
|
||||
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
|
||||
struct radeon_compiler Base;
|
||||
struct r300_vertex_program_code *code;
|
||||
uint32_t RequiredOutputs;
|
||||
|
||||
void *UserData;
|
||||
void (*SetHwInputOutput)(struct r300_vertex_program_compiler *c);
|
||||
};
|
||||
|
||||
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
|
||||
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c);
|
||||
void rc_vert_fc(struct radeon_compiler *compiler, void *user);
|
||||
void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user);
|
||||
|
||||
struct radeon_compiler_pass {
|
||||
const char *name; /* Name of the pass. */
|
||||
int dump; /* Dump the program if Debug == 1? */
|
||||
int predicate; /* Run this pass? */
|
||||
void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */
|
||||
void *user; /* Optional parameter which is passed to the run function. */
|
||||
const char *name; /* Name of the pass. */
|
||||
int dump; /* Dump the program if Debug == 1? */
|
||||
int predicate; /* Run this pass? */
|
||||
void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */
|
||||
void *user; /* Optional parameter which is passed to the run function. */
|
||||
};
|
||||
|
||||
struct rc_program_stats {
|
||||
unsigned num_cycles;
|
||||
unsigned num_consts;
|
||||
unsigned num_insts;
|
||||
unsigned num_fc_insts;
|
||||
unsigned num_tex_insts;
|
||||
unsigned num_rgb_insts;
|
||||
unsigned num_alpha_insts;
|
||||
unsigned num_pred_insts;
|
||||
unsigned num_presub_ops;
|
||||
unsigned num_temp_regs;
|
||||
unsigned num_omod_ops;
|
||||
unsigned num_inline_literals;
|
||||
unsigned num_loops;
|
||||
unsigned num_cycles;
|
||||
unsigned num_consts;
|
||||
unsigned num_insts;
|
||||
unsigned num_fc_insts;
|
||||
unsigned num_tex_insts;
|
||||
unsigned num_rgb_insts;
|
||||
unsigned num_alpha_insts;
|
||||
unsigned num_pred_insts;
|
||||
unsigned num_presub_ops;
|
||||
unsigned num_temp_regs;
|
||||
unsigned num_omod_ops;
|
||||
unsigned num_inline_literals;
|
||||
unsigned num_loops;
|
||||
};
|
||||
|
||||
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -24,9 +24,8 @@ rc_swizzle get_swz(unsigned int swz, rc_swizzle idx);
|
|||
|
||||
unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels);
|
||||
|
||||
unsigned int combine_swizzles4(unsigned int src,
|
||||
rc_swizzle swz_x, rc_swizzle swz_y,
|
||||
rc_swizzle swz_z, rc_swizzle swz_w);
|
||||
unsigned int combine_swizzles4(unsigned int src, rc_swizzle swz_x, rc_swizzle swz_y,
|
||||
rc_swizzle swz_z, rc_swizzle swz_w);
|
||||
|
||||
unsigned int combine_swizzles(unsigned int src, unsigned int swz);
|
||||
|
||||
|
|
@ -34,75 +33,49 @@ rc_swizzle rc_mask_to_swizzle(unsigned int mask);
|
|||
|
||||
unsigned swizzle_mask(unsigned swizzle, unsigned mask);
|
||||
|
||||
unsigned int rc_adjust_channels(
|
||||
unsigned int old_swizzle,
|
||||
unsigned int conversion_swizzle);
|
||||
unsigned int rc_adjust_channels(unsigned int old_swizzle, unsigned int conversion_swizzle);
|
||||
|
||||
void rc_pair_rewrite_writemask(
|
||||
struct rc_pair_sub_instruction * sub,
|
||||
unsigned int conversion_swizzle);
|
||||
void rc_pair_rewrite_writemask(struct rc_pair_sub_instruction *sub,
|
||||
unsigned int conversion_swizzle);
|
||||
|
||||
void rc_normal_rewrite_writemask(
|
||||
struct rc_instruction * inst,
|
||||
unsigned int conversion_swizzle);
|
||||
void rc_normal_rewrite_writemask(struct rc_instruction *inst, unsigned int conversion_swizzle);
|
||||
|
||||
unsigned int rc_rewrite_swizzle(
|
||||
unsigned int swizzle,
|
||||
unsigned int new_mask);
|
||||
unsigned int rc_rewrite_swizzle(unsigned int swizzle, unsigned int new_mask);
|
||||
|
||||
struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
|
||||
|
||||
void reset_srcreg(struct rc_src_register* reg);
|
||||
void reset_srcreg(struct rc_src_register *reg);
|
||||
|
||||
unsigned int rc_src_reads_dst_mask(
|
||||
rc_register_file src_file,
|
||||
unsigned int src_idx,
|
||||
unsigned int src_swz,
|
||||
rc_register_file dst_file,
|
||||
unsigned int dst_idx,
|
||||
unsigned int dst_mask);
|
||||
unsigned int rc_src_reads_dst_mask(rc_register_file src_file, unsigned int src_idx,
|
||||
unsigned int src_swz, rc_register_file dst_file,
|
||||
unsigned int dst_idx, unsigned int dst_mask);
|
||||
|
||||
unsigned int rc_source_type_swz(unsigned int swizzle);
|
||||
|
||||
unsigned int rc_source_type_mask(unsigned int mask);
|
||||
|
||||
unsigned int rc_inst_can_use_presub(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst,
|
||||
rc_presubtract_op presub_op,
|
||||
unsigned int presub_writemask,
|
||||
const struct rc_src_register * replace_reg,
|
||||
const struct rc_src_register * presub_src0,
|
||||
const struct rc_src_register * presub_src1);
|
||||
unsigned int rc_inst_can_use_presub(struct radeon_compiler *c, struct rc_instruction *inst,
|
||||
rc_presubtract_op presub_op, unsigned int presub_writemask,
|
||||
const struct rc_src_register *replace_reg,
|
||||
const struct rc_src_register *presub_src0,
|
||||
const struct rc_src_register *presub_src1);
|
||||
|
||||
int rc_get_max_index(
|
||||
struct radeon_compiler * c,
|
||||
rc_register_file file);
|
||||
int rc_get_max_index(struct radeon_compiler *c, rc_register_file file);
|
||||
|
||||
void rc_pair_remove_src(struct rc_instruction * inst,
|
||||
unsigned int src_type,
|
||||
unsigned int source);
|
||||
void rc_pair_remove_src(struct rc_instruction *inst, unsigned int src_type, unsigned int source);
|
||||
|
||||
rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst);
|
||||
rc_opcode rc_get_flow_control_inst(struct rc_instruction *inst);
|
||||
|
||||
struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop);
|
||||
struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop);
|
||||
struct rc_instruction *rc_match_endloop(struct rc_instruction *endloop);
|
||||
struct rc_instruction *rc_match_bgnloop(struct rc_instruction *bgnloop);
|
||||
|
||||
unsigned int rc_make_conversion_swizzle(
|
||||
unsigned int old_mask,
|
||||
unsigned int new_mask);
|
||||
unsigned int rc_make_conversion_swizzle(unsigned int old_mask, unsigned int new_mask);
|
||||
|
||||
unsigned int rc_src_reg_is_immediate(
|
||||
struct radeon_compiler * c,
|
||||
unsigned int file,
|
||||
unsigned int index);
|
||||
unsigned int rc_src_reg_is_immediate(struct radeon_compiler *c, unsigned int file,
|
||||
unsigned int index);
|
||||
|
||||
float rc_get_constant_value(
|
||||
struct radeon_compiler * c,
|
||||
unsigned int index,
|
||||
unsigned int swizzle,
|
||||
unsigned int negate,
|
||||
unsigned int chan);
|
||||
float rc_get_constant_value(struct radeon_compiler *c, unsigned int index, unsigned int swizzle,
|
||||
unsigned int negate, unsigned int chan);
|
||||
|
||||
unsigned int rc_get_scalar_src_swz(unsigned int swizzle);
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -18,98 +18,88 @@ struct rc_pair_instruction_source;
|
|||
struct rc_pair_sub_instruction;
|
||||
struct rc_compiler;
|
||||
|
||||
|
||||
/**
|
||||
* Help analyze and modify the register accesses of instructions.
|
||||
*/
|
||||
/*@{*/
|
||||
typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
|
||||
rc_register_file file, unsigned int index, unsigned int chan);
|
||||
void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
|
||||
void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
|
||||
typedef void (*rc_read_write_chan_fn)(void *userdata, struct rc_instruction *inst,
|
||||
rc_register_file file, unsigned int index, unsigned int chan);
|
||||
void rc_for_all_reads_chan(struct rc_instruction *inst, rc_read_write_chan_fn cb, void *userdata);
|
||||
void rc_for_all_writes_chan(struct rc_instruction *inst, rc_read_write_chan_fn cb, void *userdata);
|
||||
|
||||
typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
|
||||
rc_register_file file, unsigned int index, unsigned int mask);
|
||||
void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
|
||||
void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
|
||||
typedef void (*rc_read_write_mask_fn)(void *userdata, struct rc_instruction *inst,
|
||||
rc_register_file file, unsigned int index, unsigned int mask);
|
||||
void rc_for_all_reads_mask(struct rc_instruction *inst, rc_read_write_mask_fn cb, void *userdata);
|
||||
void rc_for_all_writes_mask(struct rc_instruction *inst, rc_read_write_mask_fn cb, void *userdata);
|
||||
|
||||
typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst,
|
||||
struct rc_src_register * src);
|
||||
void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb,
|
||||
void * userdata);
|
||||
typedef void (*rc_read_src_fn)(void *userdata, struct rc_instruction *inst,
|
||||
struct rc_src_register *src);
|
||||
void rc_for_all_reads_src(struct rc_instruction *inst, rc_read_src_fn cb, void *userdata);
|
||||
|
||||
typedef void (*rc_pair_read_arg_fn)(void * userdata,
|
||||
struct rc_instruction * inst, struct rc_pair_instruction_arg * arg,
|
||||
struct rc_pair_instruction_source * src);
|
||||
void rc_pair_for_all_reads_arg(struct rc_instruction * inst,
|
||||
rc_pair_read_arg_fn cb, void * userdata);
|
||||
typedef void (*rc_pair_read_arg_fn)(void *userdata, struct rc_instruction *inst,
|
||||
struct rc_pair_instruction_arg *arg,
|
||||
struct rc_pair_instruction_source *src);
|
||||
void rc_pair_for_all_reads_arg(struct rc_instruction *inst, rc_pair_read_arg_fn cb, void *userdata);
|
||||
|
||||
typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
|
||||
rc_register_file * pfile, unsigned int * pindex);
|
||||
void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
|
||||
typedef void (*rc_remap_register_fn)(void *userdata, struct rc_instruction *inst,
|
||||
rc_register_file *pfile, unsigned int *pindex);
|
||||
void rc_remap_registers(struct rc_instruction *inst, rc_remap_register_fn cb, void *userdata);
|
||||
/*@}*/
|
||||
|
||||
struct rc_reader {
|
||||
struct rc_instruction * Inst;
|
||||
unsigned int WriteMask;
|
||||
union {
|
||||
struct {
|
||||
struct rc_src_register * Src;
|
||||
} I;
|
||||
struct {
|
||||
struct rc_pair_instruction_arg * Arg;
|
||||
struct rc_pair_instruction_source * Src;
|
||||
} P;
|
||||
} U;
|
||||
struct rc_instruction *Inst;
|
||||
unsigned int WriteMask;
|
||||
union {
|
||||
struct {
|
||||
struct rc_src_register *Src;
|
||||
} I;
|
||||
struct {
|
||||
struct rc_pair_instruction_arg *Arg;
|
||||
struct rc_pair_instruction_source *Src;
|
||||
} P;
|
||||
} U;
|
||||
};
|
||||
|
||||
struct rc_reader_data {
|
||||
struct radeon_compiler * C;
|
||||
struct radeon_compiler *C;
|
||||
|
||||
unsigned int Abort;
|
||||
unsigned int AbortOnRead;
|
||||
unsigned int AbortOnWrite;
|
||||
unsigned int LoopDepth;
|
||||
unsigned int InElse;
|
||||
struct rc_instruction * Writer;
|
||||
unsigned int Abort;
|
||||
unsigned int AbortOnRead;
|
||||
unsigned int AbortOnWrite;
|
||||
unsigned int LoopDepth;
|
||||
unsigned int InElse;
|
||||
struct rc_instruction *Writer;
|
||||
|
||||
unsigned int ReaderCount;
|
||||
unsigned int ReadersReserved;
|
||||
struct rc_reader * Readers;
|
||||
unsigned int ReaderCount;
|
||||
unsigned int ReadersReserved;
|
||||
struct rc_reader *Readers;
|
||||
|
||||
/* If this flag is enabled, rc_get_readers will exit as soon possible
|
||||
* after the Abort flag is set.*/
|
||||
unsigned int ExitOnAbort;
|
||||
void * CbData;
|
||||
/* If this flag is enabled, rc_get_readers will exit as soon possible
|
||||
* after the Abort flag is set.*/
|
||||
unsigned int ExitOnAbort;
|
||||
void *CbData;
|
||||
};
|
||||
|
||||
void rc_get_readers(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * writer,
|
||||
struct rc_reader_data * data,
|
||||
rc_read_src_fn read_normal_cb,
|
||||
rc_pair_read_arg_fn read_pair_cb,
|
||||
rc_read_write_mask_fn write_cb);
|
||||
void rc_get_readers(struct radeon_compiler *c, struct rc_instruction *writer,
|
||||
struct rc_reader_data *data, rc_read_src_fn read_normal_cb,
|
||||
rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb);
|
||||
|
||||
void rc_get_readers_sub(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * writer,
|
||||
struct rc_pair_sub_instruction * sub_writer,
|
||||
struct rc_reader_data * data,
|
||||
rc_read_src_fn read_normal_cb,
|
||||
rc_pair_read_arg_fn read_pair_cb,
|
||||
rc_read_write_mask_fn write_cb);
|
||||
void rc_get_readers_sub(struct radeon_compiler *c, struct rc_instruction *writer,
|
||||
struct rc_pair_sub_instruction *sub_writer, struct rc_reader_data *data,
|
||||
rc_read_src_fn read_normal_cb, rc_pair_read_arg_fn read_pair_cb,
|
||||
rc_read_write_mask_fn write_cb);
|
||||
/**
|
||||
* Compiler passes based on dataflow analysis.
|
||||
*/
|
||||
/*@{*/
|
||||
typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data,
|
||||
void (*mark_fn)(void * data, unsigned int index, unsigned int mask));
|
||||
void rc_dataflow_deadcode(struct radeon_compiler * c, void *user);
|
||||
void rc_dataflow_swizzles(struct radeon_compiler * c, void *user);
|
||||
typedef void (*rc_dataflow_mark_outputs_fn)(void *userdata, void *data,
|
||||
void (*mark_fn)(void *data, unsigned int index,
|
||||
unsigned int mask));
|
||||
void rc_dataflow_deadcode(struct radeon_compiler *c, void *user);
|
||||
void rc_dataflow_swizzles(struct radeon_compiler *c, void *user);
|
||||
/*@}*/
|
||||
|
||||
void rc_optimize(struct radeon_compiler * c, void *user);
|
||||
void rc_optimize(struct radeon_compiler *c, void *user);
|
||||
void rc_inline_literals(struct radeon_compiler *c, void *user);
|
||||
int rc_opt_presubtract(struct radeon_compiler *c, struct rc_instruction *inst, void *data);
|
||||
|
||||
|
|
|
|||
|
|
@ -7,323 +7,319 @@
|
|||
|
||||
#include "radeon_compiler.h"
|
||||
|
||||
|
||||
struct updatemask_state {
|
||||
unsigned char Output[RC_REGISTER_MAX_INDEX];
|
||||
unsigned char Temporary[RC_REGISTER_MAX_INDEX];
|
||||
unsigned char Address;
|
||||
unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
|
||||
unsigned char Output[RC_REGISTER_MAX_INDEX];
|
||||
unsigned char Temporary[RC_REGISTER_MAX_INDEX];
|
||||
unsigned char Address;
|
||||
unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
|
||||
};
|
||||
|
||||
struct instruction_state {
|
||||
unsigned char WriteMask:4;
|
||||
unsigned char WriteALUResult:1;
|
||||
unsigned char SrcReg[3];
|
||||
unsigned char WriteMask : 4;
|
||||
unsigned char WriteALUResult : 1;
|
||||
unsigned char SrcReg[3];
|
||||
};
|
||||
|
||||
struct loopinfo {
|
||||
struct updatemask_state StoreEndloop;
|
||||
unsigned int BreakCount;
|
||||
unsigned int BreaksReserved;
|
||||
struct updatemask_state StoreEndloop;
|
||||
unsigned int BreakCount;
|
||||
unsigned int BreaksReserved;
|
||||
};
|
||||
|
||||
struct branchinfo {
|
||||
unsigned int HaveElse:1;
|
||||
unsigned int HaveElse : 1;
|
||||
|
||||
struct updatemask_state StoreEndif;
|
||||
struct updatemask_state StoreElse;
|
||||
struct updatemask_state StoreEndif;
|
||||
struct updatemask_state StoreElse;
|
||||
};
|
||||
|
||||
struct deadcode_state {
|
||||
struct radeon_compiler * C;
|
||||
struct instruction_state * Instructions;
|
||||
struct radeon_compiler *C;
|
||||
struct instruction_state *Instructions;
|
||||
|
||||
struct updatemask_state R;
|
||||
struct updatemask_state R;
|
||||
|
||||
struct branchinfo * BranchStack;
|
||||
unsigned int BranchStackSize;
|
||||
unsigned int BranchStackReserved;
|
||||
struct branchinfo *BranchStack;
|
||||
unsigned int BranchStackSize;
|
||||
unsigned int BranchStackReserved;
|
||||
|
||||
struct loopinfo * LoopStack;
|
||||
unsigned int LoopStackSize;
|
||||
unsigned int LoopStackReserved;
|
||||
struct loopinfo *LoopStack;
|
||||
unsigned int LoopStackSize;
|
||||
unsigned int LoopStackReserved;
|
||||
};
|
||||
|
||||
|
||||
static void or_updatemasks(
|
||||
struct updatemask_state * dst,
|
||||
struct updatemask_state * a,
|
||||
struct updatemask_state * b)
|
||||
static void
|
||||
or_updatemasks(struct updatemask_state *dst, struct updatemask_state *a, struct updatemask_state *b)
|
||||
{
|
||||
for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
|
||||
dst->Output[i] = a->Output[i] | b->Output[i];
|
||||
dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
|
||||
}
|
||||
for (unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
|
||||
dst->Output[i] = a->Output[i] | b->Output[i];
|
||||
dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
|
||||
}
|
||||
|
||||
for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
|
||||
dst->Special[i] = a->Special[i] | b->Special[i];
|
||||
for (unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
|
||||
dst->Special[i] = a->Special[i] | b->Special[i];
|
||||
|
||||
dst->Address = a->Address | b->Address;
|
||||
dst->Address = a->Address | b->Address;
|
||||
}
|
||||
|
||||
static void push_loop(struct deadcode_state * s)
|
||||
static void
|
||||
push_loop(struct deadcode_state *s)
|
||||
{
|
||||
memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
|
||||
s->LoopStackSize, s->LoopStackReserved, 1);
|
||||
memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
|
||||
memcpy(&s->LoopStack[s->LoopStackSize - 1].StoreEndloop, &s->R, sizeof(s->R));
|
||||
memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, s->LoopStackSize,
|
||||
s->LoopStackReserved, 1);
|
||||
memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
|
||||
memcpy(&s->LoopStack[s->LoopStackSize - 1].StoreEndloop, &s->R, sizeof(s->R));
|
||||
}
|
||||
|
||||
static void push_branch(struct deadcode_state * s)
|
||||
static void
|
||||
push_branch(struct deadcode_state *s)
|
||||
{
|
||||
struct branchinfo * branch;
|
||||
struct branchinfo *branch;
|
||||
|
||||
memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
|
||||
s->BranchStackSize, s->BranchStackReserved, 1);
|
||||
memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, s->BranchStackSize,
|
||||
s->BranchStackReserved, 1);
|
||||
|
||||
branch = &s->BranchStack[s->BranchStackSize++];
|
||||
branch->HaveElse = 0;
|
||||
memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
|
||||
branch = &s->BranchStack[s->BranchStackSize++];
|
||||
branch->HaveElse = 0;
|
||||
memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
|
||||
}
|
||||
|
||||
static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
|
||||
static unsigned char *
|
||||
get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
|
||||
{
|
||||
if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
|
||||
if (index >= RC_REGISTER_MAX_INDEX) {
|
||||
rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __func__, index, file);
|
||||
return NULL;
|
||||
}
|
||||
if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
|
||||
if (index >= RC_REGISTER_MAX_INDEX) {
|
||||
rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __func__, index, file);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (file == RC_FILE_OUTPUT)
|
||||
return &s->R.Output[index];
|
||||
else
|
||||
return &s->R.Temporary[index];
|
||||
} else if (file == RC_FILE_ADDRESS) {
|
||||
return &s->R.Address;
|
||||
} else if (file == RC_FILE_SPECIAL) {
|
||||
if (index >= RC_NUM_SPECIAL_REGISTERS) {
|
||||
rc_error(s->C, "%s: special file index %i out of bounds\n", __func__, index);
|
||||
return NULL;
|
||||
}
|
||||
if (file == RC_FILE_OUTPUT)
|
||||
return &s->R.Output[index];
|
||||
else
|
||||
return &s->R.Temporary[index];
|
||||
} else if (file == RC_FILE_ADDRESS) {
|
||||
return &s->R.Address;
|
||||
} else if (file == RC_FILE_SPECIAL) {
|
||||
if (index >= RC_NUM_SPECIAL_REGISTERS) {
|
||||
rc_error(s->C, "%s: special file index %i out of bounds\n", __func__, index);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return &s->R.Special[index];
|
||||
}
|
||||
return &s->R.Special[index];
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
|
||||
static void
|
||||
mark_used(struct deadcode_state *s, rc_register_file file, unsigned int index, unsigned int mask)
|
||||
{
|
||||
unsigned char * pused = get_used_ptr(s, file, index);
|
||||
if (pused)
|
||||
*pused |= mask;
|
||||
unsigned char *pused = get_used_ptr(s, file, index);
|
||||
if (pused)
|
||||
*pused |= mask;
|
||||
}
|
||||
|
||||
static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
|
||||
static void
|
||||
update_instruction(struct deadcode_state *s, struct rc_instruction *inst)
|
||||
{
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
struct instruction_state * insts = &s->Instructions[inst->IP];
|
||||
unsigned int usedmask = 0;
|
||||
unsigned int srcmasks[3];
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
struct instruction_state *insts = &s->Instructions[inst->IP];
|
||||
unsigned int usedmask = 0;
|
||||
unsigned int srcmasks[3];
|
||||
|
||||
if (opcode->HasDstReg) {
|
||||
unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
|
||||
if (pused) {
|
||||
usedmask = *pused & inst->U.I.DstReg.WriteMask;
|
||||
*pused &= ~usedmask;
|
||||
}
|
||||
}
|
||||
if (opcode->HasDstReg) {
|
||||
unsigned char *pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
|
||||
if (pused) {
|
||||
usedmask = *pused & inst->U.I.DstReg.WriteMask;
|
||||
*pused &= ~usedmask;
|
||||
}
|
||||
}
|
||||
|
||||
insts->WriteMask |= usedmask;
|
||||
insts->WriteMask |= usedmask;
|
||||
|
||||
if (inst->U.I.WriteALUResult) {
|
||||
unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
|
||||
if (pused && *pused) {
|
||||
if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
|
||||
usedmask |= RC_MASK_X;
|
||||
else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
|
||||
usedmask |= RC_MASK_W;
|
||||
if (inst->U.I.WriteALUResult) {
|
||||
unsigned char *pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
|
||||
if (pused && *pused) {
|
||||
if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
|
||||
usedmask |= RC_MASK_X;
|
||||
else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
|
||||
usedmask |= RC_MASK_W;
|
||||
|
||||
*pused = 0;
|
||||
insts->WriteALUResult = 1;
|
||||
}
|
||||
}
|
||||
*pused = 0;
|
||||
insts->WriteALUResult = 1;
|
||||
}
|
||||
}
|
||||
|
||||
rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
|
||||
rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
|
||||
|
||||
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
|
||||
unsigned int refmask = 0;
|
||||
unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
|
||||
insts->SrcReg[src] |= newsrcmask;
|
||||
for (unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
|
||||
unsigned int refmask = 0;
|
||||
unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
|
||||
insts->SrcReg[src] |= newsrcmask;
|
||||
|
||||
for(unsigned int chan = 0; chan < 4; ++chan) {
|
||||
if (GET_BIT(newsrcmask, chan))
|
||||
refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
|
||||
}
|
||||
for (unsigned int chan = 0; chan < 4; ++chan) {
|
||||
if (GET_BIT(newsrcmask, chan))
|
||||
refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
|
||||
}
|
||||
|
||||
/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
|
||||
refmask &= RC_MASK_XYZW;
|
||||
/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
|
||||
refmask &= RC_MASK_XYZW;
|
||||
|
||||
if (!refmask)
|
||||
continue;
|
||||
if (!refmask)
|
||||
continue;
|
||||
|
||||
mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
|
||||
mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
|
||||
|
||||
if (inst->U.I.SrcReg[src].RelAddr)
|
||||
mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
|
||||
}
|
||||
if (inst->U.I.SrcReg[src].RelAddr)
|
||||
mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
|
||||
}
|
||||
}
|
||||
|
||||
void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
|
||||
void
|
||||
rc_dataflow_deadcode(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct deadcode_state s;
|
||||
unsigned int nr_instructions;
|
||||
unsigned int ip;
|
||||
struct deadcode_state s;
|
||||
unsigned int nr_instructions;
|
||||
unsigned int ip;
|
||||
|
||||
memset(&s, 0, sizeof(s));
|
||||
s.C = c;
|
||||
memset(&s, 0, sizeof(s));
|
||||
s.C = c;
|
||||
|
||||
nr_instructions = rc_recompute_ips(c);
|
||||
s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
|
||||
memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
|
||||
nr_instructions = rc_recompute_ips(c);
|
||||
s.Instructions =
|
||||
memory_pool_malloc(&c->Pool, sizeof(struct instruction_state) * nr_instructions);
|
||||
memset(s.Instructions, 0, sizeof(struct instruction_state) * nr_instructions);
|
||||
|
||||
for(struct rc_instruction * inst = c->Program.Instructions.Prev;
|
||||
inst != &c->Program.Instructions;
|
||||
inst = inst->Prev) {
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
for (struct rc_instruction *inst = c->Program.Instructions.Prev;
|
||||
inst != &c->Program.Instructions; inst = inst->Prev) {
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
|
||||
/* Assume all output regs are live. Anything else should have been
|
||||
* eliminated before it got to us.
|
||||
*/
|
||||
if (opcode->HasDstReg)
|
||||
mark_used(&s, RC_FILE_OUTPUT, inst->U.I.DstReg.Index, inst->U.I.DstReg.WriteMask);
|
||||
/* Assume all output regs are live. Anything else should have been
|
||||
* eliminated before it got to us.
|
||||
*/
|
||||
if (opcode->HasDstReg)
|
||||
mark_used(&s, RC_FILE_OUTPUT, inst->U.I.DstReg.Index, inst->U.I.DstReg.WriteMask);
|
||||
|
||||
switch(opcode->Opcode){
|
||||
/* Mark all sources in the loop body as used before doing
|
||||
* normal deadcode analysis. This is probably not optimal.
|
||||
* Save this pessimistic deadcode state and restore it anytime
|
||||
* we see a break just to be extra sure.
|
||||
*/
|
||||
case RC_OPCODE_ENDLOOP:
|
||||
{
|
||||
int endloops = 1;
|
||||
struct rc_instruction *ptr;
|
||||
for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
|
||||
opcode = rc_get_opcode_info(ptr->U.I.Opcode);
|
||||
if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
|
||||
endloops--;
|
||||
continue;
|
||||
}
|
||||
if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
|
||||
endloops++;
|
||||
continue;
|
||||
}
|
||||
if(opcode->HasDstReg){
|
||||
int src = 0;
|
||||
unsigned int srcmasks[3];
|
||||
unsigned int writemask = ptr->U.I.DstReg.WriteMask;
|
||||
if (ptr->U.I.WriteALUResult == RC_ALURESULT_X)
|
||||
writemask |= RC_MASK_X;
|
||||
else if (ptr->U.I.WriteALUResult == RC_ALURESULT_W)
|
||||
writemask |= RC_MASK_W;
|
||||
switch (opcode->Opcode) {
|
||||
/* Mark all sources in the loop body as used before doing
|
||||
* normal deadcode analysis. This is probably not optimal.
|
||||
* Save this pessimistic deadcode state and restore it anytime
|
||||
* we see a break just to be extra sure.
|
||||
*/
|
||||
case RC_OPCODE_ENDLOOP: {
|
||||
int endloops = 1;
|
||||
struct rc_instruction *ptr;
|
||||
for (ptr = inst->Prev; endloops > 0; ptr = ptr->Prev) {
|
||||
opcode = rc_get_opcode_info(ptr->U.I.Opcode);
|
||||
if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
|
||||
endloops--;
|
||||
continue;
|
||||
}
|
||||
if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
|
||||
endloops++;
|
||||
continue;
|
||||
}
|
||||
if (opcode->HasDstReg) {
|
||||
int src = 0;
|
||||
unsigned int srcmasks[3];
|
||||
unsigned int writemask = ptr->U.I.DstReg.WriteMask;
|
||||
if (ptr->U.I.WriteALUResult == RC_ALURESULT_X)
|
||||
writemask |= RC_MASK_X;
|
||||
else if (ptr->U.I.WriteALUResult == RC_ALURESULT_W)
|
||||
writemask |= RC_MASK_W;
|
||||
|
||||
rc_compute_sources_for_writemask(ptr, writemask, srcmasks);
|
||||
for(src=0; src < opcode->NumSrcRegs; src++){
|
||||
mark_used(&s,
|
||||
ptr->U.I.SrcReg[src].File,
|
||||
ptr->U.I.SrcReg[src].Index,
|
||||
srcmasks[src]);
|
||||
}
|
||||
}
|
||||
}
|
||||
push_loop(&s);
|
||||
break;
|
||||
}
|
||||
case RC_OPCODE_BRK:
|
||||
{
|
||||
struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
|
||||
memcpy(&s.R, &loop->StoreEndloop, sizeof(s.R));
|
||||
break;
|
||||
}
|
||||
case RC_OPCODE_BGNLOOP:
|
||||
s.LoopStackSize--;
|
||||
break;
|
||||
case RC_OPCODE_CONT:
|
||||
break;
|
||||
case RC_OPCODE_ENDIF:
|
||||
push_branch(&s);
|
||||
break;
|
||||
default:
|
||||
if (opcode->IsFlowControl && s.BranchStackSize) {
|
||||
struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
|
||||
if (opcode->Opcode == RC_OPCODE_IF) {
|
||||
or_updatemasks(&s.R,
|
||||
&s.R,
|
||||
branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
|
||||
rc_compute_sources_for_writemask(ptr, writemask, srcmasks);
|
||||
for (src = 0; src < opcode->NumSrcRegs; src++) {
|
||||
mark_used(&s, ptr->U.I.SrcReg[src].File, ptr->U.I.SrcReg[src].Index,
|
||||
srcmasks[src]);
|
||||
}
|
||||
}
|
||||
}
|
||||
push_loop(&s);
|
||||
break;
|
||||
}
|
||||
case RC_OPCODE_BRK: {
|
||||
struct loopinfo *loop = &s.LoopStack[s.LoopStackSize - 1];
|
||||
memcpy(&s.R, &loop->StoreEndloop, sizeof(s.R));
|
||||
break;
|
||||
}
|
||||
case RC_OPCODE_BGNLOOP:
|
||||
s.LoopStackSize--;
|
||||
break;
|
||||
case RC_OPCODE_CONT:
|
||||
break;
|
||||
case RC_OPCODE_ENDIF:
|
||||
push_branch(&s);
|
||||
break;
|
||||
default:
|
||||
if (opcode->IsFlowControl && s.BranchStackSize) {
|
||||
struct branchinfo *branch = &s.BranchStack[s.BranchStackSize - 1];
|
||||
if (opcode->Opcode == RC_OPCODE_IF) {
|
||||
or_updatemasks(&s.R, &s.R,
|
||||
branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
|
||||
|
||||
s.BranchStackSize--;
|
||||
} else if (opcode->Opcode == RC_OPCODE_ELSE) {
|
||||
if (branch->HaveElse) {
|
||||
rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __func__);
|
||||
} else {
|
||||
memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
|
||||
memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
|
||||
branch->HaveElse = 1;
|
||||
}
|
||||
} else {
|
||||
rc_error(c, "%s: Unhandled control flow instruction %s\n", __func__, opcode->Name);
|
||||
}
|
||||
}
|
||||
}
|
||||
s.BranchStackSize--;
|
||||
} else if (opcode->Opcode == RC_OPCODE_ELSE) {
|
||||
if (branch->HaveElse) {
|
||||
rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __func__);
|
||||
} else {
|
||||
memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
|
||||
memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
|
||||
branch->HaveElse = 1;
|
||||
}
|
||||
} else {
|
||||
rc_error(c, "%s: Unhandled control flow instruction %s\n", __func__, opcode->Name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
update_instruction(&s, inst);
|
||||
}
|
||||
update_instruction(&s, inst);
|
||||
}
|
||||
|
||||
ip = 0;
|
||||
for(struct rc_instruction * inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions;
|
||||
inst = inst->Next, ++ip) {
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
int dead = 1;
|
||||
unsigned int srcmasks[3];
|
||||
unsigned int usemask;
|
||||
ip = 0;
|
||||
for (struct rc_instruction *inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions; inst = inst->Next, ++ip) {
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
int dead = 1;
|
||||
unsigned int srcmasks[3];
|
||||
unsigned int usemask;
|
||||
|
||||
if (!opcode->HasDstReg) {
|
||||
dead = 0;
|
||||
} else {
|
||||
inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
|
||||
if (s.Instructions[ip].WriteMask)
|
||||
dead = 0;
|
||||
if (!opcode->HasDstReg) {
|
||||
dead = 0;
|
||||
} else {
|
||||
inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
|
||||
if (s.Instructions[ip].WriteMask)
|
||||
dead = 0;
|
||||
|
||||
if (s.Instructions[ip].WriteALUResult)
|
||||
dead = 0;
|
||||
else
|
||||
inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
|
||||
}
|
||||
if (s.Instructions[ip].WriteALUResult)
|
||||
dead = 0;
|
||||
else
|
||||
inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
|
||||
}
|
||||
|
||||
if (dead) {
|
||||
struct rc_instruction * todelete = inst;
|
||||
inst = inst->Prev;
|
||||
rc_remove_instruction(todelete);
|
||||
continue;
|
||||
}
|
||||
if (dead) {
|
||||
struct rc_instruction *todelete = inst;
|
||||
inst = inst->Prev;
|
||||
rc_remove_instruction(todelete);
|
||||
continue;
|
||||
}
|
||||
|
||||
usemask = s.Instructions[ip].WriteMask;
|
||||
usemask = s.Instructions[ip].WriteMask;
|
||||
|
||||
if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
|
||||
usemask |= RC_MASK_X;
|
||||
else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
|
||||
usemask |= RC_MASK_W;
|
||||
if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
|
||||
usemask |= RC_MASK_X;
|
||||
else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
|
||||
usemask |= RC_MASK_W;
|
||||
|
||||
rc_compute_sources_for_writemask(inst, usemask, srcmasks);
|
||||
rc_compute_sources_for_writemask(inst, usemask, srcmasks);
|
||||
|
||||
for(unsigned int src = 0; src < 3; ++src) {
|
||||
for(unsigned int chan = 0; chan < 4; ++chan) {
|
||||
if (!GET_BIT(srcmasks[src], chan))
|
||||
SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (unsigned int src = 0; src < 3; ++src) {
|
||||
for (unsigned int chan = 0; chan < 4; ++chan) {
|
||||
if (!GET_BIT(srcmasks[src], chan))
|
||||
SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rc_calculate_inputs_outputs(c);
|
||||
rc_calculate_inputs_outputs(c);
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -4,18 +4,22 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "util/u_bitcast.h"
|
||||
#include "radeon_compiler.h"
|
||||
#include "radeon_compiler_util.h"
|
||||
#include "radeon_dataflow.h"
|
||||
#include "radeon_program.h"
|
||||
#include "radeon_program_constants.h"
|
||||
#include "radeon_swizzle.h"
|
||||
#include "util/u_bitcast.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#define VERBOSE 0
|
||||
|
||||
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
|
||||
#define DBG(...) \
|
||||
do { \
|
||||
if (VERBOSE) \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
/* IEEE-754:
|
||||
* 22:0 mantissa
|
||||
|
|
@ -26,125 +30,120 @@
|
|||
* 0:2 mantissa
|
||||
* 3:6 exponent (bias 7)
|
||||
*/
|
||||
static int ieee_754_to_r300_float(float f, unsigned char *r300_float_out)
|
||||
static int
|
||||
ieee_754_to_r300_float(float f, unsigned char *r300_float_out)
|
||||
{
|
||||
unsigned float_bits = u_bitcast_f2u(f);
|
||||
/* XXX: Handle big-endian */
|
||||
unsigned mantissa = float_bits & 0x007fffff;
|
||||
unsigned biased_exponent = (float_bits & 0x7f800000) >> 23;
|
||||
unsigned negate = !!(float_bits & 0x80000000);
|
||||
int exponent = biased_exponent - 127;
|
||||
unsigned mantissa_mask = 0xff8fffff;
|
||||
unsigned r300_exponent, r300_mantissa;
|
||||
unsigned float_bits = u_bitcast_f2u(f);
|
||||
/* XXX: Handle big-endian */
|
||||
unsigned mantissa = float_bits & 0x007fffff;
|
||||
unsigned biased_exponent = (float_bits & 0x7f800000) >> 23;
|
||||
unsigned negate = !!(float_bits & 0x80000000);
|
||||
int exponent = biased_exponent - 127;
|
||||
unsigned mantissa_mask = 0xff8fffff;
|
||||
unsigned r300_exponent, r300_mantissa;
|
||||
|
||||
DBG("Converting %f (0x%x) to 7-bit:\n", f, float_bits);
|
||||
DBG("Raw exponent = %d\n", exponent);
|
||||
DBG("Converting %f (0x%x) to 7-bit:\n", f, float_bits);
|
||||
DBG("Raw exponent = %d\n", exponent);
|
||||
|
||||
if (exponent < -7 || exponent > 8) {
|
||||
DBG("Failed exponent out of range\n\n");
|
||||
return 0;
|
||||
}
|
||||
if (exponent < -7 || exponent > 8) {
|
||||
DBG("Failed exponent out of range\n\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (mantissa & mantissa_mask) {
|
||||
DBG("Failed mantissa has too many bits:\n"
|
||||
"mantissa=0x%x mantissa_mask=0x%x, and=0x%x\n\n",
|
||||
mantissa, mantissa_mask,
|
||||
mantissa & mantissa_mask);
|
||||
return 0;
|
||||
}
|
||||
if (mantissa & mantissa_mask) {
|
||||
DBG("Failed mantissa has too many bits:\n"
|
||||
"mantissa=0x%x mantissa_mask=0x%x, and=0x%x\n\n",
|
||||
mantissa, mantissa_mask, mantissa & mantissa_mask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
r300_exponent = exponent + 7;
|
||||
r300_mantissa = (mantissa & ~mantissa_mask) >> 20;
|
||||
*r300_float_out = r300_mantissa | (r300_exponent << 3);
|
||||
r300_exponent = exponent + 7;
|
||||
r300_mantissa = (mantissa & ~mantissa_mask) >> 20;
|
||||
*r300_float_out = r300_mantissa | (r300_exponent << 3);
|
||||
|
||||
DBG("Success! r300_float = 0x%x\n\n", *r300_float_out);
|
||||
DBG("Success! r300_float = 0x%x\n\n", *r300_float_out);
|
||||
|
||||
if (negate)
|
||||
return -1;
|
||||
else
|
||||
return 1;
|
||||
if (negate)
|
||||
return -1;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
void rc_inline_literals(struct radeon_compiler *c, void *user)
|
||||
void
|
||||
rc_inline_literals(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct rc_instruction * inst;
|
||||
struct rc_instruction *inst;
|
||||
|
||||
for(inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
const struct rc_opcode_info * info =
|
||||
rc_get_opcode_info(inst->U.I.Opcode);
|
||||
for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
|
||||
unsigned src_idx;
|
||||
struct rc_constant * constant;
|
||||
float float_value;
|
||||
unsigned char r300_float = 0;
|
||||
int ret;
|
||||
unsigned src_idx;
|
||||
struct rc_constant *constant;
|
||||
float float_value;
|
||||
unsigned char r300_float = 0;
|
||||
int ret;
|
||||
|
||||
/* XXX: Handle presub */
|
||||
/* XXX: Handle presub */
|
||||
|
||||
/* We aren't using rc_for_all_reads_src here, because presub
|
||||
* sources need to be handled differently. */
|
||||
for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) {
|
||||
unsigned use_literal = 0;
|
||||
unsigned swz, chan;
|
||||
struct rc_src_register src_reg = inst->U.I.SrcReg[src_idx];
|
||||
if (src_reg.File != RC_FILE_CONSTANT) {
|
||||
continue;
|
||||
}
|
||||
constant =
|
||||
&c->Program.Constants.Constants[src_reg.Index];
|
||||
if (constant->Type != RC_CONSTANT_IMMEDIATE) {
|
||||
continue;
|
||||
}
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
unsigned char r300_float_tmp;
|
||||
swz = GET_SWZ(src_reg.Swizzle, chan);
|
||||
if (swz >= RC_SWIZZLE_ZERO) {
|
||||
continue;
|
||||
}
|
||||
float_value = constant->u.Immediate[swz];
|
||||
ret = ieee_754_to_r300_float(float_value,
|
||||
&r300_float_tmp);
|
||||
if (!ret || (use_literal &&
|
||||
r300_float != r300_float_tmp)) {
|
||||
use_literal = 0;
|
||||
break;
|
||||
}
|
||||
/* We aren't using rc_for_all_reads_src here, because presub
|
||||
* sources need to be handled differently. */
|
||||
for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) {
|
||||
unsigned use_literal = 0;
|
||||
unsigned swz, chan;
|
||||
struct rc_src_register src_reg = inst->U.I.SrcReg[src_idx];
|
||||
if (src_reg.File != RC_FILE_CONSTANT) {
|
||||
continue;
|
||||
}
|
||||
constant = &c->Program.Constants.Constants[src_reg.Index];
|
||||
if (constant->Type != RC_CONSTANT_IMMEDIATE) {
|
||||
continue;
|
||||
}
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
unsigned char r300_float_tmp;
|
||||
swz = GET_SWZ(src_reg.Swizzle, chan);
|
||||
if (swz >= RC_SWIZZLE_ZERO) {
|
||||
continue;
|
||||
}
|
||||
float_value = constant->u.Immediate[swz];
|
||||
ret = ieee_754_to_r300_float(float_value, &r300_float_tmp);
|
||||
if (!ret || (use_literal && r300_float != r300_float_tmp)) {
|
||||
use_literal = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ret == -1 && src_reg.Abs) {
|
||||
use_literal = 0;
|
||||
break;
|
||||
}
|
||||
if (ret == -1 && src_reg.Abs) {
|
||||
use_literal = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!use_literal) {
|
||||
r300_float = r300_float_tmp;
|
||||
use_literal = 1;
|
||||
}
|
||||
if (!use_literal) {
|
||||
r300_float = r300_float_tmp;
|
||||
use_literal = 1;
|
||||
}
|
||||
|
||||
/* We can use any swizzle, so if this is ADD it might
|
||||
* be smart to use the same swizzle as the other src uses
|
||||
* so that we potentially enable presubtract later.
|
||||
* Use RC_SWIZZLE_W otherwise, so it will become one of
|
||||
* the alpha sources.
|
||||
*/
|
||||
if (info->Opcode == RC_OPCODE_ADD &&
|
||||
GET_SWZ(inst->U.I.SrcReg[1 - src_idx].Swizzle, chan) == chan) {
|
||||
SET_SWZ(src_reg.Swizzle, chan, chan);
|
||||
} else {
|
||||
SET_SWZ(src_reg.Swizzle, chan, RC_SWIZZLE_W);
|
||||
}
|
||||
if (ret == -1) {
|
||||
src_reg.Negate ^= (1 << chan);
|
||||
}
|
||||
}
|
||||
/* We can use any swizzle, so if this is ADD it might
|
||||
* be smart to use the same swizzle as the other src uses
|
||||
* so that we potentially enable presubtract later.
|
||||
* Use RC_SWIZZLE_W otherwise, so it will become one of
|
||||
* the alpha sources.
|
||||
*/
|
||||
if (info->Opcode == RC_OPCODE_ADD &&
|
||||
GET_SWZ(inst->U.I.SrcReg[1 - src_idx].Swizzle, chan) == chan) {
|
||||
SET_SWZ(src_reg.Swizzle, chan, chan);
|
||||
} else {
|
||||
SET_SWZ(src_reg.Swizzle, chan, RC_SWIZZLE_W);
|
||||
}
|
||||
if (ret == -1) {
|
||||
src_reg.Negate ^= (1 << chan);
|
||||
}
|
||||
}
|
||||
|
||||
src_reg.File = RC_FILE_INLINE;
|
||||
src_reg.Index = r300_float;
|
||||
if (!use_literal || !c->SwizzleCaps->IsNative(inst->U.I.Opcode, src_reg)) {
|
||||
continue;
|
||||
}
|
||||
inst->U.I.SrcReg[src_idx] = src_reg;
|
||||
}
|
||||
}
|
||||
src_reg.File = RC_FILE_INLINE;
|
||||
src_reg.Index = r300_float;
|
||||
if (!use_literal || !c->SwizzleCaps->IsNative(inst->U.I.Opcode, src_reg)) {
|
||||
continue;
|
||||
}
|
||||
inst->U.I.SrcReg[src_idx] = src_reg;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,64 +5,70 @@
|
|||
|
||||
#include "radeon_list.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "memory_pool.h"
|
||||
|
||||
struct rc_list * rc_list(struct memory_pool * pool, void * item)
|
||||
struct rc_list *
|
||||
rc_list(struct memory_pool *pool, void *item)
|
||||
{
|
||||
struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list));
|
||||
new->Item = item;
|
||||
new->Next = NULL;
|
||||
new->Prev = NULL;
|
||||
struct rc_list *new = memory_pool_malloc(pool, sizeof(struct rc_list));
|
||||
new->Item = item;
|
||||
new->Next = NULL;
|
||||
new->Prev = NULL;
|
||||
|
||||
return new;
|
||||
return new;
|
||||
}
|
||||
|
||||
void rc_list_add(struct rc_list ** list, struct rc_list * new_value)
|
||||
void
|
||||
rc_list_add(struct rc_list **list, struct rc_list *new_value)
|
||||
{
|
||||
struct rc_list * temp;
|
||||
struct rc_list *temp;
|
||||
|
||||
if (*list == NULL) {
|
||||
*list = new_value;
|
||||
return;
|
||||
}
|
||||
if (*list == NULL) {
|
||||
*list = new_value;
|
||||
return;
|
||||
}
|
||||
|
||||
for (temp = *list; temp->Next; temp = temp->Next);
|
||||
for (temp = *list; temp->Next; temp = temp->Next)
|
||||
;
|
||||
|
||||
temp->Next = new_value;
|
||||
new_value->Prev = temp;
|
||||
temp->Next = new_value;
|
||||
new_value->Prev = temp;
|
||||
}
|
||||
|
||||
void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value)
|
||||
void
|
||||
rc_list_remove(struct rc_list **list, struct rc_list *rm_value)
|
||||
{
|
||||
if (*list == rm_value) {
|
||||
*list = rm_value->Next;
|
||||
return;
|
||||
}
|
||||
if (*list == rm_value) {
|
||||
*list = rm_value->Next;
|
||||
return;
|
||||
}
|
||||
|
||||
rm_value->Prev->Next = rm_value->Next;
|
||||
if (rm_value->Next) {
|
||||
rm_value->Next->Prev = rm_value->Prev;
|
||||
}
|
||||
rm_value->Prev->Next = rm_value->Next;
|
||||
if (rm_value->Next) {
|
||||
rm_value->Next->Prev = rm_value->Prev;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int rc_list_count(struct rc_list * list)
|
||||
unsigned int
|
||||
rc_list_count(struct rc_list *list)
|
||||
{
|
||||
unsigned int count = 0;
|
||||
while (list) {
|
||||
count++;
|
||||
list = list->Next;
|
||||
}
|
||||
return count;
|
||||
unsigned int count = 0;
|
||||
while (list) {
|
||||
count++;
|
||||
list = list->Next;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
void rc_list_print(struct rc_list * list)
|
||||
void
|
||||
rc_list_print(struct rc_list *list)
|
||||
{
|
||||
while(list) {
|
||||
fprintf(stderr, "%p->", list->Item);
|
||||
list = list->Next;
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
while (list) {
|
||||
fprintf(stderr, "%p->", list->Item);
|
||||
list = list->Next;
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,16 +9,15 @@
|
|||
struct memory_pool;
|
||||
|
||||
struct rc_list {
|
||||
void * Item;
|
||||
struct rc_list * Prev;
|
||||
struct rc_list * Next;
|
||||
void *Item;
|
||||
struct rc_list *Prev;
|
||||
struct rc_list *Next;
|
||||
};
|
||||
|
||||
struct rc_list * rc_list(struct memory_pool * pool, void * item);
|
||||
void rc_list_add(struct rc_list ** list, struct rc_list * new_value);
|
||||
void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value);
|
||||
unsigned int rc_list_count(struct rc_list * list);
|
||||
void rc_list_print(struct rc_list * list);
|
||||
struct rc_list *rc_list(struct memory_pool *pool, void *item);
|
||||
void rc_list_add(struct rc_list **list, struct rc_list *new_value);
|
||||
void rc_list_remove(struct rc_list **list, struct rc_list *rm_value);
|
||||
unsigned int rc_list_count(struct rc_list *list);
|
||||
void rc_list_print(struct rc_list *list);
|
||||
|
||||
#endif /* RADEON_LIST_H */
|
||||
|
||||
|
|
|
|||
|
|
@ -11,502 +11,500 @@
|
|||
#include "util/compiler.h"
|
||||
|
||||
const struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
|
||||
{
|
||||
.Opcode = RC_OPCODE_NOP,
|
||||
.Name = "NOP"
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ILLEGAL_OPCODE,
|
||||
.Name = "ILLEGAL OPCODE"
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ADD,
|
||||
.Name = "ADD",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ARL,
|
||||
.Name = "ARL",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ARR,
|
||||
.Name = "ARR",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_CMP,
|
||||
.Name = "CMP",
|
||||
.NumSrcRegs = 3,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_CND,
|
||||
.Name = "CND",
|
||||
.NumSrcRegs = 3,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_COS,
|
||||
.Name = "COS",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_DDX,
|
||||
.Name = "DDX",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_DDY,
|
||||
.Name = "DDY",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_DP2,
|
||||
.Name = "DP2",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_DP3,
|
||||
.Name = "DP3",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_DP4,
|
||||
.Name = "DP4",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_DST,
|
||||
.Name = "DST",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_EX2,
|
||||
.Name = "EX2",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_EXP,
|
||||
.Name = "EXP",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_FRC,
|
||||
.Name = "FRC",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_KIL,
|
||||
.Name = "KIL",
|
||||
.NumSrcRegs = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_LG2,
|
||||
.Name = "LG2",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_LIT,
|
||||
.Name = "LIT",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_LOG,
|
||||
.Name = "LOG",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_MAD,
|
||||
.Name = "MAD",
|
||||
.NumSrcRegs = 3,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_MAX,
|
||||
.Name = "MAX",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_MIN,
|
||||
.Name = "MIN",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_MOV,
|
||||
.Name = "MOV",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_MUL,
|
||||
.Name = "MUL",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_POW,
|
||||
.Name = "POW",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_RCP,
|
||||
.Name = "RCP",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ROUND,
|
||||
.Name = "ROUND",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_RSQ,
|
||||
.Name = "RSQ",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_SEQ,
|
||||
.Name = "SEQ",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_SGE,
|
||||
.Name = "SGE",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_SIN,
|
||||
.Name = "SIN",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_SLT,
|
||||
.Name = "SLT",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_SNE,
|
||||
.Name = "SNE",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_TEX,
|
||||
.Name = "TEX",
|
||||
.HasTexture = 1,
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_TXB,
|
||||
.Name = "TXB",
|
||||
.HasTexture = 1,
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_TXD,
|
||||
.Name = "TXD",
|
||||
.HasTexture = 1,
|
||||
.NumSrcRegs = 3,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_TXL,
|
||||
.Name = "TXL",
|
||||
.HasTexture = 1,
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_TXP,
|
||||
.Name = "TXP",
|
||||
.HasTexture = 1,
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_IF,
|
||||
.Name = "IF",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ELSE,
|
||||
.Name = "ELSE",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ENDIF,
|
||||
.Name = "ENDIF",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_BGNLOOP,
|
||||
.Name = "BGNLOOP",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_BRK,
|
||||
.Name = "BRK",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ENDLOOP,
|
||||
.Name = "ENDLOOP",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_CONT,
|
||||
.Name = "CONT",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_REPL_ALPHA,
|
||||
.Name = "REPL_ALPHA",
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_BEGIN_TEX,
|
||||
.Name = "BEGIN_TEX"
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_KILP,
|
||||
.Name = "KILP",
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SEQ,
|
||||
.Name = "ME_PRED_SEQ",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SGT,
|
||||
.Name = "ME_PRED_SGT",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SGE,
|
||||
.Name = "ME_PRED_SGE",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SNEQ,
|
||||
.Name = "ME_PRED_SNEQ",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SET_CLR,
|
||||
.Name = "ME_PRED_SET_CLEAR",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SET_INV,
|
||||
.Name = "ME_PRED_SET_INV",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SET_POP,
|
||||
.Name = "ME_PRED_SET_POP",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SET_RESTORE,
|
||||
.Name = "ME_PRED_SET_RESTORE",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_VE_PRED_SEQ_PUSH,
|
||||
.Name = "VE_PRED_SEQ_PUSH",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_VE_PRED_SGT_PUSH,
|
||||
.Name = "VE_PRED_SGT_PUSH",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_VE_PRED_SGE_PUSH,
|
||||
.Name = "VE_PRED_SGE_PUSH",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1
|
||||
},
|
||||
{
|
||||
.Opcode = RC_VE_PRED_SNEQ_PUSH,
|
||||
.Name = "VE_PRED_SNEQ_PUSH",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1
|
||||
}
|
||||
};
|
||||
{
|
||||
.Opcode = RC_OPCODE_NOP,
|
||||
.Name = "NOP",
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ILLEGAL_OPCODE,
|
||||
.Name = "ILLEGAL OPCODE",
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ADD,
|
||||
.Name = "ADD",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ARL,
|
||||
.Name = "ARL",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ARR,
|
||||
.Name = "ARR",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_CMP,
|
||||
.Name = "CMP",
|
||||
.NumSrcRegs = 3,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_CND,
|
||||
.Name = "CND",
|
||||
.NumSrcRegs = 3,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_COS,
|
||||
.Name = "COS",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_DDX,
|
||||
.Name = "DDX",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_DDY,
|
||||
.Name = "DDY",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_DP2,
|
||||
.Name = "DP2",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_DP3,
|
||||
.Name = "DP3",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_DP4,
|
||||
.Name = "DP4",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_DST,
|
||||
.Name = "DST",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_EX2,
|
||||
.Name = "EX2",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_EXP,
|
||||
.Name = "EXP",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_FRC,
|
||||
.Name = "FRC",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_KIL,
|
||||
.Name = "KIL",
|
||||
.NumSrcRegs = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_LG2,
|
||||
.Name = "LG2",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_LIT,
|
||||
.Name = "LIT",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_LOG,
|
||||
.Name = "LOG",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_MAD,
|
||||
.Name = "MAD",
|
||||
.NumSrcRegs = 3,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_MAX,
|
||||
.Name = "MAX",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_MIN,
|
||||
.Name = "MIN",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_MOV,
|
||||
.Name = "MOV",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_MUL,
|
||||
.Name = "MUL",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_POW,
|
||||
.Name = "POW",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_RCP,
|
||||
.Name = "RCP",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ROUND,
|
||||
.Name = "ROUND",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_RSQ,
|
||||
.Name = "RSQ",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_SEQ,
|
||||
.Name = "SEQ",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_SGE,
|
||||
.Name = "SGE",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_SIN,
|
||||
.Name = "SIN",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
.IsStandardScalar = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_SLT,
|
||||
.Name = "SLT",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_SNE,
|
||||
.Name = "SNE",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
.IsComponentwise = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_TEX,
|
||||
.Name = "TEX",
|
||||
.HasTexture = 1,
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_TXB,
|
||||
.Name = "TXB",
|
||||
.HasTexture = 1,
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_TXD,
|
||||
.Name = "TXD",
|
||||
.HasTexture = 1,
|
||||
.NumSrcRegs = 3,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_TXL,
|
||||
.Name = "TXL",
|
||||
.HasTexture = 1,
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_TXP,
|
||||
.Name = "TXP",
|
||||
.HasTexture = 1,
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_IF,
|
||||
.Name = "IF",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ELSE,
|
||||
.Name = "ELSE",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ENDIF,
|
||||
.Name = "ENDIF",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_BGNLOOP,
|
||||
.Name = "BGNLOOP",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_BRK,
|
||||
.Name = "BRK",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_ENDLOOP,
|
||||
.Name = "ENDLOOP",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_CONT,
|
||||
.Name = "CONT",
|
||||
.IsFlowControl = 1,
|
||||
.NumSrcRegs = 0,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_REPL_ALPHA,
|
||||
.Name = "REPL_ALPHA",
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_BEGIN_TEX,
|
||||
.Name = "BEGIN_TEX",
|
||||
},
|
||||
{
|
||||
.Opcode = RC_OPCODE_KILP,
|
||||
.Name = "KILP",
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SEQ,
|
||||
.Name = "ME_PRED_SEQ",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SGT,
|
||||
.Name = "ME_PRED_SGT",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SGE,
|
||||
.Name = "ME_PRED_SGE",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SNEQ,
|
||||
.Name = "ME_PRED_SNEQ",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SET_CLR,
|
||||
.Name = "ME_PRED_SET_CLEAR",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SET_INV,
|
||||
.Name = "ME_PRED_SET_INV",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SET_POP,
|
||||
.Name = "ME_PRED_SET_POP",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_ME_PRED_SET_RESTORE,
|
||||
.Name = "ME_PRED_SET_RESTORE",
|
||||
.NumSrcRegs = 1,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_VE_PRED_SEQ_PUSH,
|
||||
.Name = "VE_PRED_SEQ_PUSH",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_VE_PRED_SGT_PUSH,
|
||||
.Name = "VE_PRED_SGT_PUSH",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_VE_PRED_SGE_PUSH,
|
||||
.Name = "VE_PRED_SGE_PUSH",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
},
|
||||
{
|
||||
.Opcode = RC_VE_PRED_SNEQ_PUSH,
|
||||
.Name = "VE_PRED_SNEQ_PUSH",
|
||||
.NumSrcRegs = 2,
|
||||
.HasDstReg = 1,
|
||||
}};
|
||||
|
||||
void rc_compute_sources_for_writemask(
|
||||
const struct rc_instruction *inst,
|
||||
unsigned int writemask,
|
||||
unsigned int *srcmasks)
|
||||
void
|
||||
rc_compute_sources_for_writemask(const struct rc_instruction *inst, unsigned int writemask,
|
||||
unsigned int *srcmasks)
|
||||
{
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
srcmasks[0] = 0;
|
||||
srcmasks[1] = 0;
|
||||
srcmasks[2] = 0;
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
srcmasks[0] = 0;
|
||||
srcmasks[1] = 0;
|
||||
srcmasks[2] = 0;
|
||||
|
||||
if (opcode->Opcode == RC_OPCODE_KIL)
|
||||
srcmasks[0] |= RC_MASK_XYZW;
|
||||
else if (opcode->Opcode == RC_OPCODE_IF)
|
||||
srcmasks[0] |= RC_MASK_X;
|
||||
if (opcode->Opcode == RC_OPCODE_KIL)
|
||||
srcmasks[0] |= RC_MASK_XYZW;
|
||||
else if (opcode->Opcode == RC_OPCODE_IF)
|
||||
srcmasks[0] |= RC_MASK_X;
|
||||
|
||||
if (!writemask)
|
||||
return;
|
||||
if (!writemask)
|
||||
return;
|
||||
|
||||
if (opcode->IsComponentwise) {
|
||||
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
|
||||
srcmasks[src] |= writemask;
|
||||
} else if (opcode->IsStandardScalar) {
|
||||
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
|
||||
srcmasks[src] |= writemask;
|
||||
} else {
|
||||
switch(opcode->Opcode) {
|
||||
case RC_OPCODE_ARL:
|
||||
case RC_OPCODE_ARR:
|
||||
srcmasks[0] |= RC_MASK_X;
|
||||
break;
|
||||
case RC_OPCODE_DP2:
|
||||
srcmasks[0] |= RC_MASK_XY;
|
||||
srcmasks[1] |= RC_MASK_XY;
|
||||
break;
|
||||
case RC_OPCODE_DP3:
|
||||
srcmasks[0] |= RC_MASK_XYZ;
|
||||
srcmasks[1] |= RC_MASK_XYZ;
|
||||
break;
|
||||
case RC_OPCODE_DP4:
|
||||
srcmasks[0] |= RC_MASK_XYZW;
|
||||
srcmasks[1] |= RC_MASK_XYZW;
|
||||
break;
|
||||
case RC_OPCODE_TXB:
|
||||
case RC_OPCODE_TXP:
|
||||
case RC_OPCODE_TXL:
|
||||
srcmasks[0] |= RC_MASK_W;
|
||||
FALLTHROUGH;
|
||||
case RC_OPCODE_TEX:
|
||||
switch (inst->U.I.TexSrcTarget) {
|
||||
case RC_TEXTURE_1D:
|
||||
srcmasks[0] |= RC_MASK_X;
|
||||
break;
|
||||
case RC_TEXTURE_2D:
|
||||
case RC_TEXTURE_RECT:
|
||||
case RC_TEXTURE_1D_ARRAY:
|
||||
srcmasks[0] |= RC_MASK_XY;
|
||||
break;
|
||||
case RC_TEXTURE_3D:
|
||||
case RC_TEXTURE_CUBE:
|
||||
case RC_TEXTURE_2D_ARRAY:
|
||||
srcmasks[0] |= RC_MASK_XYZ;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case RC_OPCODE_TXD:
|
||||
switch (inst->U.I.TexSrcTarget) {
|
||||
case RC_TEXTURE_1D_ARRAY:
|
||||
srcmasks[0] |= RC_MASK_Y;
|
||||
FALLTHROUGH;
|
||||
case RC_TEXTURE_1D:
|
||||
srcmasks[0] |= RC_MASK_X;
|
||||
srcmasks[1] |= RC_MASK_X;
|
||||
srcmasks[2] |= RC_MASK_X;
|
||||
break;
|
||||
case RC_TEXTURE_2D_ARRAY:
|
||||
srcmasks[0] |= RC_MASK_Z;
|
||||
FALLTHROUGH;
|
||||
case RC_TEXTURE_2D:
|
||||
case RC_TEXTURE_RECT:
|
||||
srcmasks[0] |= RC_MASK_XY;
|
||||
srcmasks[1] |= RC_MASK_XY;
|
||||
srcmasks[2] |= RC_MASK_XY;
|
||||
break;
|
||||
case RC_TEXTURE_3D:
|
||||
case RC_TEXTURE_CUBE:
|
||||
srcmasks[0] |= RC_MASK_XYZ;
|
||||
srcmasks[1] |= RC_MASK_XYZ;
|
||||
srcmasks[2] |= RC_MASK_XYZ;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case RC_OPCODE_DST:
|
||||
srcmasks[0] |= RC_MASK_Y | RC_MASK_Z;
|
||||
srcmasks[1] |= RC_MASK_Y | RC_MASK_W;
|
||||
break;
|
||||
case RC_OPCODE_EXP:
|
||||
case RC_OPCODE_LOG:
|
||||
srcmasks[0] |= RC_MASK_XY;
|
||||
break;
|
||||
case RC_OPCODE_LIT:
|
||||
srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (opcode->IsComponentwise) {
|
||||
for (unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
|
||||
srcmasks[src] |= writemask;
|
||||
} else if (opcode->IsStandardScalar) {
|
||||
for (unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
|
||||
srcmasks[src] |= writemask;
|
||||
} else {
|
||||
switch (opcode->Opcode) {
|
||||
case RC_OPCODE_ARL:
|
||||
case RC_OPCODE_ARR:
|
||||
srcmasks[0] |= RC_MASK_X;
|
||||
break;
|
||||
case RC_OPCODE_DP2:
|
||||
srcmasks[0] |= RC_MASK_XY;
|
||||
srcmasks[1] |= RC_MASK_XY;
|
||||
break;
|
||||
case RC_OPCODE_DP3:
|
||||
srcmasks[0] |= RC_MASK_XYZ;
|
||||
srcmasks[1] |= RC_MASK_XYZ;
|
||||
break;
|
||||
case RC_OPCODE_DP4:
|
||||
srcmasks[0] |= RC_MASK_XYZW;
|
||||
srcmasks[1] |= RC_MASK_XYZW;
|
||||
break;
|
||||
case RC_OPCODE_TXB:
|
||||
case RC_OPCODE_TXP:
|
||||
case RC_OPCODE_TXL:
|
||||
srcmasks[0] |= RC_MASK_W;
|
||||
FALLTHROUGH;
|
||||
case RC_OPCODE_TEX:
|
||||
switch (inst->U.I.TexSrcTarget) {
|
||||
case RC_TEXTURE_1D:
|
||||
srcmasks[0] |= RC_MASK_X;
|
||||
break;
|
||||
case RC_TEXTURE_2D:
|
||||
case RC_TEXTURE_RECT:
|
||||
case RC_TEXTURE_1D_ARRAY:
|
||||
srcmasks[0] |= RC_MASK_XY;
|
||||
break;
|
||||
case RC_TEXTURE_3D:
|
||||
case RC_TEXTURE_CUBE:
|
||||
case RC_TEXTURE_2D_ARRAY:
|
||||
srcmasks[0] |= RC_MASK_XYZ;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case RC_OPCODE_TXD:
|
||||
switch (inst->U.I.TexSrcTarget) {
|
||||
case RC_TEXTURE_1D_ARRAY:
|
||||
srcmasks[0] |= RC_MASK_Y;
|
||||
FALLTHROUGH;
|
||||
case RC_TEXTURE_1D:
|
||||
srcmasks[0] |= RC_MASK_X;
|
||||
srcmasks[1] |= RC_MASK_X;
|
||||
srcmasks[2] |= RC_MASK_X;
|
||||
break;
|
||||
case RC_TEXTURE_2D_ARRAY:
|
||||
srcmasks[0] |= RC_MASK_Z;
|
||||
FALLTHROUGH;
|
||||
case RC_TEXTURE_2D:
|
||||
case RC_TEXTURE_RECT:
|
||||
srcmasks[0] |= RC_MASK_XY;
|
||||
srcmasks[1] |= RC_MASK_XY;
|
||||
srcmasks[2] |= RC_MASK_XY;
|
||||
break;
|
||||
case RC_TEXTURE_3D:
|
||||
case RC_TEXTURE_CUBE:
|
||||
srcmasks[0] |= RC_MASK_XYZ;
|
||||
srcmasks[1] |= RC_MASK_XYZ;
|
||||
srcmasks[2] |= RC_MASK_XYZ;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case RC_OPCODE_DST:
|
||||
srcmasks[0] |= RC_MASK_Y | RC_MASK_Z;
|
||||
srcmasks[1] |= RC_MASK_Y | RC_MASK_W;
|
||||
break;
|
||||
case RC_OPCODE_EXP:
|
||||
case RC_OPCODE_LOG:
|
||||
srcmasks[0] |= RC_MASK_XY;
|
||||
break;
|
||||
case RC_OPCODE_LIT:
|
||||
srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,210 +12,208 @@
|
|||
* Opcodes understood by the Radeon compiler.
|
||||
*/
|
||||
typedef enum {
|
||||
RC_OPCODE_NOP = 0,
|
||||
RC_OPCODE_ILLEGAL_OPCODE,
|
||||
RC_OPCODE_NOP = 0,
|
||||
RC_OPCODE_ILLEGAL_OPCODE,
|
||||
|
||||
/** vec4 instruction: dst.c = src0.c + src1.c; */
|
||||
RC_OPCODE_ADD,
|
||||
/** vec4 instruction: dst.c = src0.c + src1.c; */
|
||||
RC_OPCODE_ADD,
|
||||
|
||||
/** special instruction: load address register
|
||||
* dst.x = floor(src.x), where dst must be an address register */
|
||||
RC_OPCODE_ARL,
|
||||
/** special instruction: load address register
|
||||
* dst.x = floor(src.x), where dst must be an address register */
|
||||
RC_OPCODE_ARL,
|
||||
|
||||
/** special instruction: load address register with round
|
||||
* dst.x = round(src.x), where dst must be an address register */
|
||||
RC_OPCODE_ARR,
|
||||
/** special instruction: load address register with round
|
||||
* dst.x = round(src.x), where dst must be an address register */
|
||||
RC_OPCODE_ARR,
|
||||
|
||||
/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
|
||||
RC_OPCODE_CMP,
|
||||
/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
|
||||
RC_OPCODE_CMP,
|
||||
|
||||
/** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */
|
||||
RC_OPCODE_CND,
|
||||
/** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */
|
||||
RC_OPCODE_CND,
|
||||
|
||||
/** scalar instruction: dst = cos(src0.x) */
|
||||
RC_OPCODE_COS,
|
||||
/** scalar instruction: dst = cos(src0.x) */
|
||||
RC_OPCODE_COS,
|
||||
|
||||
/** special instruction: take vec4 partial derivative in X direction
|
||||
* dst.c = d src0.c / dx */
|
||||
RC_OPCODE_DDX,
|
||||
/** special instruction: take vec4 partial derivative in X direction
|
||||
* dst.c = d src0.c / dx */
|
||||
RC_OPCODE_DDX,
|
||||
|
||||
/** special instruction: take vec4 partial derivative in Y direction
|
||||
* dst.c = d src0.c / dy */
|
||||
RC_OPCODE_DDY,
|
||||
/** special instruction: take vec4 partial derivative in Y direction
|
||||
* dst.c = d src0.c / dy */
|
||||
RC_OPCODE_DDY,
|
||||
|
||||
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */
|
||||
RC_OPCODE_DP2,
|
||||
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */
|
||||
RC_OPCODE_DP2,
|
||||
|
||||
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
|
||||
RC_OPCODE_DP3,
|
||||
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
|
||||
RC_OPCODE_DP3,
|
||||
|
||||
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */
|
||||
RC_OPCODE_DP4,
|
||||
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */
|
||||
RC_OPCODE_DP4,
|
||||
|
||||
/** special instruction, see ARB_fragment_program */
|
||||
RC_OPCODE_DST,
|
||||
/** special instruction, see ARB_fragment_program */
|
||||
RC_OPCODE_DST,
|
||||
|
||||
/** scalar instruction: dst = 2**src0.x */
|
||||
RC_OPCODE_EX2,
|
||||
/** scalar instruction: dst = 2**src0.x */
|
||||
RC_OPCODE_EX2,
|
||||
|
||||
/** special instruction, see ARB_vertex_program */
|
||||
RC_OPCODE_EXP,
|
||||
/** special instruction, see ARB_vertex_program */
|
||||
RC_OPCODE_EXP,
|
||||
|
||||
/** vec4 instruction: dst.c = src0.c - floor(src0.c) */
|
||||
RC_OPCODE_FRC,
|
||||
/** vec4 instruction: dst.c = src0.c - floor(src0.c) */
|
||||
RC_OPCODE_FRC,
|
||||
|
||||
/** special instruction: stop execution if any component of src0 is negative */
|
||||
RC_OPCODE_KIL,
|
||||
/** special instruction: stop execution if any component of src0 is negative */
|
||||
RC_OPCODE_KIL,
|
||||
|
||||
/** scalar instruction: dst = log_2(src0.x) */
|
||||
RC_OPCODE_LG2,
|
||||
/** scalar instruction: dst = log_2(src0.x) */
|
||||
RC_OPCODE_LG2,
|
||||
|
||||
/** special instruction, see ARB_vertex_program */
|
||||
RC_OPCODE_LIT,
|
||||
/** special instruction, see ARB_vertex_program */
|
||||
RC_OPCODE_LIT,
|
||||
|
||||
/** special instruction, see ARB_vertex_program */
|
||||
RC_OPCODE_LOG,
|
||||
/** special instruction, see ARB_vertex_program */
|
||||
RC_OPCODE_LOG,
|
||||
|
||||
/** vec4 instruction: dst.c = src0.c*src1.c + src2.c */
|
||||
RC_OPCODE_MAD,
|
||||
/** vec4 instruction: dst.c = src0.c*src1.c + src2.c */
|
||||
RC_OPCODE_MAD,
|
||||
|
||||
/** vec4 instruction: dst.c = max(src0.c, src1.c) */
|
||||
RC_OPCODE_MAX,
|
||||
/** vec4 instruction: dst.c = max(src0.c, src1.c) */
|
||||
RC_OPCODE_MAX,
|
||||
|
||||
/** vec4 instruction: dst.c = min(src0.c, src1.c) */
|
||||
RC_OPCODE_MIN,
|
||||
/** vec4 instruction: dst.c = min(src0.c, src1.c) */
|
||||
RC_OPCODE_MIN,
|
||||
|
||||
/** vec4 instruction: dst.c = src0.c */
|
||||
RC_OPCODE_MOV,
|
||||
/** vec4 instruction: dst.c = src0.c */
|
||||
RC_OPCODE_MOV,
|
||||
|
||||
/** vec4 instruction: dst.c = src0.c*src1.c */
|
||||
RC_OPCODE_MUL,
|
||||
/** vec4 instruction: dst.c = src0.c*src1.c */
|
||||
RC_OPCODE_MUL,
|
||||
|
||||
/** scalar instruction: dst = src0.x ** src1.x */
|
||||
RC_OPCODE_POW,
|
||||
/** scalar instruction: dst = src0.x ** src1.x */
|
||||
RC_OPCODE_POW,
|
||||
|
||||
/** scalar instruction: dst = 1 / src0.x */
|
||||
RC_OPCODE_RCP,
|
||||
/** scalar instruction: dst = 1 / src0.x */
|
||||
RC_OPCODE_RCP,
|
||||
|
||||
/** vec4 instruction: dst.c = floor(src0.c + 0.5) */
|
||||
RC_OPCODE_ROUND,
|
||||
/** vec4 instruction: dst.c = floor(src0.c + 0.5) */
|
||||
RC_OPCODE_ROUND,
|
||||
|
||||
/** scalar instruction: dst = 1 / sqrt(src0.x) */
|
||||
RC_OPCODE_RSQ,
|
||||
/** scalar instruction: dst = 1 / sqrt(src0.x) */
|
||||
RC_OPCODE_RSQ,
|
||||
|
||||
/** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */
|
||||
RC_OPCODE_SEQ,
|
||||
/** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */
|
||||
RC_OPCODE_SEQ,
|
||||
|
||||
/** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */
|
||||
RC_OPCODE_SGE,
|
||||
/** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */
|
||||
RC_OPCODE_SGE,
|
||||
|
||||
/** scalar instruction: dst = sin(src0.x) */
|
||||
RC_OPCODE_SIN,
|
||||
/** scalar instruction: dst = sin(src0.x) */
|
||||
RC_OPCODE_SIN,
|
||||
|
||||
/** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */
|
||||
RC_OPCODE_SLT,
|
||||
/** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */
|
||||
RC_OPCODE_SLT,
|
||||
|
||||
/** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
|
||||
RC_OPCODE_SNE,
|
||||
/** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
|
||||
RC_OPCODE_SNE,
|
||||
|
||||
RC_OPCODE_TEX,
|
||||
RC_OPCODE_TXB,
|
||||
RC_OPCODE_TXD,
|
||||
RC_OPCODE_TXL,
|
||||
RC_OPCODE_TXP,
|
||||
RC_OPCODE_TEX,
|
||||
RC_OPCODE_TXB,
|
||||
RC_OPCODE_TXD,
|
||||
RC_OPCODE_TXL,
|
||||
RC_OPCODE_TXP,
|
||||
|
||||
/** branch instruction:
|
||||
* If src0.x != 0.0, continue with the next instruction;
|
||||
* otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF.
|
||||
*/
|
||||
RC_OPCODE_IF,
|
||||
/** branch instruction:
|
||||
* If src0.x != 0.0, continue with the next instruction;
|
||||
* otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF.
|
||||
*/
|
||||
RC_OPCODE_IF,
|
||||
|
||||
/** branch instruction: jump to matching RC_OPCODE_ENDIF */
|
||||
RC_OPCODE_ELSE,
|
||||
/** branch instruction: jump to matching RC_OPCODE_ENDIF */
|
||||
RC_OPCODE_ELSE,
|
||||
|
||||
/** branch instruction: has no effect */
|
||||
RC_OPCODE_ENDIF,
|
||||
|
||||
RC_OPCODE_BGNLOOP,
|
||||
/** branch instruction: has no effect */
|
||||
RC_OPCODE_ENDIF,
|
||||
|
||||
RC_OPCODE_BRK,
|
||||
RC_OPCODE_BGNLOOP,
|
||||
|
||||
RC_OPCODE_ENDLOOP,
|
||||
RC_OPCODE_BRK,
|
||||
|
||||
RC_OPCODE_CONT,
|
||||
RC_OPCODE_ENDLOOP,
|
||||
|
||||
/** special instruction, used in R300-R500 fragment program pair instructions
|
||||
* indicates that the result of the alpha operation shall be replicated
|
||||
* across all other channels */
|
||||
RC_OPCODE_REPL_ALPHA,
|
||||
RC_OPCODE_CONT,
|
||||
|
||||
/** special instruction, used in R300-R500 fragment programs
|
||||
* to indicate the start of a block of texture instructions that
|
||||
* can run simultaneously. */
|
||||
RC_OPCODE_BEGIN_TEX,
|
||||
/** special instruction, used in R300-R500 fragment program pair instructions
|
||||
* indicates that the result of the alpha operation shall be replicated
|
||||
* across all other channels */
|
||||
RC_OPCODE_REPL_ALPHA,
|
||||
|
||||
/** Stop execution of the shader (GLSL discard) */
|
||||
RC_OPCODE_KILP,
|
||||
/** special instruction, used in R300-R500 fragment programs
|
||||
* to indicate the start of a block of texture instructions that
|
||||
* can run simultaneously. */
|
||||
RC_OPCODE_BEGIN_TEX,
|
||||
|
||||
/* Vertex shader CF Instructions */
|
||||
RC_ME_PRED_SEQ,
|
||||
RC_ME_PRED_SGT,
|
||||
RC_ME_PRED_SGE,
|
||||
RC_ME_PRED_SNEQ,
|
||||
RC_ME_PRED_SET_CLR,
|
||||
RC_ME_PRED_SET_INV,
|
||||
RC_ME_PRED_SET_POP,
|
||||
RC_ME_PRED_SET_RESTORE,
|
||||
/** Stop execution of the shader (GLSL discard) */
|
||||
RC_OPCODE_KILP,
|
||||
|
||||
RC_VE_PRED_SEQ_PUSH,
|
||||
RC_VE_PRED_SGT_PUSH,
|
||||
RC_VE_PRED_SGE_PUSH,
|
||||
RC_VE_PRED_SNEQ_PUSH,
|
||||
/* Vertex shader CF Instructions */
|
||||
RC_ME_PRED_SEQ,
|
||||
RC_ME_PRED_SGT,
|
||||
RC_ME_PRED_SGE,
|
||||
RC_ME_PRED_SNEQ,
|
||||
RC_ME_PRED_SET_CLR,
|
||||
RC_ME_PRED_SET_INV,
|
||||
RC_ME_PRED_SET_POP,
|
||||
RC_ME_PRED_SET_RESTORE,
|
||||
|
||||
MAX_RC_OPCODE
|
||||
RC_VE_PRED_SEQ_PUSH,
|
||||
RC_VE_PRED_SGT_PUSH,
|
||||
RC_VE_PRED_SGE_PUSH,
|
||||
RC_VE_PRED_SNEQ_PUSH,
|
||||
|
||||
MAX_RC_OPCODE
|
||||
} rc_opcode;
|
||||
|
||||
|
||||
struct rc_opcode_info {
|
||||
rc_opcode Opcode;
|
||||
const char * Name;
|
||||
rc_opcode Opcode;
|
||||
const char *Name;
|
||||
|
||||
/** true if the instruction reads from a texture.
|
||||
*
|
||||
* \note This is false for the KIL instruction, even though KIL is
|
||||
* a texture instruction from a hardware point of view. */
|
||||
unsigned int HasTexture:1;
|
||||
/** true if the instruction reads from a texture.
|
||||
*
|
||||
* \note This is false for the KIL instruction, even though KIL is
|
||||
* a texture instruction from a hardware point of view. */
|
||||
unsigned int HasTexture : 1;
|
||||
|
||||
unsigned int NumSrcRegs:2;
|
||||
unsigned int HasDstReg:1;
|
||||
unsigned int NumSrcRegs : 2;
|
||||
unsigned int HasDstReg : 1;
|
||||
|
||||
/** true if this instruction affects control flow */
|
||||
unsigned int IsFlowControl:1;
|
||||
/** true if this instruction affects control flow */
|
||||
unsigned int IsFlowControl : 1;
|
||||
|
||||
/** true if this is a vector instruction that operates on components in parallel
|
||||
* without any cross-component interaction */
|
||||
unsigned int IsComponentwise:1;
|
||||
/** true if this is a vector instruction that operates on components in parallel
|
||||
* without any cross-component interaction */
|
||||
unsigned int IsComponentwise : 1;
|
||||
|
||||
/** true if this instruction sources only its operands X components
|
||||
* to compute one result which is smeared across all output channels */
|
||||
unsigned int IsStandardScalar:1;
|
||||
/** true if this instruction sources only its operands X components
|
||||
* to compute one result which is smeared across all output channels */
|
||||
unsigned int IsStandardScalar : 1;
|
||||
};
|
||||
|
||||
extern const struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE];
|
||||
|
||||
static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode)
|
||||
static inline const struct rc_opcode_info *
|
||||
rc_get_opcode_info(rc_opcode opcode)
|
||||
{
|
||||
assert((unsigned int)opcode < MAX_RC_OPCODE);
|
||||
assert(rc_opcodes[opcode].Opcode == opcode);
|
||||
assert((unsigned int)opcode < MAX_RC_OPCODE);
|
||||
assert(rc_opcodes[opcode].Opcode == opcode);
|
||||
|
||||
return &rc_opcodes[opcode];
|
||||
return &rc_opcodes[opcode];
|
||||
}
|
||||
|
||||
struct rc_instruction;
|
||||
|
||||
void rc_compute_sources_for_writemask(
|
||||
const struct rc_instruction *inst,
|
||||
unsigned int writemask,
|
||||
unsigned int *srcmasks);
|
||||
void rc_compute_sources_for_writemask(const struct rc_instruction *inst, unsigned int writemask,
|
||||
unsigned int *srcmasks);
|
||||
|
||||
#endif /* RADEON_OPCODES_H */
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -8,59 +8,58 @@
|
|||
#include "radeon_opcodes.h"
|
||||
#include "radeon_program_pair.h"
|
||||
|
||||
static void mark_used_presub(struct rc_pair_sub_instruction * sub)
|
||||
static void
|
||||
mark_used_presub(struct rc_pair_sub_instruction *sub)
|
||||
{
|
||||
if (sub->Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
unsigned int presub_reg_count = rc_presubtract_src_reg_count(
|
||||
sub->Src[RC_PAIR_PRESUB_SRC].Index);
|
||||
unsigned int i;
|
||||
for (i = 0; i < presub_reg_count; i++) {
|
||||
sub->Src[i].Used = 1;
|
||||
}
|
||||
}
|
||||
if (sub->Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
unsigned int presub_reg_count =
|
||||
rc_presubtract_src_reg_count(sub->Src[RC_PAIR_PRESUB_SRC].Index);
|
||||
unsigned int i;
|
||||
for (i = 0; i < presub_reg_count; i++) {
|
||||
sub->Src[i].Used = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void mark_used(
|
||||
struct rc_instruction * inst,
|
||||
struct rc_pair_sub_instruction * sub)
|
||||
static void
|
||||
mark_used(struct rc_instruction *inst, struct rc_pair_sub_instruction *sub)
|
||||
{
|
||||
unsigned int i;
|
||||
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
|
||||
for (i = 0; i < info->NumSrcRegs; i++) {
|
||||
unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle);
|
||||
if (src_type & RC_SOURCE_RGB) {
|
||||
inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1;
|
||||
}
|
||||
unsigned int i;
|
||||
const struct rc_opcode_info *info = rc_get_opcode_info(sub->Opcode);
|
||||
for (i = 0; i < info->NumSrcRegs; i++) {
|
||||
unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle);
|
||||
if (src_type & RC_SOURCE_RGB) {
|
||||
inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1;
|
||||
}
|
||||
|
||||
if (src_type & RC_SOURCE_ALPHA) {
|
||||
inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1;
|
||||
}
|
||||
}
|
||||
if (src_type & RC_SOURCE_ALPHA) {
|
||||
inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This pass finds sources that are not used by their instruction and marks
|
||||
* them as unused.
|
||||
* them as unused.
|
||||
*/
|
||||
void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user)
|
||||
void
|
||||
rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct rc_instruction * inst;
|
||||
for (inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
unsigned int i;
|
||||
if (inst->Type == RC_INSTRUCTION_NORMAL)
|
||||
continue;
|
||||
struct rc_instruction *inst;
|
||||
for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
unsigned int i;
|
||||
if (inst->Type == RC_INSTRUCTION_NORMAL)
|
||||
continue;
|
||||
|
||||
/* Mark all sources as unused */
|
||||
for (i = 0; i < 4; i++) {
|
||||
inst->U.P.RGB.Src[i].Used = 0;
|
||||
inst->U.P.Alpha.Src[i].Used = 0;
|
||||
}
|
||||
mark_used(inst, &inst->U.P.RGB);
|
||||
mark_used(inst, &inst->U.P.Alpha);
|
||||
/* Mark all sources as unused */
|
||||
for (i = 0; i < 4; i++) {
|
||||
inst->U.P.RGB.Src[i].Used = 0;
|
||||
inst->U.P.Alpha.Src[i].Used = 0;
|
||||
}
|
||||
mark_used(inst, &inst->U.P.RGB);
|
||||
mark_used(inst, &inst->U.P.Alpha);
|
||||
|
||||
mark_used_presub(&inst->U.P.RGB);
|
||||
mark_used_presub(&inst->U.P.Alpha);
|
||||
}
|
||||
mark_used_presub(&inst->U.P.RGB);
|
||||
mark_used_presub(&inst->U.P.Alpha);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,9 +9,9 @@
|
|||
#include <stdio.h>
|
||||
|
||||
#include "util/glheader.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/register_allocate.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/ralloc.h"
|
||||
|
||||
#include "r300_fragprog_swizzle.h"
|
||||
#include "radeon_compiler.h"
|
||||
|
|
@ -21,59 +21,59 @@
|
|||
#include "radeon_regalloc.h"
|
||||
#include "radeon_variable.h"
|
||||
|
||||
static void scan_read_callback(void * data, struct rc_instruction * inst,
|
||||
rc_register_file file, unsigned int index, unsigned int mask)
|
||||
static void
|
||||
scan_read_callback(void *data, struct rc_instruction *inst, rc_register_file file,
|
||||
unsigned int index, unsigned int mask)
|
||||
{
|
||||
struct regalloc_state * s = data;
|
||||
struct register_info * reg;
|
||||
unsigned int i;
|
||||
struct regalloc_state *s = data;
|
||||
struct register_info *reg;
|
||||
unsigned int i;
|
||||
|
||||
if (file != RC_FILE_INPUT)
|
||||
return;
|
||||
if (file != RC_FILE_INPUT)
|
||||
return;
|
||||
|
||||
s->Input[index].Used = 1;
|
||||
reg = &s->Input[index];
|
||||
s->Input[index].Used = 1;
|
||||
reg = &s->Input[index];
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (!((mask >> i) & 0x1)) {
|
||||
continue;
|
||||
}
|
||||
reg->Live[i].Used = 1;
|
||||
reg->Live[i].Start = 0;
|
||||
reg->Live[i].End =
|
||||
s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
|
||||
}
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (!((mask >> i) & 0x1)) {
|
||||
continue;
|
||||
}
|
||||
reg->Live[i].Used = 1;
|
||||
reg->Live[i].Start = 0;
|
||||
reg->Live[i].End = s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
|
||||
}
|
||||
}
|
||||
|
||||
static void remap_register(void * data, struct rc_instruction * inst,
|
||||
rc_register_file * file, unsigned int * index)
|
||||
static void
|
||||
remap_register(void *data, struct rc_instruction *inst, rc_register_file *file, unsigned int *index)
|
||||
{
|
||||
struct regalloc_state * s = data;
|
||||
const struct register_info * reg;
|
||||
struct regalloc_state *s = data;
|
||||
const struct register_info *reg;
|
||||
|
||||
if (*file == RC_FILE_TEMPORARY && s->Simple)
|
||||
reg = &s->Temporary[*index];
|
||||
else if (*file == RC_FILE_INPUT)
|
||||
reg = &s->Input[*index];
|
||||
else
|
||||
return;
|
||||
if (*file == RC_FILE_TEMPORARY && s->Simple)
|
||||
reg = &s->Temporary[*index];
|
||||
else if (*file == RC_FILE_INPUT)
|
||||
reg = &s->Input[*index];
|
||||
else
|
||||
return;
|
||||
|
||||
if (reg->Allocated) {
|
||||
*index = reg->Index;
|
||||
}
|
||||
if (reg->Allocated) {
|
||||
*index = reg->Index;
|
||||
}
|
||||
}
|
||||
|
||||
static void alloc_input_simple(void * data, unsigned int input,
|
||||
unsigned int hwreg)
|
||||
static void
|
||||
alloc_input_simple(void *data, unsigned int input, unsigned int hwreg)
|
||||
{
|
||||
struct regalloc_state * s = data;
|
||||
struct regalloc_state *s = data;
|
||||
|
||||
if (input >= s->NumInputs)
|
||||
return;
|
||||
if (input >= s->NumInputs)
|
||||
return;
|
||||
|
||||
s->Input[input].Allocated = 1;
|
||||
s->Input[input].File = RC_FILE_TEMPORARY;
|
||||
s->Input[input].Index = hwreg;
|
||||
s->Input[input].Allocated = 1;
|
||||
s->Input[input].File = RC_FILE_TEMPORARY;
|
||||
s->Input[input].Index = hwreg;
|
||||
}
|
||||
|
||||
/* This functions offsets the temporary register indices by the number
|
||||
|
|
@ -82,282 +82,263 @@ static void alloc_input_simple(void * data, unsigned int input,
|
|||
*
|
||||
* This pass is supposed to be used to maintain correct allocation of inputs
|
||||
* if the standard register allocation is disabled. */
|
||||
static void do_regalloc_inputs_only(struct regalloc_state * s)
|
||||
static void
|
||||
do_regalloc_inputs_only(struct regalloc_state *s)
|
||||
{
|
||||
for (unsigned i = 0; i < s->NumTemporaries; i++) {
|
||||
s->Temporary[i].Allocated = 1;
|
||||
s->Temporary[i].File = RC_FILE_TEMPORARY;
|
||||
s->Temporary[i].Index = i + s->NumInputs;
|
||||
}
|
||||
for (unsigned i = 0; i < s->NumTemporaries; i++) {
|
||||
s->Temporary[i].Allocated = 1;
|
||||
s->Temporary[i].File = RC_FILE_TEMPORARY;
|
||||
s->Temporary[i].Index = i + s->NumInputs;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int is_derivative(rc_opcode op)
|
||||
static unsigned int
|
||||
is_derivative(rc_opcode op)
|
||||
{
|
||||
return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
|
||||
return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
|
||||
}
|
||||
|
||||
struct variable_get_class_cb_data {
|
||||
unsigned int * can_change_writemask;
|
||||
unsigned int conversion_swizzle;
|
||||
struct radeon_compiler * c;
|
||||
unsigned int *can_change_writemask;
|
||||
unsigned int conversion_swizzle;
|
||||
struct radeon_compiler *c;
|
||||
};
|
||||
|
||||
static void variable_get_class_read_cb(
|
||||
void * userdata,
|
||||
struct rc_instruction * inst,
|
||||
struct rc_pair_instruction_arg * arg,
|
||||
struct rc_pair_instruction_source * src)
|
||||
static void
|
||||
variable_get_class_read_cb(void *userdata, struct rc_instruction *inst,
|
||||
struct rc_pair_instruction_arg *arg,
|
||||
struct rc_pair_instruction_source *src)
|
||||
{
|
||||
struct variable_get_class_cb_data * d = userdata;
|
||||
unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
|
||||
d->conversion_swizzle);
|
||||
/* We can't just call r300_swizzle_is_native basic here, because it ignores the
|
||||
* extra requirements for presubtract. However, after pair translation we no longer
|
||||
* have the rc_src_register required for the native swizzle, so we have to
|
||||
* reconstruct it. */
|
||||
struct rc_src_register reg = {};
|
||||
reg.Swizzle = new_swizzle;
|
||||
reg.File = src->File;
|
||||
struct variable_get_class_cb_data *d = userdata;
|
||||
unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle, d->conversion_swizzle);
|
||||
/* We can't just call r300_swizzle_is_native basic here, because it ignores the
|
||||
* extra requirements for presubtract. However, after pair translation we no longer
|
||||
* have the rc_src_register required for the native swizzle, so we have to
|
||||
* reconstruct it. */
|
||||
struct rc_src_register reg = {};
|
||||
reg.Swizzle = new_swizzle;
|
||||
reg.File = src->File;
|
||||
|
||||
assert(inst->Type == RC_INSTRUCTION_PAIR);
|
||||
/* The opcode is unimportant, we can't have TEX here. */
|
||||
if (!d->c->SwizzleCaps->IsNative(RC_OPCODE_MAD, reg)) {
|
||||
*d->can_change_writemask = 0;
|
||||
}
|
||||
assert(inst->Type == RC_INSTRUCTION_PAIR);
|
||||
/* The opcode is unimportant, we can't have TEX here. */
|
||||
if (!d->c->SwizzleCaps->IsNative(RC_OPCODE_MAD, reg)) {
|
||||
*d->can_change_writemask = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned variable_get_class(
|
||||
struct rc_variable * variable,
|
||||
const struct rc_class * classes)
|
||||
static unsigned
|
||||
variable_get_class(struct rc_variable *variable, const struct rc_class *classes)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int can_change_writemask= 1;
|
||||
unsigned int writemask = rc_variable_writemask_sum(variable);
|
||||
struct rc_list * readers = rc_variable_readers_union(variable);
|
||||
int class_index;
|
||||
unsigned int i;
|
||||
unsigned int can_change_writemask = 1;
|
||||
unsigned int writemask = rc_variable_writemask_sum(variable);
|
||||
struct rc_list *readers = rc_variable_readers_union(variable);
|
||||
int class_index;
|
||||
|
||||
if (!variable->C->is_r500) {
|
||||
struct rc_class c;
|
||||
struct rc_variable * var_ptr;
|
||||
/* The assumption here is that if an instruction has type
|
||||
* RC_INSTRUCTION_NORMAL then it is a TEX instruction.
|
||||
* r300 and r400 can't swizzle the result of a TEX lookup. */
|
||||
for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
|
||||
if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
writemask = RC_MASK_XYZW;
|
||||
}
|
||||
}
|
||||
if (!variable->C->is_r500) {
|
||||
struct rc_class c;
|
||||
struct rc_variable *var_ptr;
|
||||
/* The assumption here is that if an instruction has type
|
||||
* RC_INSTRUCTION_NORMAL then it is a TEX instruction.
|
||||
* r300 and r400 can't swizzle the result of a TEX lookup. */
|
||||
for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
|
||||
if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
writemask = RC_MASK_XYZW;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check if it is possible to do swizzle packing for r300/r400
|
||||
* without creating non-native swizzles. */
|
||||
class_index = rc_find_class(classes, writemask, 3);
|
||||
if (class_index < 0) {
|
||||
goto error;
|
||||
}
|
||||
c = classes[class_index];
|
||||
if (c.WritemaskCount == 1) {
|
||||
goto done;
|
||||
}
|
||||
for (i = 0; i < c.WritemaskCount; i++) {
|
||||
struct rc_variable * var_ptr;
|
||||
for (var_ptr = variable; var_ptr;
|
||||
var_ptr = var_ptr->Friend) {
|
||||
int j;
|
||||
unsigned int conversion_swizzle =
|
||||
rc_make_conversion_swizzle(
|
||||
writemask, c.Writemasks[i]);
|
||||
struct variable_get_class_cb_data d;
|
||||
d.can_change_writemask = &can_change_writemask;
|
||||
d.conversion_swizzle = conversion_swizzle;
|
||||
d.c = variable->C;
|
||||
/* If we get this far var_ptr->Inst has to
|
||||
* be a pair instruction. If variable or any
|
||||
* of its friends are normal instructions,
|
||||
* then the writemask will be set to RC_MASK_XYZW
|
||||
* and the function will return before it gets
|
||||
* here. */
|
||||
rc_pair_for_all_reads_arg(var_ptr->Inst,
|
||||
variable_get_class_read_cb, &d);
|
||||
/* Check if it is possible to do swizzle packing for r300/r400
|
||||
* without creating non-native swizzles. */
|
||||
class_index = rc_find_class(classes, writemask, 3);
|
||||
if (class_index < 0) {
|
||||
goto error;
|
||||
}
|
||||
c = classes[class_index];
|
||||
if (c.WritemaskCount == 1) {
|
||||
goto done;
|
||||
}
|
||||
for (i = 0; i < c.WritemaskCount; i++) {
|
||||
struct rc_variable *var_ptr;
|
||||
for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
|
||||
int j;
|
||||
unsigned int conversion_swizzle =
|
||||
rc_make_conversion_swizzle(writemask, c.Writemasks[i]);
|
||||
struct variable_get_class_cb_data d;
|
||||
d.can_change_writemask = &can_change_writemask;
|
||||
d.conversion_swizzle = conversion_swizzle;
|
||||
d.c = variable->C;
|
||||
/* If we get this far var_ptr->Inst has to
|
||||
* be a pair instruction. If variable or any
|
||||
* of its friends are normal instructions,
|
||||
* then the writemask will be set to RC_MASK_XYZW
|
||||
* and the function will return before it gets
|
||||
* here. */
|
||||
rc_pair_for_all_reads_arg(var_ptr->Inst, variable_get_class_read_cb, &d);
|
||||
|
||||
for (j = 0; j < var_ptr->ReaderCount; j++) {
|
||||
unsigned int old_swizzle;
|
||||
unsigned int new_swizzle;
|
||||
struct rc_reader r = var_ptr->Readers[j];
|
||||
if (r.Inst->Type ==
|
||||
RC_INSTRUCTION_PAIR ) {
|
||||
old_swizzle = r.U.P.Arg->Swizzle;
|
||||
} else {
|
||||
/* Source operands of TEX
|
||||
* instructions can't be
|
||||
* swizzle on r300/r400 GPUs.
|
||||
*/
|
||||
can_change_writemask = 0;
|
||||
break;
|
||||
}
|
||||
new_swizzle = rc_rewrite_swizzle(
|
||||
old_swizzle, conversion_swizzle);
|
||||
if (!r300_swizzle_is_native_basic(
|
||||
new_swizzle)) {
|
||||
can_change_writemask = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!can_change_writemask) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!can_change_writemask) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (j = 0; j < var_ptr->ReaderCount; j++) {
|
||||
unsigned int old_swizzle;
|
||||
unsigned int new_swizzle;
|
||||
struct rc_reader r = var_ptr->Readers[j];
|
||||
if (r.Inst->Type == RC_INSTRUCTION_PAIR) {
|
||||
old_swizzle = r.U.P.Arg->Swizzle;
|
||||
} else {
|
||||
/* Source operands of TEX
|
||||
* instructions can't be
|
||||
* swizzle on r300/r400 GPUs.
|
||||
*/
|
||||
can_change_writemask = 0;
|
||||
break;
|
||||
}
|
||||
new_swizzle = rc_rewrite_swizzle(old_swizzle, conversion_swizzle);
|
||||
if (!r300_swizzle_is_native_basic(new_swizzle)) {
|
||||
can_change_writemask = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!can_change_writemask) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!can_change_writemask) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
|
||||
/* DDX/DDY seem to always fail when their writemasks are
|
||||
* changed.*/
|
||||
if (is_derivative(variable->Inst->U.P.RGB.Opcode)
|
||||
|| is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
|
||||
can_change_writemask = 0;
|
||||
}
|
||||
}
|
||||
for ( ; readers; readers = readers->Next) {
|
||||
struct rc_reader * r = readers->Item;
|
||||
if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
|
||||
if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
|
||||
can_change_writemask = 0;
|
||||
break;
|
||||
}
|
||||
/* DDX/DDY also fail when their swizzles are changed. */
|
||||
if (is_derivative(r->Inst->U.P.RGB.Opcode)
|
||||
|| is_derivative(r->Inst->U.P.Alpha.Opcode)) {
|
||||
can_change_writemask = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
|
||||
/* DDX/DDY seem to always fail when their writemasks are
|
||||
* changed.*/
|
||||
if (is_derivative(variable->Inst->U.P.RGB.Opcode) ||
|
||||
is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
|
||||
can_change_writemask = 0;
|
||||
}
|
||||
}
|
||||
for (; readers; readers = readers->Next) {
|
||||
struct rc_reader *r = readers->Item;
|
||||
if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
|
||||
if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
|
||||
can_change_writemask = 0;
|
||||
break;
|
||||
}
|
||||
/* DDX/DDY also fail when their swizzles are changed. */
|
||||
if (is_derivative(r->Inst->U.P.RGB.Opcode) || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
|
||||
can_change_writemask = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class_index = rc_find_class(classes, writemask,
|
||||
can_change_writemask ? 3 : 1);
|
||||
class_index = rc_find_class(classes, writemask, can_change_writemask ? 3 : 1);
|
||||
done:
|
||||
if (class_index > -1) {
|
||||
return classes[class_index].ID;
|
||||
} else {
|
||||
error:
|
||||
rc_error(variable->C,
|
||||
"Could not find class for index=%u mask=%u\n",
|
||||
variable->Dst.Index, writemask);
|
||||
return 0;
|
||||
}
|
||||
if (class_index > -1) {
|
||||
return classes[class_index].ID;
|
||||
} else {
|
||||
error:
|
||||
rc_error(variable->C, "Could not find class for index=%u mask=%u\n", variable->Dst.Index,
|
||||
writemask);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void do_advanced_regalloc(struct regalloc_state * s)
|
||||
static void
|
||||
do_advanced_regalloc(struct regalloc_state *s)
|
||||
{
|
||||
|
||||
unsigned int i, input_node, node_count, node_index;
|
||||
struct ra_class ** node_classes;
|
||||
struct rc_instruction * inst;
|
||||
struct rc_list * var_ptr;
|
||||
struct rc_list * variables;
|
||||
struct ra_graph * graph;
|
||||
const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
|
||||
unsigned int i, input_node, node_count, node_index;
|
||||
struct ra_class **node_classes;
|
||||
struct rc_instruction *inst;
|
||||
struct rc_list *var_ptr;
|
||||
struct rc_list *variables;
|
||||
struct ra_graph *graph;
|
||||
const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
|
||||
|
||||
/* Get list of program variables */
|
||||
variables = rc_get_variables(s->C);
|
||||
node_count = rc_list_count(variables);
|
||||
node_classes = memory_pool_malloc(&s->C->Pool,
|
||||
node_count * sizeof(struct ra_class *));
|
||||
/* Get list of program variables */
|
||||
variables = rc_get_variables(s->C);
|
||||
node_count = rc_list_count(variables);
|
||||
node_classes = memory_pool_malloc(&s->C->Pool, node_count * sizeof(struct ra_class *));
|
||||
|
||||
for (var_ptr = variables, node_index = 0; var_ptr;
|
||||
var_ptr = var_ptr->Next, node_index++) {
|
||||
unsigned int class_index;
|
||||
/* Compute the live intervals */
|
||||
rc_variable_compute_live_intervals(var_ptr->Item);
|
||||
for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) {
|
||||
unsigned int class_index;
|
||||
/* Compute the live intervals */
|
||||
rc_variable_compute_live_intervals(var_ptr->Item);
|
||||
|
||||
class_index = variable_get_class(var_ptr->Item, ra_state->class_list);
|
||||
node_classes[node_index] = ra_state->classes[class_index];
|
||||
}
|
||||
class_index = variable_get_class(var_ptr->Item, ra_state->class_list);
|
||||
node_classes[node_index] = ra_state->classes[class_index];
|
||||
}
|
||||
|
||||
/* Calculate live intervals for input registers */
|
||||
for (inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
rc_opcode op = rc_get_flow_control_inst(inst);
|
||||
if (op == RC_OPCODE_BGNLOOP) {
|
||||
struct rc_instruction *endloop = rc_match_bgnloop(inst);
|
||||
if (endloop->IP > s->LoopEnd) {
|
||||
s->LoopEnd = endloop->IP;
|
||||
}
|
||||
}
|
||||
rc_for_all_reads_mask(inst, scan_read_callback, s);
|
||||
}
|
||||
|
||||
/* Calculate live intervals for input registers */
|
||||
for (inst = s->C->Program.Instructions.Next;
|
||||
inst != &s->C->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
rc_opcode op = rc_get_flow_control_inst(inst);
|
||||
if (op == RC_OPCODE_BGNLOOP) {
|
||||
struct rc_instruction * endloop =
|
||||
rc_match_bgnloop(inst);
|
||||
if (endloop->IP > s->LoopEnd) {
|
||||
s->LoopEnd = endloop->IP;
|
||||
}
|
||||
}
|
||||
rc_for_all_reads_mask(inst, scan_read_callback, s);
|
||||
}
|
||||
/* Compute the writemask for inputs. */
|
||||
for (i = 0; i < s->NumInputs; i++) {
|
||||
unsigned int chan, writemask = 0;
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
if (s->Input[i].Live[chan].Used) {
|
||||
writemask |= (1 << chan);
|
||||
}
|
||||
}
|
||||
s->Input[i].Writemask = writemask;
|
||||
}
|
||||
|
||||
/* Compute the writemask for inputs. */
|
||||
for (i = 0; i < s->NumInputs; i++) {
|
||||
unsigned int chan, writemask = 0;
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
if (s->Input[i].Live[chan].Used) {
|
||||
writemask |= (1 << chan);
|
||||
}
|
||||
}
|
||||
s->Input[i].Writemask = writemask;
|
||||
}
|
||||
graph = ra_alloc_interference_graph(ra_state->regs, node_count + s->NumInputs);
|
||||
|
||||
graph = ra_alloc_interference_graph(ra_state->regs,
|
||||
node_count + s->NumInputs);
|
||||
for (node_index = 0; node_index < node_count; node_index++) {
|
||||
ra_set_node_class(graph, node_index, node_classes[node_index]);
|
||||
}
|
||||
|
||||
for (node_index = 0; node_index < node_count; node_index++) {
|
||||
ra_set_node_class(graph, node_index, node_classes[node_index]);
|
||||
}
|
||||
rc_build_interference_graph(graph, variables);
|
||||
|
||||
rc_build_interference_graph(graph, variables);
|
||||
/* Add input registers to the interference graph */
|
||||
for (i = 0, input_node = 0; i < s->NumInputs; i++) {
|
||||
if (!s->Input[i].Writemask) {
|
||||
continue;
|
||||
}
|
||||
for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) {
|
||||
struct rc_variable *var = var_ptr->Item;
|
||||
if (rc_overlap_live_intervals_array(s->Input[i].Live, var->Live)) {
|
||||
ra_add_node_interference(graph, node_index, node_count + input_node);
|
||||
}
|
||||
}
|
||||
/* Manually allocate a register for this input */
|
||||
ra_set_node_reg(graph, node_count + input_node,
|
||||
get_reg_id(s->Input[i].Index, s->Input[i].Writemask));
|
||||
input_node++;
|
||||
}
|
||||
|
||||
/* Add input registers to the interference graph */
|
||||
for (i = 0, input_node = 0; i< s->NumInputs; i++) {
|
||||
if (!s->Input[i].Writemask) {
|
||||
continue;
|
||||
}
|
||||
for (var_ptr = variables, node_index = 0;
|
||||
var_ptr; var_ptr = var_ptr->Next, node_index++) {
|
||||
struct rc_variable * var = var_ptr->Item;
|
||||
if (rc_overlap_live_intervals_array(s->Input[i].Live,
|
||||
var->Live)) {
|
||||
ra_add_node_interference(graph, node_index,
|
||||
node_count + input_node);
|
||||
}
|
||||
}
|
||||
/* Manually allocate a register for this input */
|
||||
ra_set_node_reg(graph, node_count + input_node, get_reg_id(
|
||||
s->Input[i].Index, s->Input[i].Writemask));
|
||||
input_node++;
|
||||
}
|
||||
if (!ra_allocate(graph)) {
|
||||
rc_error(s->C, "Ran out of hardware temporaries\n");
|
||||
ralloc_free(graph);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!ra_allocate(graph)) {
|
||||
rc_error(s->C, "Ran out of hardware temporaries\n");
|
||||
ralloc_free(graph);
|
||||
return;
|
||||
}
|
||||
/* Rewrite the registers */
|
||||
for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) {
|
||||
int reg = ra_get_node_reg(graph, node_index);
|
||||
unsigned int writemask = reg_get_writemask(reg);
|
||||
unsigned int index = reg_get_index(reg);
|
||||
struct rc_variable *var = var_ptr->Item;
|
||||
|
||||
/* Rewrite the registers */
|
||||
for (var_ptr = variables, node_index = 0; var_ptr;
|
||||
var_ptr = var_ptr->Next, node_index++) {
|
||||
int reg = ra_get_node_reg(graph, node_index);
|
||||
unsigned int writemask = reg_get_writemask(reg);
|
||||
unsigned int index = reg_get_index(reg);
|
||||
struct rc_variable * var = var_ptr->Item;
|
||||
if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
writemask = rc_variable_writemask_sum(var);
|
||||
}
|
||||
|
||||
if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
writemask = rc_variable_writemask_sum(var);
|
||||
}
|
||||
if (var->Dst.File == RC_FILE_INPUT) {
|
||||
continue;
|
||||
}
|
||||
rc_variable_change_dst(var, index, writemask);
|
||||
}
|
||||
|
||||
if (var->Dst.File == RC_FILE_INPUT) {
|
||||
continue;
|
||||
}
|
||||
rc_variable_change_dst(var, index, writemask);
|
||||
}
|
||||
|
||||
ralloc_free(graph);
|
||||
ralloc_free(graph);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -366,41 +347,38 @@ static void do_advanced_regalloc(struct regalloc_state * s)
|
|||
* only allocates space for input registers (\sa do_regalloc_inputs_only). If
|
||||
* user is non-zero, then the regular register allocator will be used
|
||||
* (\sa do_regalloc).
|
||||
*/
|
||||
void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
|
||||
*/
|
||||
void
|
||||
rc_pair_regalloc(struct radeon_compiler *cc, void *user)
|
||||
{
|
||||
struct r300_fragment_program_compiler *c =
|
||||
(struct r300_fragment_program_compiler*)cc;
|
||||
struct regalloc_state s;
|
||||
int * do_full_regalloc = (int*)user;
|
||||
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler *)cc;
|
||||
struct regalloc_state s;
|
||||
int *do_full_regalloc = (int *)user;
|
||||
|
||||
memset(&s, 0, sizeof(s));
|
||||
s.C = cc;
|
||||
s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
|
||||
s.Input = memory_pool_malloc(&cc->Pool,
|
||||
s.NumInputs * sizeof(struct register_info));
|
||||
memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
|
||||
memset(&s, 0, sizeof(s));
|
||||
s.C = cc;
|
||||
s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
|
||||
s.Input = memory_pool_malloc(&cc->Pool, s.NumInputs * sizeof(struct register_info));
|
||||
memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
|
||||
|
||||
s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
|
||||
s.Temporary = memory_pool_malloc(&cc->Pool,
|
||||
s.NumTemporaries * sizeof(struct register_info));
|
||||
memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
|
||||
s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
|
||||
s.Temporary = memory_pool_malloc(&cc->Pool, s.NumTemporaries * sizeof(struct register_info));
|
||||
memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
|
||||
|
||||
rc_recompute_ips(s.C);
|
||||
rc_recompute_ips(s.C);
|
||||
|
||||
c->AllocateHwInputs(c, &alloc_input_simple, &s);
|
||||
if (*do_full_regalloc) {
|
||||
do_advanced_regalloc(&s);
|
||||
} else {
|
||||
s.Simple = 1;
|
||||
do_regalloc_inputs_only(&s);
|
||||
}
|
||||
c->AllocateHwInputs(c, &alloc_input_simple, &s);
|
||||
if (*do_full_regalloc) {
|
||||
do_advanced_regalloc(&s);
|
||||
} else {
|
||||
s.Simple = 1;
|
||||
do_regalloc_inputs_only(&s);
|
||||
}
|
||||
|
||||
/* Rewrite inputs and if we are doing the simple allocation, rewrite
|
||||
* temporaries too. */
|
||||
for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
|
||||
inst != &s.C->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
rc_remap_registers(inst, &remap_register, &s);
|
||||
}
|
||||
/* Rewrite inputs and if we are doing the simple allocation, rewrite
|
||||
* temporaries too. */
|
||||
for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
|
||||
inst != &s.C->Program.Instructions; inst = inst->Next) {
|
||||
rc_remap_registers(inst, &remap_register, &s);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -10,41 +10,41 @@
|
|||
|
||||
#include "util/compiler.h"
|
||||
|
||||
|
||||
/**
|
||||
* Finally rewrite ADD, MOV, MUL as the appropriate native instruction
|
||||
* and reverse the order of arguments for CMP.
|
||||
*/
|
||||
static void final_rewrite(struct rc_sub_instruction *inst)
|
||||
static void
|
||||
final_rewrite(struct rc_sub_instruction *inst)
|
||||
{
|
||||
struct rc_src_register tmp;
|
||||
struct rc_src_register tmp;
|
||||
|
||||
switch(inst->Opcode) {
|
||||
case RC_OPCODE_ADD:
|
||||
inst->SrcReg[2] = inst->SrcReg[1];
|
||||
inst->SrcReg[1].File = RC_FILE_NONE;
|
||||
inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
|
||||
inst->SrcReg[1].Negate = RC_MASK_NONE;
|
||||
inst->Opcode = RC_OPCODE_MAD;
|
||||
break;
|
||||
case RC_OPCODE_CMP:
|
||||
tmp = inst->SrcReg[2];
|
||||
inst->SrcReg[2] = inst->SrcReg[0];
|
||||
inst->SrcReg[0] = tmp;
|
||||
break;
|
||||
case RC_OPCODE_MOV:
|
||||
inst->SrcReg[1] = inst->SrcReg[0];
|
||||
inst->Opcode = RC_OPCODE_MAX;
|
||||
break;
|
||||
case RC_OPCODE_MUL:
|
||||
inst->SrcReg[2].File = RC_FILE_NONE;
|
||||
inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
|
||||
inst->Opcode = RC_OPCODE_MAD;
|
||||
break;
|
||||
default:
|
||||
/* nothing to do */
|
||||
break;
|
||||
}
|
||||
switch (inst->Opcode) {
|
||||
case RC_OPCODE_ADD:
|
||||
inst->SrcReg[2] = inst->SrcReg[1];
|
||||
inst->SrcReg[1].File = RC_FILE_NONE;
|
||||
inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
|
||||
inst->SrcReg[1].Negate = RC_MASK_NONE;
|
||||
inst->Opcode = RC_OPCODE_MAD;
|
||||
break;
|
||||
case RC_OPCODE_CMP:
|
||||
tmp = inst->SrcReg[2];
|
||||
inst->SrcReg[2] = inst->SrcReg[0];
|
||||
inst->SrcReg[0] = tmp;
|
||||
break;
|
||||
case RC_OPCODE_MOV:
|
||||
inst->SrcReg[1] = inst->SrcReg[0];
|
||||
inst->Opcode = RC_OPCODE_MAX;
|
||||
break;
|
||||
case RC_OPCODE_MUL:
|
||||
inst->SrcReg[2].File = RC_FILE_NONE;
|
||||
inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
|
||||
inst->Opcode = RC_OPCODE_MAD;
|
||||
break;
|
||||
default:
|
||||
/* nothing to do */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -55,326 +55,309 @@ static void final_rewrite(struct rc_sub_instruction *inst)
|
|||
* The output modifier cannot be disabled for a saturated MOV (MOV with clamping enabled).
|
||||
* RC_OMOD_DISABLE is only available on R5xx and is only valid with MIN/MAX/CMP/CND.
|
||||
*/
|
||||
static unsigned translate_omod(struct r300_fragment_program_compiler *c,
|
||||
struct rc_sub_instruction *inst)
|
||||
static unsigned
|
||||
translate_omod(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
|
||||
{
|
||||
if (c->Base.is_r500 && inst->Omod == RC_OMOD_MUL_1 && !inst->SaturateMode &&
|
||||
(inst->Opcode == RC_OPCODE_MAX || inst->Opcode == RC_OPCODE_MIN ||
|
||||
inst->Opcode == RC_OPCODE_CMP || inst->Opcode == RC_OPCODE_CND))
|
||||
return RC_OMOD_DISABLE;
|
||||
return inst->Omod;
|
||||
if (c->Base.is_r500 && inst->Omod == RC_OMOD_MUL_1 && !inst->SaturateMode &&
|
||||
(inst->Opcode == RC_OPCODE_MAX || inst->Opcode == RC_OPCODE_MIN ||
|
||||
inst->Opcode == RC_OPCODE_CMP || inst->Opcode == RC_OPCODE_CND))
|
||||
return RC_OMOD_DISABLE;
|
||||
return inst->Omod;
|
||||
}
|
||||
|
||||
/**
|
||||
* Classify an instruction according to which ALUs etc. it needs
|
||||
*/
|
||||
static void classify_instruction(struct rc_sub_instruction * inst,
|
||||
int * needrgb, int * needalpha, int * istranscendent)
|
||||
static void
|
||||
classify_instruction(struct rc_sub_instruction *inst, int *needrgb, int *needalpha,
|
||||
int *istranscendent)
|
||||
{
|
||||
*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
|
||||
*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
|
||||
*istranscendent = 0;
|
||||
*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
|
||||
*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
|
||||
*istranscendent = 0;
|
||||
|
||||
if (inst->WriteALUResult == RC_ALURESULT_X)
|
||||
*needrgb = 1;
|
||||
else if (inst->WriteALUResult == RC_ALURESULT_W)
|
||||
*needalpha = 1;
|
||||
if (inst->WriteALUResult == RC_ALURESULT_X)
|
||||
*needrgb = 1;
|
||||
else if (inst->WriteALUResult == RC_ALURESULT_W)
|
||||
*needalpha = 1;
|
||||
|
||||
switch(inst->Opcode) {
|
||||
case RC_OPCODE_ADD:
|
||||
case RC_OPCODE_CMP:
|
||||
case RC_OPCODE_CND:
|
||||
case RC_OPCODE_DDX:
|
||||
case RC_OPCODE_DDY:
|
||||
case RC_OPCODE_FRC:
|
||||
case RC_OPCODE_MAD:
|
||||
case RC_OPCODE_MAX:
|
||||
case RC_OPCODE_MIN:
|
||||
case RC_OPCODE_MOV:
|
||||
case RC_OPCODE_MUL:
|
||||
break;
|
||||
case RC_OPCODE_COS:
|
||||
case RC_OPCODE_EX2:
|
||||
case RC_OPCODE_LG2:
|
||||
case RC_OPCODE_RCP:
|
||||
case RC_OPCODE_RSQ:
|
||||
case RC_OPCODE_SIN:
|
||||
*istranscendent = 1;
|
||||
*needalpha = 1;
|
||||
break;
|
||||
case RC_OPCODE_DP4:
|
||||
*needalpha = 1;
|
||||
FALLTHROUGH;
|
||||
case RC_OPCODE_DP3:
|
||||
*needrgb = 1;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
switch (inst->Opcode) {
|
||||
case RC_OPCODE_ADD:
|
||||
case RC_OPCODE_CMP:
|
||||
case RC_OPCODE_CND:
|
||||
case RC_OPCODE_DDX:
|
||||
case RC_OPCODE_DDY:
|
||||
case RC_OPCODE_FRC:
|
||||
case RC_OPCODE_MAD:
|
||||
case RC_OPCODE_MAX:
|
||||
case RC_OPCODE_MIN:
|
||||
case RC_OPCODE_MOV:
|
||||
case RC_OPCODE_MUL: break;
|
||||
case RC_OPCODE_COS:
|
||||
case RC_OPCODE_EX2:
|
||||
case RC_OPCODE_LG2:
|
||||
case RC_OPCODE_RCP:
|
||||
case RC_OPCODE_RSQ:
|
||||
case RC_OPCODE_SIN:
|
||||
*istranscendent = 1;
|
||||
*needalpha = 1;
|
||||
break;
|
||||
case RC_OPCODE_DP4: *needalpha = 1; FALLTHROUGH;
|
||||
case RC_OPCODE_DP3: *needrgb = 1; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
static void src_uses(struct rc_src_register src, unsigned int * rgb,
|
||||
unsigned int * alpha)
|
||||
static void
|
||||
src_uses(struct rc_src_register src, unsigned int *rgb, unsigned int *alpha)
|
||||
{
|
||||
int j;
|
||||
for(j = 0; j < 4; ++j) {
|
||||
unsigned int swz = GET_SWZ(src.Swizzle, j);
|
||||
if (swz < 3)
|
||||
*rgb = 1;
|
||||
else if (swz < 4)
|
||||
*alpha = 1;
|
||||
}
|
||||
int j;
|
||||
for (j = 0; j < 4; ++j) {
|
||||
unsigned int swz = GET_SWZ(src.Swizzle, j);
|
||||
if (swz < 3)
|
||||
*rgb = 1;
|
||||
else if (swz < 4)
|
||||
*alpha = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fill the given ALU instruction's opcodes and source operands into the given pair,
|
||||
* if possible.
|
||||
*/
|
||||
static void set_pair_instruction(struct r300_fragment_program_compiler *c,
|
||||
struct rc_pair_instruction * pair,
|
||||
struct rc_sub_instruction * inst)
|
||||
static void
|
||||
set_pair_instruction(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *pair,
|
||||
struct rc_sub_instruction *inst)
|
||||
{
|
||||
int needrgb, needalpha, istranscendent;
|
||||
const struct rc_opcode_info * opcode;
|
||||
int i;
|
||||
int needrgb, needalpha, istranscendent;
|
||||
const struct rc_opcode_info *opcode;
|
||||
int i;
|
||||
|
||||
memset(pair, 0, sizeof(struct rc_pair_instruction));
|
||||
memset(pair, 0, sizeof(struct rc_pair_instruction));
|
||||
|
||||
classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
|
||||
classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
|
||||
|
||||
if (needrgb) {
|
||||
if (istranscendent)
|
||||
pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
|
||||
else
|
||||
pair->RGB.Opcode = inst->Opcode;
|
||||
if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
|
||||
pair->RGB.Saturate = 1;
|
||||
}
|
||||
if (needalpha) {
|
||||
pair->Alpha.Opcode = inst->Opcode;
|
||||
if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
|
||||
pair->Alpha.Saturate = 1;
|
||||
}
|
||||
if (needrgb) {
|
||||
if (istranscendent)
|
||||
pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
|
||||
else
|
||||
pair->RGB.Opcode = inst->Opcode;
|
||||
if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
|
||||
pair->RGB.Saturate = 1;
|
||||
}
|
||||
if (needalpha) {
|
||||
pair->Alpha.Opcode = inst->Opcode;
|
||||
if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
|
||||
pair->Alpha.Saturate = 1;
|
||||
}
|
||||
|
||||
opcode = rc_get_opcode_info(inst->Opcode);
|
||||
opcode = rc_get_opcode_info(inst->Opcode);
|
||||
|
||||
/* Presubtract handling:
|
||||
* We need to make sure that the values used by the presubtract
|
||||
* operation end up in src0 or src1. */
|
||||
if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
|
||||
/* rc_pair_alloc_source() will fill in data for
|
||||
* pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
|
||||
int j;
|
||||
for(j = 0; j < 3; j++) {
|
||||
int src_regs;
|
||||
if(inst->SrcReg[j].File != RC_FILE_PRESUB)
|
||||
continue;
|
||||
/* Presubtract handling:
|
||||
* We need to make sure that the values used by the presubtract
|
||||
* operation end up in src0 or src1. */
|
||||
if (inst->PreSub.Opcode != RC_PRESUB_NONE) {
|
||||
/* rc_pair_alloc_source() will fill in data for
|
||||
* pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
|
||||
int j;
|
||||
for (j = 0; j < 3; j++) {
|
||||
int src_regs;
|
||||
if (inst->SrcReg[j].File != RC_FILE_PRESUB)
|
||||
continue;
|
||||
|
||||
src_regs = rc_presubtract_src_reg_count(
|
||||
inst->PreSub.Opcode);
|
||||
for(i = 0; i < src_regs; i++) {
|
||||
unsigned int rgb = 0;
|
||||
unsigned int alpha = 0;
|
||||
src_uses(inst->SrcReg[j], &rgb, &alpha);
|
||||
if(rgb) {
|
||||
pair->RGB.Src[i].File =
|
||||
inst->PreSub.SrcReg[i].File;
|
||||
pair->RGB.Src[i].Index =
|
||||
inst->PreSub.SrcReg[i].Index;
|
||||
pair->RGB.Src[i].Used = 1;
|
||||
}
|
||||
if(alpha) {
|
||||
pair->Alpha.Src[i].File =
|
||||
inst->PreSub.SrcReg[i].File;
|
||||
pair->Alpha.Src[i].Index =
|
||||
inst->PreSub.SrcReg[i].Index;
|
||||
pair->Alpha.Src[i].Used = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 0; i < opcode->NumSrcRegs; ++i) {
|
||||
int source;
|
||||
if (needrgb && !istranscendent) {
|
||||
unsigned int srcrgb = 0;
|
||||
unsigned int srcalpha = 0;
|
||||
unsigned int srcmask = 0;
|
||||
int j;
|
||||
/* We don't care about the alpha channel here. We only
|
||||
* want the part of the swizzle that writes to rgb,
|
||||
* since we are creating an rgb instruction. */
|
||||
for(j = 0; j < 3; ++j) {
|
||||
unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
|
||||
|
||||
if (swz < RC_SWIZZLE_W)
|
||||
srcrgb = 1;
|
||||
else if (swz == RC_SWIZZLE_W)
|
||||
srcalpha = 1;
|
||||
|
||||
/* We check for ZERO here as well because otherwise the zero
|
||||
* sign (which doesn't matter and we already ignore it previously
|
||||
* when checking for valid swizzle) could mess up the final negate sign.
|
||||
* Example problematic pattern where this would be produced is:
|
||||
* CONST[1] FLT32 { 0.0000, 0.0000, -4.0000, 0.0000}
|
||||
* ADD temp[0].xyz, const[0].xyz_, -const[1].z00_;
|
||||
*
|
||||
* after inline literals would become:
|
||||
* ADD temp[0].xyz, const[0].xyz_, 4.000000 (0x48).w-0-0-_;
|
||||
*
|
||||
* and after pair translate:
|
||||
* src0.xyz = const[0], src0.w = 4.000000 (0x48)
|
||||
* MAD temp[0].xyz, src0.xyz, src0.111, src0.w00
|
||||
*
|
||||
* Without the zero check there would be -src0.w00.
|
||||
*/
|
||||
if (swz < RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
|
||||
srcmask |= 1 << j;
|
||||
}
|
||||
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
|
||||
inst->SrcReg[i].File, inst->SrcReg[i].Index);
|
||||
if (source < 0) {
|
||||
rc_error(&c->Base, "Failed to translate "
|
||||
"rgb instruction.\n");
|
||||
return;
|
||||
}
|
||||
pair->RGB.Arg[i].Source = source;
|
||||
pair->RGB.Arg[i].Swizzle =
|
||||
rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
|
||||
pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
|
||||
pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
|
||||
}
|
||||
if (needalpha) {
|
||||
unsigned int srcrgb = 0;
|
||||
unsigned int srcalpha = 0;
|
||||
unsigned int swz;
|
||||
if (istranscendent) {
|
||||
swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
|
||||
} else {
|
||||
swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
|
||||
}
|
||||
|
||||
if (swz < 3)
|
||||
srcrgb = 1;
|
||||
else if (swz < 4)
|
||||
srcalpha = 1;
|
||||
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
|
||||
inst->SrcReg[i].File, inst->SrcReg[i].Index);
|
||||
if (source < 0) {
|
||||
rc_error(&c->Base, "Failed to translate "
|
||||
"alpha instruction.\n");
|
||||
return;
|
||||
}
|
||||
pair->Alpha.Arg[i].Source = source;
|
||||
pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
|
||||
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
|
||||
|
||||
if (istranscendent) {
|
||||
pair->Alpha.Arg[i].Negate =
|
||||
!!(inst->SrcReg[i].Negate &
|
||||
inst->DstReg.WriteMask);
|
||||
} else {
|
||||
pair->Alpha.Arg[i].Negate =
|
||||
!!(inst->SrcReg[i].Negate & RC_MASK_W);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Destination handling */
|
||||
if (inst->DstReg.File == RC_FILE_OUTPUT) {
|
||||
if (inst->DstReg.Index == c->OutputDepth) {
|
||||
pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
|
||||
} else {
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (inst->DstReg.Index == c->OutputColor[i]) {
|
||||
pair->RGB.Target = i;
|
||||
pair->Alpha.Target = i;
|
||||
pair->RGB.OutputWriteMask |=
|
||||
inst->DstReg.WriteMask & RC_MASK_XYZ;
|
||||
pair->Alpha.OutputWriteMask |=
|
||||
GET_BIT(inst->DstReg.WriteMask, 3);
|
||||
break;
|
||||
}
|
||||
src_regs = rc_presubtract_src_reg_count(inst->PreSub.Opcode);
|
||||
for (i = 0; i < src_regs; i++) {
|
||||
unsigned int rgb = 0;
|
||||
unsigned int alpha = 0;
|
||||
src_uses(inst->SrcReg[j], &rgb, &alpha);
|
||||
if (rgb) {
|
||||
pair->RGB.Src[i].File = inst->PreSub.SrcReg[i].File;
|
||||
pair->RGB.Src[i].Index = inst->PreSub.SrcReg[i].Index;
|
||||
pair->RGB.Src[i].Used = 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (needrgb) {
|
||||
pair->RGB.DestIndex = inst->DstReg.Index;
|
||||
pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
|
||||
}
|
||||
if (alpha) {
|
||||
pair->Alpha.Src[i].File = inst->PreSub.SrcReg[i].File;
|
||||
pair->Alpha.Src[i].Index = inst->PreSub.SrcReg[i].Index;
|
||||
pair->Alpha.Src[i].Used = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (needalpha) {
|
||||
pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
|
||||
if (pair->Alpha.WriteMask) {
|
||||
pair->Alpha.DestIndex = inst->DstReg.Index;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < opcode->NumSrcRegs; ++i) {
|
||||
int source;
|
||||
if (needrgb && !istranscendent) {
|
||||
unsigned int srcrgb = 0;
|
||||
unsigned int srcalpha = 0;
|
||||
unsigned int srcmask = 0;
|
||||
int j;
|
||||
/* We don't care about the alpha channel here. We only
|
||||
* want the part of the swizzle that writes to rgb,
|
||||
* since we are creating an rgb instruction. */
|
||||
for (j = 0; j < 3; ++j) {
|
||||
unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
|
||||
|
||||
if (needrgb) {
|
||||
pair->RGB.Omod = translate_omod(c, inst);
|
||||
}
|
||||
if (needalpha) {
|
||||
pair->Alpha.Omod = translate_omod(c, inst);
|
||||
}
|
||||
if (swz < RC_SWIZZLE_W)
|
||||
srcrgb = 1;
|
||||
else if (swz == RC_SWIZZLE_W)
|
||||
srcalpha = 1;
|
||||
|
||||
if (inst->WriteALUResult) {
|
||||
pair->WriteALUResult = inst->WriteALUResult;
|
||||
pair->ALUResultCompare = inst->ALUResultCompare;
|
||||
}
|
||||
/* We check for ZERO here as well because otherwise the zero
|
||||
* sign (which doesn't matter and we already ignore it previously
|
||||
* when checking for valid swizzle) could mess up the final negate sign.
|
||||
* Example problematic pattern where this would be produced is:
|
||||
* CONST[1] FLT32 { 0.0000, 0.0000, -4.0000, 0.0000}
|
||||
* ADD temp[0].xyz, const[0].xyz_, -const[1].z00_;
|
||||
*
|
||||
* after inline literals would become:
|
||||
* ADD temp[0].xyz, const[0].xyz_, 4.000000 (0x48).w-0-0-_;
|
||||
*
|
||||
* and after pair translate:
|
||||
* src0.xyz = const[0], src0.w = 4.000000 (0x48)
|
||||
* MAD temp[0].xyz, src0.xyz, src0.111, src0.w00
|
||||
*
|
||||
* Without the zero check there would be -src0.w00.
|
||||
*/
|
||||
if (swz < RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
|
||||
srcmask |= 1 << j;
|
||||
}
|
||||
source = rc_pair_alloc_source(pair, srcrgb, srcalpha, inst->SrcReg[i].File,
|
||||
inst->SrcReg[i].Index);
|
||||
if (source < 0) {
|
||||
rc_error(&c->Base, "Failed to translate "
|
||||
"rgb instruction.\n");
|
||||
return;
|
||||
}
|
||||
pair->RGB.Arg[i].Source = source;
|
||||
pair->RGB.Arg[i].Swizzle = rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
|
||||
pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
|
||||
pair->RGB.Arg[i].Negate =
|
||||
!!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
|
||||
}
|
||||
if (needalpha) {
|
||||
unsigned int srcrgb = 0;
|
||||
unsigned int srcalpha = 0;
|
||||
unsigned int swz;
|
||||
if (istranscendent) {
|
||||
swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
|
||||
} else {
|
||||
swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
|
||||
}
|
||||
|
||||
if (swz < 3)
|
||||
srcrgb = 1;
|
||||
else if (swz < 4)
|
||||
srcalpha = 1;
|
||||
source = rc_pair_alloc_source(pair, srcrgb, srcalpha, inst->SrcReg[i].File,
|
||||
inst->SrcReg[i].Index);
|
||||
if (source < 0) {
|
||||
rc_error(&c->Base, "Failed to translate "
|
||||
"alpha instruction.\n");
|
||||
return;
|
||||
}
|
||||
pair->Alpha.Arg[i].Source = source;
|
||||
pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
|
||||
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
|
||||
|
||||
if (istranscendent) {
|
||||
pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & inst->DstReg.WriteMask);
|
||||
} else {
|
||||
pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Destination handling */
|
||||
if (inst->DstReg.File == RC_FILE_OUTPUT) {
|
||||
if (inst->DstReg.Index == c->OutputDepth) {
|
||||
pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
|
||||
} else {
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (inst->DstReg.Index == c->OutputColor[i]) {
|
||||
pair->RGB.Target = i;
|
||||
pair->Alpha.Target = i;
|
||||
pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
|
||||
pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (needrgb) {
|
||||
pair->RGB.DestIndex = inst->DstReg.Index;
|
||||
pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
|
||||
}
|
||||
|
||||
if (needalpha) {
|
||||
pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
|
||||
if (pair->Alpha.WriteMask) {
|
||||
pair->Alpha.DestIndex = inst->DstReg.Index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (needrgb) {
|
||||
pair->RGB.Omod = translate_omod(c, inst);
|
||||
}
|
||||
if (needalpha) {
|
||||
pair->Alpha.Omod = translate_omod(c, inst);
|
||||
}
|
||||
|
||||
if (inst->WriteALUResult) {
|
||||
pair->WriteALUResult = inst->WriteALUResult;
|
||||
pair->ALUResultCompare = inst->ALUResultCompare;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void check_opcode_support(struct r300_fragment_program_compiler *c,
|
||||
struct rc_sub_instruction *inst)
|
||||
static void
|
||||
check_opcode_support(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
|
||||
{
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode);
|
||||
|
||||
if (opcode->HasDstReg) {
|
||||
if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
|
||||
rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (opcode->HasDstReg) {
|
||||
if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
|
||||
rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
|
||||
if (inst->SrcReg[i].RelAddr) {
|
||||
rc_error(&c->Base, "Fragment program does not support relative addressing "
|
||||
" of source operands.\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
|
||||
if (inst->SrcReg[i].RelAddr) {
|
||||
rc_error(&c->Base, "Fragment program does not support relative addressing "
|
||||
" of source operands.\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Translate all ALU instructions into corresponding pair instructions,
|
||||
* performing no other changes.
|
||||
*/
|
||||
void rc_pair_translate(struct radeon_compiler *cc, void *user)
|
||||
void
|
||||
rc_pair_translate(struct radeon_compiler *cc, void *user)
|
||||
{
|
||||
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
|
||||
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler *)cc;
|
||||
|
||||
for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
|
||||
inst != &c->Base.Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
const struct rc_opcode_info * opcode;
|
||||
struct rc_sub_instruction copy;
|
||||
for (struct rc_instruction *inst = c->Base.Program.Instructions.Next;
|
||||
inst != &c->Base.Program.Instructions; inst = inst->Next) {
|
||||
const struct rc_opcode_info *opcode;
|
||||
struct rc_sub_instruction copy;
|
||||
|
||||
if (inst->Type != RC_INSTRUCTION_NORMAL)
|
||||
continue;
|
||||
if (inst->Type != RC_INSTRUCTION_NORMAL)
|
||||
continue;
|
||||
|
||||
opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
|
||||
if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
|
||||
continue;
|
||||
if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
|
||||
continue;
|
||||
|
||||
copy = inst->U.I;
|
||||
copy = inst->U.I;
|
||||
|
||||
check_opcode_support(c, &copy);
|
||||
check_opcode_support(c, &copy);
|
||||
|
||||
final_rewrite(&copy);
|
||||
inst->Type = RC_INSTRUCTION_PAIR;
|
||||
set_pair_instruction(c, &inst->U.P, &copy);
|
||||
}
|
||||
final_rewrite(&copy);
|
||||
inst->Type = RC_INSTRUCTION_PAIR;
|
||||
set_pair_instruction(c, &inst->U.P, &copy);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@
|
|||
#include "radeon_compiler.h"
|
||||
#include "radeon_dataflow.h"
|
||||
|
||||
|
||||
/**
|
||||
* Transform the given clause in the following way:
|
||||
* 1. Replace it with an empty clause
|
||||
|
|
@ -26,108 +25,108 @@
|
|||
* \note The transform is called 'local' because it can only look at
|
||||
* one instruction at a time.
|
||||
*/
|
||||
void rc_local_transform(
|
||||
struct radeon_compiler * c,
|
||||
void *user)
|
||||
void
|
||||
rc_local_transform(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct radeon_program_transformation *transformations =
|
||||
(struct radeon_program_transformation*)user;
|
||||
struct rc_instruction * inst = c->Program.Instructions.Next;
|
||||
struct radeon_program_transformation *transformations =
|
||||
(struct radeon_program_transformation *)user;
|
||||
struct rc_instruction *inst = c->Program.Instructions.Next;
|
||||
|
||||
while(inst != &c->Program.Instructions) {
|
||||
struct rc_instruction * current = inst;
|
||||
int i;
|
||||
while (inst != &c->Program.Instructions) {
|
||||
struct rc_instruction *current = inst;
|
||||
int i;
|
||||
|
||||
inst = inst->Next;
|
||||
inst = inst->Next;
|
||||
|
||||
for(i = 0; transformations[i].function; ++i) {
|
||||
struct radeon_program_transformation* t = transformations + i;
|
||||
for (i = 0; transformations[i].function; ++i) {
|
||||
struct radeon_program_transformation *t = transformations + i;
|
||||
|
||||
if (t->function(c, current, t->userData))
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (t->function(c, current, t->userData))
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int rc_find_free_temporary(struct radeon_compiler * c)
|
||||
unsigned int
|
||||
rc_find_free_temporary(struct radeon_compiler *c)
|
||||
{
|
||||
/* Find the largest used temp index when called for the first time. */
|
||||
if (c->max_temp_index == -1) {
|
||||
for (struct rc_instruction * inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
const struct rc_opcode_info * opcode =
|
||||
rc_get_opcode_info(inst->U.I.Opcode);
|
||||
if (opcode->HasDstReg &&
|
||||
inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
|
||||
inst->U.I.WriteALUResult == RC_ALURESULT_NONE &&
|
||||
inst->U.I.DstReg.Index > c->max_temp_index)
|
||||
c->max_temp_index = inst->U.I.DstReg.Index;
|
||||
}
|
||||
}
|
||||
/* Find the largest used temp index when called for the first time. */
|
||||
if (c->max_temp_index == -1) {
|
||||
for (struct rc_instruction *inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
if (opcode->HasDstReg && inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
|
||||
inst->U.I.WriteALUResult == RC_ALURESULT_NONE &&
|
||||
inst->U.I.DstReg.Index > c->max_temp_index)
|
||||
c->max_temp_index = inst->U.I.DstReg.Index;
|
||||
}
|
||||
}
|
||||
|
||||
c->max_temp_index++;
|
||||
if (c->max_temp_index > RC_REGISTER_MAX_INDEX) {
|
||||
rc_error(c, "Ran out of temporary registers\n");
|
||||
return 0;
|
||||
}
|
||||
return c->max_temp_index;
|
||||
c->max_temp_index++;
|
||||
if (c->max_temp_index > RC_REGISTER_MAX_INDEX) {
|
||||
rc_error(c, "Ran out of temporary registers\n");
|
||||
return 0;
|
||||
}
|
||||
return c->max_temp_index;
|
||||
}
|
||||
|
||||
|
||||
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
|
||||
struct rc_instruction *
|
||||
rc_alloc_instruction(struct radeon_compiler *c)
|
||||
{
|
||||
struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));
|
||||
struct rc_instruction *inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));
|
||||
|
||||
memset(inst, 0, sizeof(struct rc_instruction));
|
||||
memset(inst, 0, sizeof(struct rc_instruction));
|
||||
|
||||
inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
|
||||
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
|
||||
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
|
||||
inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW;
|
||||
inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW;
|
||||
inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
|
||||
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
|
||||
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
|
||||
inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW;
|
||||
inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW;
|
||||
|
||||
return inst;
|
||||
return inst;
|
||||
}
|
||||
|
||||
void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst)
|
||||
void
|
||||
rc_insert_instruction(struct rc_instruction *after, struct rc_instruction *inst)
|
||||
{
|
||||
inst->Prev = after;
|
||||
inst->Next = after->Next;
|
||||
inst->Prev = after;
|
||||
inst->Next = after->Next;
|
||||
|
||||
inst->Prev->Next = inst;
|
||||
inst->Next->Prev = inst;
|
||||
inst->Prev->Next = inst;
|
||||
inst->Next->Prev = inst;
|
||||
}
|
||||
|
||||
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
|
||||
struct rc_instruction *
|
||||
rc_insert_new_instruction(struct radeon_compiler *c, struct rc_instruction *after)
|
||||
{
|
||||
struct rc_instruction * inst = rc_alloc_instruction(c);
|
||||
struct rc_instruction *inst = rc_alloc_instruction(c);
|
||||
|
||||
rc_insert_instruction(after, inst);
|
||||
rc_insert_instruction(after, inst);
|
||||
|
||||
return inst;
|
||||
return inst;
|
||||
}
|
||||
|
||||
void rc_remove_instruction(struct rc_instruction * inst)
|
||||
void
|
||||
rc_remove_instruction(struct rc_instruction *inst)
|
||||
{
|
||||
inst->Prev->Next = inst->Next;
|
||||
inst->Next->Prev = inst->Prev;
|
||||
inst->Prev->Next = inst->Next;
|
||||
inst->Next->Prev = inst->Prev;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of instructions in the program.
|
||||
*/
|
||||
unsigned int rc_recompute_ips(struct radeon_compiler * c)
|
||||
unsigned int
|
||||
rc_recompute_ips(struct radeon_compiler *c)
|
||||
{
|
||||
unsigned int ip = 0;
|
||||
struct rc_instruction * inst;
|
||||
unsigned int ip = 0;
|
||||
struct rc_instruction *inst;
|
||||
|
||||
for(inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
inst->IP = ip++;
|
||||
}
|
||||
for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
inst->IP = ip++;
|
||||
}
|
||||
|
||||
c->Program.Instructions.IP = 0xcafedead;
|
||||
c->Program.Instructions.IP = 0xcafedead;
|
||||
|
||||
return ip;
|
||||
return ip;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,39 +9,39 @@
|
|||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "radeon_opcodes.h"
|
||||
#include "radeon_code.h"
|
||||
#include "radeon_opcodes.h"
|
||||
#include "radeon_program_constants.h"
|
||||
#include "radeon_program_pair.h"
|
||||
|
||||
struct radeon_compiler;
|
||||
|
||||
struct rc_src_register {
|
||||
unsigned int File:4;
|
||||
unsigned int File : 4;
|
||||
|
||||
/** Negative values may be used for relative addressing. */
|
||||
unsigned int Index:RC_REGISTER_INDEX_BITS;
|
||||
unsigned int RelAddr:1;
|
||||
/** Negative values may be used for relative addressing. */
|
||||
unsigned int Index : RC_REGISTER_INDEX_BITS;
|
||||
unsigned int RelAddr : 1;
|
||||
|
||||
unsigned int Swizzle:12;
|
||||
unsigned int Swizzle : 12;
|
||||
|
||||
/** Take the component-wise absolute value */
|
||||
unsigned int Abs:1;
|
||||
/** Take the component-wise absolute value */
|
||||
unsigned int Abs : 1;
|
||||
|
||||
/** Post-Abs negation. */
|
||||
unsigned int Negate:4;
|
||||
/** Post-Abs negation. */
|
||||
unsigned int Negate : 4;
|
||||
};
|
||||
|
||||
struct rc_dst_register {
|
||||
unsigned int File:3;
|
||||
unsigned int Index:RC_REGISTER_INDEX_BITS;
|
||||
unsigned int WriteMask:4;
|
||||
unsigned int Pred:2;
|
||||
unsigned int File : 3;
|
||||
unsigned int Index : RC_REGISTER_INDEX_BITS;
|
||||
unsigned int WriteMask : 4;
|
||||
unsigned int Pred : 2;
|
||||
};
|
||||
|
||||
struct rc_presub_instruction {
|
||||
rc_presubtract_op Opcode;
|
||||
struct rc_src_register SrcReg[2];
|
||||
rc_presubtract_op Opcode;
|
||||
struct rc_src_register SrcReg[2];
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -53,94 +53,91 @@ struct rc_presub_instruction {
|
|||
* instruction types may be valid.
|
||||
*/
|
||||
struct rc_sub_instruction {
|
||||
struct rc_src_register SrcReg[3];
|
||||
struct rc_dst_register DstReg;
|
||||
struct rc_src_register SrcReg[3];
|
||||
struct rc_dst_register DstReg;
|
||||
|
||||
/**
|
||||
* Opcode of this instruction, according to \ref rc_opcode enums.
|
||||
*/
|
||||
unsigned int Opcode:8;
|
||||
/**
|
||||
* Opcode of this instruction, according to \ref rc_opcode enums.
|
||||
*/
|
||||
unsigned int Opcode : 8;
|
||||
|
||||
/**
|
||||
* Saturate each value of the result to the range [0,1] or [-1,1],
|
||||
* according to \ref rc_saturate_mode enums.
|
||||
*/
|
||||
unsigned int SaturateMode:2;
|
||||
/**
|
||||
* Saturate each value of the result to the range [0,1] or [-1,1],
|
||||
* according to \ref rc_saturate_mode enums.
|
||||
*/
|
||||
unsigned int SaturateMode : 2;
|
||||
|
||||
/**
|
||||
* Writing to the special register RC_SPECIAL_ALU_RESULT
|
||||
*/
|
||||
/*@{*/
|
||||
unsigned int WriteALUResult:2;
|
||||
unsigned int ALUResultCompare:3;
|
||||
/*@}*/
|
||||
/**
|
||||
* Writing to the special register RC_SPECIAL_ALU_RESULT
|
||||
*/
|
||||
/*@{*/
|
||||
unsigned int WriteALUResult : 2;
|
||||
unsigned int ALUResultCompare : 3;
|
||||
/*@}*/
|
||||
|
||||
/**
|
||||
* \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
|
||||
*/
|
||||
/*@{*/
|
||||
/** Source texture unit. */
|
||||
unsigned int TexSrcUnit:5;
|
||||
/**
|
||||
* \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
|
||||
*/
|
||||
/*@{*/
|
||||
/** Source texture unit. */
|
||||
unsigned int TexSrcUnit : 5;
|
||||
|
||||
/** Source texture target, one of the \ref rc_texture_target enums */
|
||||
unsigned int TexSrcTarget:3;
|
||||
/** Source texture target, one of the \ref rc_texture_target enums */
|
||||
unsigned int TexSrcTarget : 3;
|
||||
|
||||
/** True if tex instruction should do shadow comparison */
|
||||
unsigned int TexShadow:1;
|
||||
/** True if tex instruction should do shadow comparison */
|
||||
unsigned int TexShadow : 1;
|
||||
|
||||
/**/
|
||||
unsigned int TexSemWait:1;
|
||||
unsigned int TexSemAcquire:1;
|
||||
/**/
|
||||
unsigned int TexSemWait : 1;
|
||||
unsigned int TexSemAcquire : 1;
|
||||
|
||||
/**R500 Only. How to swizzle the result of a TEX lookup*/
|
||||
unsigned int TexSwizzle:12;
|
||||
/*@}*/
|
||||
/**R500 Only. How to swizzle the result of a TEX lookup*/
|
||||
unsigned int TexSwizzle : 12;
|
||||
/*@}*/
|
||||
|
||||
/** This holds information about the presubtract operation used by
|
||||
* this instruction. */
|
||||
struct rc_presub_instruction PreSub;
|
||||
/** This holds information about the presubtract operation used by
|
||||
* this instruction. */
|
||||
struct rc_presub_instruction PreSub;
|
||||
|
||||
rc_omod_op Omod;
|
||||
rc_omod_op Omod;
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
RC_INSTRUCTION_NORMAL = 0,
|
||||
RC_INSTRUCTION_PAIR
|
||||
} rc_instruction_type;
|
||||
typedef enum { RC_INSTRUCTION_NORMAL = 0, RC_INSTRUCTION_PAIR } rc_instruction_type;
|
||||
|
||||
struct rc_instruction {
|
||||
struct rc_instruction * Prev;
|
||||
struct rc_instruction * Next;
|
||||
struct rc_instruction *Prev;
|
||||
struct rc_instruction *Next;
|
||||
|
||||
rc_instruction_type Type;
|
||||
union {
|
||||
struct rc_sub_instruction I;
|
||||
struct rc_pair_instruction P;
|
||||
} U;
|
||||
rc_instruction_type Type;
|
||||
union {
|
||||
struct rc_sub_instruction I;
|
||||
struct rc_pair_instruction P;
|
||||
} U;
|
||||
|
||||
/**
|
||||
* Warning: IPs are not stable. If you want to use them,
|
||||
* you need to recompute them at the beginning of each pass
|
||||
* using \ref rc_recompute_ips
|
||||
*/
|
||||
unsigned int IP;
|
||||
/**
|
||||
* Warning: IPs are not stable. If you want to use them,
|
||||
* you need to recompute them at the beginning of each pass
|
||||
* using \ref rc_recompute_ips
|
||||
*/
|
||||
unsigned int IP;
|
||||
};
|
||||
|
||||
struct rc_program {
|
||||
/**
|
||||
* Instructions.Next points to the first instruction,
|
||||
* Instructions.Prev points to the last instruction.
|
||||
*/
|
||||
struct rc_instruction Instructions;
|
||||
/**
|
||||
* Instructions.Next points to the first instruction,
|
||||
* Instructions.Prev points to the last instruction.
|
||||
*/
|
||||
struct rc_instruction Instructions;
|
||||
|
||||
/* Long term, we should probably remove InputsRead & OutputsWritten,
|
||||
* since updating dependent state can be fragile, and they aren't
|
||||
* actually used very often. */
|
||||
uint32_t InputsRead;
|
||||
uint32_t OutputsWritten;
|
||||
uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
|
||||
/* Long term, we should probably remove InputsRead & OutputsWritten,
|
||||
* since updating dependent state can be fragile, and they aren't
|
||||
* actually used very often. */
|
||||
uint32_t InputsRead;
|
||||
uint32_t OutputsWritten;
|
||||
uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
|
||||
|
||||
struct rc_constant_list Constants;
|
||||
struct rc_constant_list Constants;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -154,36 +151,27 @@ struct rc_program {
|
|||
* The function gets passed the userData as last parameter.
|
||||
*/
|
||||
struct radeon_program_transformation {
|
||||
int (*function)(
|
||||
struct radeon_compiler*,
|
||||
struct rc_instruction*,
|
||||
void*);
|
||||
void *userData;
|
||||
int (*function)(struct radeon_compiler *, struct rc_instruction *, void *);
|
||||
void *userData;
|
||||
};
|
||||
|
||||
void rc_local_transform(
|
||||
struct radeon_compiler *c,
|
||||
void *user);
|
||||
void rc_local_transform(struct radeon_compiler *c, void *user);
|
||||
|
||||
void rc_get_used_temporaries(
|
||||
struct radeon_compiler * c,
|
||||
unsigned char * used,
|
||||
unsigned int used_length);
|
||||
void rc_get_used_temporaries(struct radeon_compiler *c, unsigned char *used,
|
||||
unsigned int used_length);
|
||||
|
||||
int rc_find_free_temporary_list(
|
||||
struct radeon_compiler * c,
|
||||
unsigned char * used,
|
||||
unsigned int used_length,
|
||||
unsigned int mask);
|
||||
int rc_find_free_temporary_list(struct radeon_compiler *c, unsigned char *used,
|
||||
unsigned int used_length, unsigned int mask);
|
||||
|
||||
unsigned int rc_find_free_temporary(struct radeon_compiler * c);
|
||||
unsigned int rc_find_free_temporary(struct radeon_compiler *c);
|
||||
|
||||
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
|
||||
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
|
||||
void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst);
|
||||
void rc_remove_instruction(struct rc_instruction * inst);
|
||||
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler *c);
|
||||
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler *c,
|
||||
struct rc_instruction *after);
|
||||
void rc_insert_instruction(struct rc_instruction *after, struct rc_instruction *inst);
|
||||
void rc_remove_instruction(struct rc_instruction *inst);
|
||||
|
||||
unsigned int rc_recompute_ips(struct radeon_compiler * c);
|
||||
unsigned int rc_recompute_ips(struct radeon_compiler *c);
|
||||
|
||||
void rc_print_program(const struct rc_program *prog);
|
||||
|
||||
|
|
|
|||
|
|
@ -19,183 +19,183 @@
|
|||
|
||||
#include "util/log.h"
|
||||
|
||||
static struct rc_instruction *emit1(
|
||||
struct radeon_compiler * c, struct rc_instruction * after,
|
||||
rc_opcode Opcode, struct rc_sub_instruction * base,
|
||||
struct rc_dst_register DstReg, struct rc_src_register SrcReg)
|
||||
static struct rc_instruction *
|
||||
emit1(struct radeon_compiler *c, struct rc_instruction *after, rc_opcode Opcode,
|
||||
struct rc_sub_instruction *base, struct rc_dst_register DstReg, struct rc_src_register SrcReg)
|
||||
{
|
||||
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
|
||||
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
|
||||
|
||||
if (base) {
|
||||
memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
|
||||
}
|
||||
if (base) {
|
||||
memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
|
||||
}
|
||||
|
||||
fpi->U.I.Opcode = Opcode;
|
||||
fpi->U.I.DstReg = DstReg;
|
||||
fpi->U.I.SrcReg[0] = SrcReg;
|
||||
return fpi;
|
||||
fpi->U.I.Opcode = Opcode;
|
||||
fpi->U.I.DstReg = DstReg;
|
||||
fpi->U.I.SrcReg[0] = SrcReg;
|
||||
return fpi;
|
||||
}
|
||||
|
||||
static struct rc_instruction *emit2(
|
||||
struct radeon_compiler * c, struct rc_instruction * after,
|
||||
rc_opcode Opcode, struct rc_sub_instruction * base,
|
||||
struct rc_dst_register DstReg,
|
||||
struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
|
||||
static struct rc_instruction *
|
||||
emit2(struct radeon_compiler *c, struct rc_instruction *after, rc_opcode Opcode,
|
||||
struct rc_sub_instruction *base, struct rc_dst_register DstReg,
|
||||
struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
|
||||
{
|
||||
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
|
||||
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
|
||||
|
||||
if (base) {
|
||||
memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
|
||||
}
|
||||
if (base) {
|
||||
memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
|
||||
}
|
||||
|
||||
fpi->U.I.Opcode = Opcode;
|
||||
fpi->U.I.DstReg = DstReg;
|
||||
fpi->U.I.SrcReg[0] = SrcReg0;
|
||||
fpi->U.I.SrcReg[1] = SrcReg1;
|
||||
return fpi;
|
||||
fpi->U.I.Opcode = Opcode;
|
||||
fpi->U.I.DstReg = DstReg;
|
||||
fpi->U.I.SrcReg[0] = SrcReg0;
|
||||
fpi->U.I.SrcReg[1] = SrcReg1;
|
||||
return fpi;
|
||||
}
|
||||
|
||||
static struct rc_instruction *emit3(
|
||||
struct radeon_compiler * c, struct rc_instruction * after,
|
||||
rc_opcode Opcode, struct rc_sub_instruction * base,
|
||||
struct rc_dst_register DstReg,
|
||||
struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
|
||||
struct rc_src_register SrcReg2)
|
||||
static struct rc_instruction *
|
||||
emit3(struct radeon_compiler *c, struct rc_instruction *after, rc_opcode Opcode,
|
||||
struct rc_sub_instruction *base, struct rc_dst_register DstReg,
|
||||
struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
|
||||
struct rc_src_register SrcReg2)
|
||||
{
|
||||
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
|
||||
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
|
||||
|
||||
if (base) {
|
||||
memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
|
||||
}
|
||||
if (base) {
|
||||
memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
|
||||
}
|
||||
|
||||
fpi->U.I.Opcode = Opcode;
|
||||
fpi->U.I.DstReg = DstReg;
|
||||
fpi->U.I.SrcReg[0] = SrcReg0;
|
||||
fpi->U.I.SrcReg[1] = SrcReg1;
|
||||
fpi->U.I.SrcReg[2] = SrcReg2;
|
||||
return fpi;
|
||||
fpi->U.I.Opcode = Opcode;
|
||||
fpi->U.I.DstReg = DstReg;
|
||||
fpi->U.I.SrcReg[0] = SrcReg0;
|
||||
fpi->U.I.SrcReg[1] = SrcReg1;
|
||||
fpi->U.I.SrcReg[2] = SrcReg2;
|
||||
return fpi;
|
||||
}
|
||||
|
||||
static struct rc_dst_register dstregtmpmask(int index, int mask)
|
||||
static struct rc_dst_register
|
||||
dstregtmpmask(int index, int mask)
|
||||
{
|
||||
struct rc_dst_register dst = {0, 0, 0};
|
||||
dst.File = RC_FILE_TEMPORARY;
|
||||
dst.Index = index;
|
||||
dst.WriteMask = mask;
|
||||
return dst;
|
||||
struct rc_dst_register dst = {0, 0, 0};
|
||||
dst.File = RC_FILE_TEMPORARY;
|
||||
dst.Index = index;
|
||||
dst.WriteMask = mask;
|
||||
return dst;
|
||||
}
|
||||
|
||||
static const struct rc_src_register builtin_one = {
|
||||
.File = RC_FILE_NONE,
|
||||
.Index = 0,
|
||||
.Swizzle = RC_SWIZZLE_1111
|
||||
};
|
||||
.File = RC_FILE_NONE, .Index = 0, .Swizzle = RC_SWIZZLE_1111};
|
||||
|
||||
static const struct rc_src_register srcreg_undefined = {
|
||||
.File = RC_FILE_NONE,
|
||||
.Index = 0,
|
||||
.Swizzle = RC_SWIZZLE_XYZW
|
||||
};
|
||||
.File = RC_FILE_NONE, .Index = 0, .Swizzle = RC_SWIZZLE_XYZW};
|
||||
|
||||
static struct rc_src_register srcreg(int file, int index)
|
||||
static struct rc_src_register
|
||||
srcreg(int file, int index)
|
||||
{
|
||||
struct rc_src_register src = srcreg_undefined;
|
||||
src.File = file;
|
||||
src.Index = index;
|
||||
return src;
|
||||
struct rc_src_register src = srcreg_undefined;
|
||||
src.File = file;
|
||||
src.Index = index;
|
||||
return src;
|
||||
}
|
||||
|
||||
static struct rc_src_register srcregswz(int file, int index, int swz)
|
||||
static struct rc_src_register
|
||||
srcregswz(int file, int index, int swz)
|
||||
{
|
||||
struct rc_src_register src = srcreg_undefined;
|
||||
src.File = file;
|
||||
src.Index = index;
|
||||
src.Swizzle = swz;
|
||||
return src;
|
||||
struct rc_src_register src = srcreg_undefined;
|
||||
src.File = file;
|
||||
src.Index = index;
|
||||
src.Swizzle = swz;
|
||||
return src;
|
||||
}
|
||||
|
||||
static struct rc_src_register absolute(struct rc_src_register reg)
|
||||
static struct rc_src_register
|
||||
absolute(struct rc_src_register reg)
|
||||
{
|
||||
struct rc_src_register newreg = reg;
|
||||
newreg.Abs = 1;
|
||||
newreg.Negate = RC_MASK_NONE;
|
||||
return newreg;
|
||||
struct rc_src_register newreg = reg;
|
||||
newreg.Abs = 1;
|
||||
newreg.Negate = RC_MASK_NONE;
|
||||
return newreg;
|
||||
}
|
||||
|
||||
static struct rc_src_register negate(struct rc_src_register reg)
|
||||
static struct rc_src_register
|
||||
negate(struct rc_src_register reg)
|
||||
{
|
||||
struct rc_src_register newreg = reg;
|
||||
newreg.Negate = newreg.Negate ^ RC_MASK_XYZW;
|
||||
return newreg;
|
||||
struct rc_src_register newreg = reg;
|
||||
newreg.Negate = newreg.Negate ^ RC_MASK_XYZW;
|
||||
return newreg;
|
||||
}
|
||||
|
||||
/* Return a copy of `reg` whose swizzle is the existing swizzle combined with
 * the per-channel selectors x, y, z, w via combine_swizzles4(). */
static struct rc_src_register
swizzle(struct rc_src_register reg, rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
{
   struct rc_src_register swizzled = reg;
   swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
   return swizzled;
}
|
||||
|
||||
static struct rc_src_register swizzle_smear(struct rc_src_register reg,
|
||||
rc_swizzle x)
|
||||
static struct rc_src_register
|
||||
swizzle_smear(struct rc_src_register reg, rc_swizzle x)
|
||||
{
|
||||
return swizzle(reg, x, x, x, x);
|
||||
return swizzle(reg, x, x, x, x);
|
||||
}
|
||||
|
||||
static struct rc_src_register swizzle_xxxx(struct rc_src_register reg)
|
||||
static struct rc_src_register
|
||||
swizzle_xxxx(struct rc_src_register reg)
|
||||
{
|
||||
return swizzle_smear(reg, RC_SWIZZLE_X);
|
||||
return swizzle_smear(reg, RC_SWIZZLE_X);
|
||||
}
|
||||
|
||||
static struct rc_src_register swizzle_yyyy(struct rc_src_register reg)
|
||||
static struct rc_src_register
|
||||
swizzle_yyyy(struct rc_src_register reg)
|
||||
{
|
||||
return swizzle_smear(reg, RC_SWIZZLE_Y);
|
||||
return swizzle_smear(reg, RC_SWIZZLE_Y);
|
||||
}
|
||||
|
||||
static struct rc_src_register swizzle_zzzz(struct rc_src_register reg)
|
||||
static struct rc_src_register
|
||||
swizzle_zzzz(struct rc_src_register reg)
|
||||
{
|
||||
return swizzle_smear(reg, RC_SWIZZLE_Z);
|
||||
return swizzle_smear(reg, RC_SWIZZLE_Z);
|
||||
}
|
||||
|
||||
static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
|
||||
static struct rc_src_register
|
||||
swizzle_wwww(struct rc_src_register reg)
|
||||
{
|
||||
return swizzle_smear(reg, RC_SWIZZLE_W);
|
||||
return swizzle_smear(reg, RC_SWIZZLE_W);
|
||||
}
|
||||
|
||||
static struct rc_dst_register new_dst_reg(struct radeon_compiler *c,
|
||||
struct rc_instruction *inst)
|
||||
static struct rc_dst_register
|
||||
new_dst_reg(struct radeon_compiler *c, struct rc_instruction *inst)
|
||||
{
|
||||
unsigned tmp = rc_find_free_temporary(c);
|
||||
return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask);
|
||||
unsigned tmp = rc_find_free_temporary(c);
|
||||
return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask);
|
||||
}
|
||||
|
||||
static void transform_DP2(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
static void
|
||||
transform_DP2(struct radeon_compiler *c, struct rc_instruction *inst)
|
||||
{
|
||||
struct rc_src_register src0 = inst->U.I.SrcReg[0];
|
||||
struct rc_src_register src1 = inst->U.I.SrcReg[1];
|
||||
src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
|
||||
src0.Swizzle &= ~(63 << (3 * 2));
|
||||
src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
|
||||
src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
|
||||
src1.Swizzle &= ~(63 << (3 * 2));
|
||||
src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
|
||||
emit2(c, inst->Prev, RC_OPCODE_DP3, &inst->U.I, inst->U.I.DstReg, src0, src1);
|
||||
rc_remove_instruction(inst);
|
||||
struct rc_src_register src0 = inst->U.I.SrcReg[0];
|
||||
struct rc_src_register src1 = inst->U.I.SrcReg[1];
|
||||
src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
|
||||
src0.Swizzle &= ~(63 << (3 * 2));
|
||||
src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
|
||||
src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
|
||||
src1.Swizzle &= ~(63 << (3 * 2));
|
||||
src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
|
||||
emit2(c, inst->Prev, RC_OPCODE_DP3, &inst->U.I, inst->U.I.DstReg, src0, src1);
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
static void transform_RSQ(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
static void
|
||||
transform_RSQ(struct radeon_compiler *c, struct rc_instruction *inst)
|
||||
{
|
||||
inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]);
|
||||
inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]);
|
||||
}
|
||||
|
||||
static void transform_KILP(struct radeon_compiler * c,
|
||||
struct rc_instruction * inst)
|
||||
static void
|
||||
transform_KILP(struct radeon_compiler *c, struct rc_instruction *inst)
|
||||
{
|
||||
inst->U.I.SrcReg[0] = negate(builtin_one);
|
||||
inst->U.I.Opcode = RC_OPCODE_KIL;
|
||||
inst->U.I.SrcReg[0] = negate(builtin_one);
|
||||
inst->U.I.Opcode = RC_OPCODE_KIL;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -207,165 +207,150 @@ static void transform_KILP(struct radeon_compiler * c,
|
|||
*
|
||||
* @note should be applicable to R300 and R500 fragment programs.
|
||||
*/
|
||||
int radeonTransformALU(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction* inst,
|
||||
void* unused)
|
||||
int
|
||||
radeonTransformALU(struct radeon_compiler *c, struct rc_instruction *inst, void *unused)
|
||||
{
|
||||
switch(inst->U.I.Opcode) {
|
||||
case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
|
||||
case RC_OPCODE_KILP: transform_KILP(c, inst); return 1;
|
||||
case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
|
||||
case RC_OPCODE_SEQ: unreachable();
|
||||
case RC_OPCODE_SGE: unreachable();
|
||||
case RC_OPCODE_SLT: unreachable();
|
||||
case RC_OPCODE_SNE: unreachable();
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
switch (inst->U.I.Opcode) {
|
||||
case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
|
||||
case RC_OPCODE_KILP: transform_KILP(c, inst); return 1;
|
||||
case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
|
||||
case RC_OPCODE_SEQ: unreachable();
|
||||
case RC_OPCODE_SGE: unreachable();
|
||||
case RC_OPCODE_SLT: unreachable();
|
||||
case RC_OPCODE_SNE: unreachable();
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void transform_r300_vertex_CMP(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
static void
|
||||
transform_r300_vertex_CMP(struct radeon_compiler *c, struct rc_instruction *inst)
|
||||
{
|
||||
/* R5xx has a CMP, but we can use it only if it reads from less than
|
||||
* three different temps. */
|
||||
if (c->is_r500 && !rc_inst_has_three_diff_temp_srcs(inst))
|
||||
return;
|
||||
/* R5xx has a CMP, but we can use it only if it reads from less than
|
||||
* three different temps. */
|
||||
if (c->is_r500 && !rc_inst_has_three_diff_temp_srcs(inst))
|
||||
return;
|
||||
|
||||
unreachable();
|
||||
unreachable();
|
||||
}
|
||||
|
||||
static void transform_r300_vertex_DP2(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
static void
|
||||
transform_r300_vertex_DP2(struct radeon_compiler *c, struct rc_instruction *inst)
|
||||
{
|
||||
struct rc_instruction *next_inst = inst->Next;
|
||||
transform_DP2(c, inst);
|
||||
next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4;
|
||||
struct rc_instruction *next_inst = inst->Next;
|
||||
transform_DP2(c, inst);
|
||||
next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4;
|
||||
}
|
||||
|
||||
static void transform_r300_vertex_DP3(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
static void
|
||||
transform_r300_vertex_DP3(struct radeon_compiler *c, struct rc_instruction *inst)
|
||||
{
|
||||
struct rc_src_register src0 = inst->U.I.SrcReg[0];
|
||||
struct rc_src_register src1 = inst->U.I.SrcReg[1];
|
||||
src0.Negate &= ~RC_MASK_W;
|
||||
src0.Swizzle &= ~(7 << (3 * 3));
|
||||
src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
|
||||
src1.Negate &= ~RC_MASK_W;
|
||||
src1.Swizzle &= ~(7 << (3 * 3));
|
||||
src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
|
||||
emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, src1);
|
||||
rc_remove_instruction(inst);
|
||||
struct rc_src_register src0 = inst->U.I.SrcReg[0];
|
||||
struct rc_src_register src1 = inst->U.I.SrcReg[1];
|
||||
src0.Negate &= ~RC_MASK_W;
|
||||
src0.Swizzle &= ~(7 << (3 * 3));
|
||||
src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
|
||||
src1.Negate &= ~RC_MASK_W;
|
||||
src1.Swizzle &= ~(7 << (3 * 3));
|
||||
src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
|
||||
emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, src1);
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst)
|
||||
static void
|
||||
transform_r300_vertex_fix_LIT(struct radeon_compiler *c, struct rc_instruction *inst)
|
||||
{
|
||||
struct rc_dst_register dst = new_dst_reg(c, inst);
|
||||
unsigned constant_swizzle;
|
||||
int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
|
||||
0.0000000000000000001,
|
||||
&constant_swizzle);
|
||||
struct rc_dst_register dst = new_dst_reg(c, inst);
|
||||
unsigned constant_swizzle;
|
||||
int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, 0.0000000000000000001,
|
||||
&constant_swizzle);
|
||||
|
||||
/* MOV dst, src */
|
||||
dst.WriteMask = RC_MASK_XYZW;
|
||||
emit1(c, inst->Prev, RC_OPCODE_MOV, NULL,
|
||||
dst,
|
||||
inst->U.I.SrcReg[0]);
|
||||
/* MOV dst, src */
|
||||
dst.WriteMask = RC_MASK_XYZW;
|
||||
emit1(c, inst->Prev, RC_OPCODE_MOV, NULL, dst, inst->U.I.SrcReg[0]);
|
||||
|
||||
/* MAX dst.y, src, 0.00...001 */
|
||||
emit2(c, inst->Prev, RC_OPCODE_MAX, NULL,
|
||||
dstregtmpmask(dst.Index, RC_MASK_Y),
|
||||
srcreg(RC_FILE_TEMPORARY, dst.Index),
|
||||
srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
|
||||
/* MAX dst.y, src, 0.00...001 */
|
||||
emit2(c, inst->Prev, RC_OPCODE_MAX, NULL, dstregtmpmask(dst.Index, RC_MASK_Y),
|
||||
srcreg(RC_FILE_TEMPORARY, dst.Index),
|
||||
srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
|
||||
|
||||
inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index);
|
||||
inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index);
|
||||
}
|
||||
|
||||
static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
|
||||
struct rc_instruction *inst)
|
||||
static void
|
||||
transform_r300_vertex_SEQ(struct radeon_compiler *c, struct rc_instruction *inst)
|
||||
{
|
||||
/* x = y <==> x >= y && y >= x */
|
||||
/* x <= y */
|
||||
struct rc_dst_register dst0 = new_dst_reg(c, inst);
|
||||
emit2(c, inst->Prev, RC_OPCODE_SGE, NULL,
|
||||
dst0,
|
||||
inst->U.I.SrcReg[0],
|
||||
inst->U.I.SrcReg[1]);
|
||||
/* x = y <==> x >= y && y >= x */
|
||||
/* x <= y */
|
||||
struct rc_dst_register dst0 = new_dst_reg(c, inst);
|
||||
emit2(c, inst->Prev, RC_OPCODE_SGE, NULL, dst0, inst->U.I.SrcReg[0], inst->U.I.SrcReg[1]);
|
||||
|
||||
/* y <= x */
|
||||
int tmp = rc_find_free_temporary(c);
|
||||
emit2(c, inst->Prev, RC_OPCODE_SGE, NULL,
|
||||
dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
|
||||
inst->U.I.SrcReg[1],
|
||||
inst->U.I.SrcReg[0]);
|
||||
/* y <= x */
|
||||
int tmp = rc_find_free_temporary(c);
|
||||
emit2(c, inst->Prev, RC_OPCODE_SGE, NULL, dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
|
||||
inst->U.I.SrcReg[1], inst->U.I.SrcReg[0]);
|
||||
|
||||
/* x && y = x * y */
|
||||
emit2(c, inst->Prev, RC_OPCODE_MUL, NULL,
|
||||
inst->U.I.DstReg,
|
||||
srcreg(dst0.File, dst0.Index),
|
||||
srcreg(RC_FILE_TEMPORARY, tmp));
|
||||
/* x && y = x * y */
|
||||
emit2(c, inst->Prev, RC_OPCODE_MUL, NULL, inst->U.I.DstReg, srcreg(dst0.File, dst0.Index),
|
||||
srcreg(RC_FILE_TEMPORARY, tmp));
|
||||
|
||||
rc_remove_instruction(inst);
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
static void transform_r300_vertex_SNE(struct radeon_compiler *c,
|
||||
struct rc_instruction *inst)
|
||||
static void
|
||||
transform_r300_vertex_SNE(struct radeon_compiler *c, struct rc_instruction *inst)
|
||||
{
|
||||
/* x != y <==> x < y || y < x */
|
||||
/* x < y */
|
||||
struct rc_dst_register dst0 = new_dst_reg(c, inst);
|
||||
emit2(c, inst->Prev, RC_OPCODE_SLT, NULL,
|
||||
dst0,
|
||||
inst->U.I.SrcReg[0],
|
||||
inst->U.I.SrcReg[1]);
|
||||
/* x != y <==> x < y || y < x */
|
||||
/* x < y */
|
||||
struct rc_dst_register dst0 = new_dst_reg(c, inst);
|
||||
emit2(c, inst->Prev, RC_OPCODE_SLT, NULL, dst0, inst->U.I.SrcReg[0], inst->U.I.SrcReg[1]);
|
||||
|
||||
/* y < x */
|
||||
int tmp = rc_find_free_temporary(c);
|
||||
emit2(c, inst->Prev, RC_OPCODE_SLT, NULL,
|
||||
dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
|
||||
inst->U.I.SrcReg[1],
|
||||
inst->U.I.SrcReg[0]);
|
||||
/* y < x */
|
||||
int tmp = rc_find_free_temporary(c);
|
||||
emit2(c, inst->Prev, RC_OPCODE_SLT, NULL, dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
|
||||
inst->U.I.SrcReg[1], inst->U.I.SrcReg[0]);
|
||||
|
||||
/* x || y = max(x, y) */
|
||||
emit2(c, inst->Prev, RC_OPCODE_MAX, NULL,
|
||||
inst->U.I.DstReg,
|
||||
srcreg(dst0.File, dst0.Index),
|
||||
srcreg(RC_FILE_TEMPORARY, tmp));
|
||||
/* x || y = max(x, y) */
|
||||
emit2(c, inst->Prev, RC_OPCODE_MAX, NULL, inst->U.I.DstReg, srcreg(dst0.File, dst0.Index),
|
||||
srcreg(RC_FILE_TEMPORARY, tmp));
|
||||
|
||||
rc_remove_instruction(inst);
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
/**
|
||||
* For use with rc_local_transform, this transforms non-native ALU
|
||||
* instructions of the r300 up to r500 vertex engine.
|
||||
*/
|
||||
int r300_transform_vertex_alu(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction* inst,
|
||||
void* unused)
|
||||
int
|
||||
r300_transform_vertex_alu(struct radeon_compiler *c, struct rc_instruction *inst, void *unused)
|
||||
{
|
||||
switch(inst->U.I.Opcode) {
|
||||
case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
|
||||
case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
|
||||
case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
|
||||
case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
|
||||
case RC_OPCODE_SEQ:
|
||||
if (!c->is_r500) {
|
||||
transform_r300_vertex_SEQ(c, inst);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
case RC_OPCODE_SNE:
|
||||
if (!c->is_r500) {
|
||||
transform_r300_vertex_SNE(c, inst);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
switch (inst->U.I.Opcode) {
|
||||
case RC_OPCODE_CMP:
|
||||
transform_r300_vertex_CMP(c, inst);
|
||||
return 1;
|
||||
case RC_OPCODE_DP2:
|
||||
transform_r300_vertex_DP2(c, inst);
|
||||
return 1;
|
||||
case RC_OPCODE_DP3:
|
||||
transform_r300_vertex_DP3(c, inst);
|
||||
return 1;
|
||||
case RC_OPCODE_LIT:
|
||||
transform_r300_vertex_fix_LIT(c, inst);
|
||||
return 1;
|
||||
case RC_OPCODE_SEQ:
|
||||
if (!c->is_r500) {
|
||||
transform_r300_vertex_SEQ(c, inst);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
case RC_OPCODE_SNE:
|
||||
if (!c->is_r500) {
|
||||
transform_r300_vertex_SNE(c, inst);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -374,21 +359,20 @@ int r300_transform_vertex_alu(
|
|||
* @warning This explicitly changes the form of DDX and DDY!
|
||||
*/
|
||||
|
||||
int radeonStubDeriv(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst,
|
||||
void* unused)
|
||||
int
|
||||
radeonStubDeriv(struct radeon_compiler *c, struct rc_instruction *inst, void *unused)
|
||||
{
|
||||
if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
|
||||
return 0;
|
||||
if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
|
||||
return 0;
|
||||
|
||||
inst->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
|
||||
inst->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
|
||||
|
||||
mesa_logw_once("r300: WARNING: Shader is trying to use derivatives, "
|
||||
"but the hardware doesn't support it. "
|
||||
"Expect possible misrendering (it's not a bug, do not report it).");
|
||||
mesa_logw_once("r300: WARNING: Shader is trying to use derivatives, "
|
||||
"but the hardware doesn't support it. "
|
||||
"Expect possible misrendering (it's not a bug, do not report it).");
|
||||
|
||||
return 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -399,43 +383,42 @@ int radeonStubDeriv(struct radeon_compiler* c,
|
|||
* @warning This explicitly changes the form of DDX and DDY!
|
||||
*/
|
||||
|
||||
int radeonTransformDeriv(struct radeon_compiler* c,
|
||||
struct rc_instruction* inst,
|
||||
void* unused)
|
||||
int
|
||||
radeonTransformDeriv(struct radeon_compiler *c, struct rc_instruction *inst, void *unused)
|
||||
{
|
||||
if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
|
||||
return 0;
|
||||
if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
|
||||
return 0;
|
||||
|
||||
inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;
|
||||
inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
|
||||
inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;
|
||||
inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
|
||||
|
||||
return 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
int rc_force_output_alpha_to_one(struct radeon_compiler *c,
|
||||
struct rc_instruction *inst, void *data)
|
||||
int
|
||||
rc_force_output_alpha_to_one(struct radeon_compiler *c, struct rc_instruction *inst, void *data)
|
||||
{
|
||||
struct r300_fragment_program_compiler *fragc = (struct r300_fragment_program_compiler*)c;
|
||||
const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
unsigned tmp;
|
||||
struct r300_fragment_program_compiler *fragc = (struct r300_fragment_program_compiler *)c;
|
||||
const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
unsigned tmp;
|
||||
|
||||
if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT ||
|
||||
inst->U.I.DstReg.Index == fragc->OutputDepth)
|
||||
return 1;
|
||||
if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT ||
|
||||
inst->U.I.DstReg.Index == fragc->OutputDepth)
|
||||
return 1;
|
||||
|
||||
tmp = rc_find_free_temporary(c);
|
||||
tmp = rc_find_free_temporary(c);
|
||||
|
||||
/* Insert MOV after inst, set alpha to 1. */
|
||||
emit1(c, inst, RC_OPCODE_MOV, NULL, inst->U.I.DstReg,
|
||||
srcregswz(RC_FILE_TEMPORARY, tmp, RC_SWIZZLE_XYZ1));
|
||||
/* Insert MOV after inst, set alpha to 1. */
|
||||
emit1(c, inst, RC_OPCODE_MOV, NULL, inst->U.I.DstReg,
|
||||
srcregswz(RC_FILE_TEMPORARY, tmp, RC_SWIZZLE_XYZ1));
|
||||
|
||||
/* Re-route the destination of inst to the source of mov. */
|
||||
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.DstReg.Index = tmp;
|
||||
/* Re-route the destination of inst to the source of mov. */
|
||||
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.DstReg.Index = tmp;
|
||||
|
||||
/* Move the saturate output modifier to the MOV instruction
|
||||
* (for better copy propagation). */
|
||||
inst->Next->U.I.SaturateMode = inst->U.I.SaturateMode;
|
||||
inst->U.I.SaturateMode = RC_SATURATE_NONE;
|
||||
return 1;
|
||||
/* Move the saturate output modifier to the MOV instruction
|
||||
* (for better copy propagation). */
|
||||
inst->Next->U.I.SaturateMode = inst->U.I.SaturateMode;
|
||||
inst->U.I.SaturateMode = RC_SATURATE_NONE;
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,27 +8,15 @@
|
|||
|
||||
#include "radeon_program.h"
|
||||
|
||||
/* Per-instruction transform callbacks. Each takes the compiler, the
 * instruction to inspect and an opaque user pointer, and returns an int
 * status. */
int radeonTransformALU(struct radeon_compiler *c, struct rc_instruction *inst, void *unused);

int r300_transform_vertex_alu(struct radeon_compiler *c, struct rc_instruction *inst,
                              void *unused);

int radeonStubDeriv(struct radeon_compiler *c, struct rc_instruction *inst, void *unused);

int radeonTransformDeriv(struct radeon_compiler *c, struct rc_instruction *inst, void *unused);

int rc_force_output_alpha_to_one(struct radeon_compiler *c, struct rc_instruction *inst,
                                 void *data);
|
||||
|
||||
#endif /* __RADEON_PROGRAM_ALU_H_ */
|
||||
|
|
|
|||
|
|
@ -7,105 +7,106 @@
|
|||
#define RADEON_PROGRAM_CONSTANTS_H
|
||||
|
||||
/* Saturate output modifier of an instruction. */
typedef enum {
   RC_SATURATE_NONE = 0,
   RC_SATURATE_ZERO_ONE,
   RC_SATURATE_MINUS_PLUS_ONE
} rc_saturate_mode;
|
||||
|
||||
/* Texture target kinds. */
typedef enum {
   RC_TEXTURE_2D_ARRAY,
   RC_TEXTURE_1D_ARRAY,
   RC_TEXTURE_CUBE,
   RC_TEXTURE_3D,
   RC_TEXTURE_RECT,
   RC_TEXTURE_2D,
   RC_TEXTURE_1D
} rc_texture_target;
|
||||
|
||||
/* Register files a source or destination register can refer to. */
typedef enum {
   /**
    * Used to indicate unused register descriptions and
    * source register that use a constant swizzle.
    */
   RC_FILE_NONE = 0,
   RC_FILE_TEMPORARY,

   /**
    * Input register.
    *
    * \note The compiler attaches no implicit semantics to input registers.
    * Fragment/vertex program specific semantics must be defined explicitly
    * using the appropriate compiler interfaces.
    */
   RC_FILE_INPUT,

   /**
    * Output register.
    *
    * \note The compiler attaches no implicit semantics to output registers.
    * Fragment/vertex program specific semantics must be defined explicitly
    * using the appropriate compiler interfaces.
    */
   RC_FILE_OUTPUT,
   RC_FILE_ADDRESS,

   /**
    * Indicates a constant from the \ref rc_constant_list .
    */
   RC_FILE_CONSTANT,

   /**
    * Indicates a special register, see RC_SPECIAL_xxx.
    */
   RC_FILE_SPECIAL,

   /**
    * Indicates this register should use the result of the presubtract
    * operation.
    */
   RC_FILE_PRESUB,

   /**
    * Indicates that the source index has been encoded as a 7-bit float.
    */
   RC_FILE_INLINE
} rc_register_file;
|
||||
|
||||
/* Indices of special registers (register file RC_FILE_SPECIAL). */
enum {
   /** R500 fragment program ALU result "register" */
   RC_SPECIAL_ALU_RESULT = 0,

   /** Must be last */
   RC_NUM_SPECIAL_REGISTERS
};
|
||||
|
||||
/* Number of bits in a register index, and the resulting index limit
 * (1 << bits). */
#define RC_REGISTER_INDEX_BITS 11
#define RC_REGISTER_MAX_INDEX  (1 << RC_REGISTER_INDEX_BITS)
|
||||
|
||||
/* Per-channel swizzle selector. Values fit in 3 bits; selectors from
 * RC_SWIZZLE_ZERO upwards are treated as inline constants by
 * is_swizzle_inline_constant(). */
typedef enum {
   RC_SWIZZLE_X = 0,
   RC_SWIZZLE_Y,
   RC_SWIZZLE_Z,
   RC_SWIZZLE_W,
   RC_SWIZZLE_ZERO,
   RC_SWIZZLE_ONE,
   RC_SWIZZLE_HALF,
   RC_SWIZZLE_UNUSED
} rc_swizzle;
|
||||
|
||||
/* Return non-zero when `swizzle` is RC_SWIZZLE_ZERO or any later enumerator,
 * i.e. it does not select one of the X/Y/Z/W channels. */
static inline int
is_swizzle_inline_constant(rc_swizzle swizzle)
{
   return swizzle >= RC_SWIZZLE_ZERO;
}
|
||||
|
||||
/* A packed swizzle stores four 3-bit channel selectors, channel 0 in the
 * lowest bits. */

/* Pack four selectors into one swizzle word. */
#define RC_MAKE_SWIZZLE(a, b, c, d) (((a) << 0) | ((b) << 3) | ((c) << 6) | ((d) << 9))
/* Pack the same selector into all four channels. */
#define RC_MAKE_SWIZZLE_SMEAR(a)    RC_MAKE_SWIZZLE((a), (a), (a), (a))
/* Extract the selector of channel `idx` from a packed swizzle. */
#define GET_SWZ(swz, idx)           (((swz) >> ((idx) * 3)) & 0x7)
/* Extract bit `idx` from a mask. */
#define GET_BIT(msk, idx)           (((msk) >> (idx)) & 0x1)
/* Overwrite the selector of channel `idx` in the lvalue `swz` with `newv`.
 * Note that `swz` and `idx` are evaluated more than once. */
#define SET_SWZ(swz, idx, newv)                                                                    \
   do {                                                                                            \
      (swz) = ((swz) & ~(7 << ((idx) * 3))) | ((newv) << ((idx) * 3));                             \
   } while (0)
|
||||
|
||||
#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
|
||||
#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO)
|
||||
|
|
@ -127,70 +128,64 @@ static inline int is_swizzle_inline_constant(rc_swizzle swizzle){
|
|||
*/
|
||||
/*@{*/
|
||||
/* One bit per channel: X = bit 0, Y = bit 1, Z = bit 2, W = bit 3. */
#define RC_MASK_NONE 0
#define RC_MASK_X    1
#define RC_MASK_Y    2
#define RC_MASK_Z    4
#define RC_MASK_W    8
#define RC_MASK_XY   (RC_MASK_X | RC_MASK_Y)
#define RC_MASK_XYZ  (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)
#define RC_MASK_XYW  (RC_MASK_X | RC_MASK_Y | RC_MASK_W)
#define RC_MASK_XYZW (RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W)
|
||||
/*@}*/
|
||||
|
||||
/* Selects whether, and from which channel (X or W), the ALU result is
 * written. */
typedef enum { RC_ALURESULT_NONE = 0, RC_ALURESULT_X, RC_ALURESULT_W } rc_write_aluresult;
|
||||
|
||||
/* Presubtract operations; the per-enumerator comments give the computed
 * expression. */
typedef enum {
   RC_PRESUB_NONE = 0,

   /** 1 - 2 * src0 */
   RC_PRESUB_BIAS,

   /** src1 - src0 */
   RC_PRESUB_SUB,

   /** src1 + src0 */
   RC_PRESUB_ADD,

   /** 1 - src0 */
   RC_PRESUB_INV
} rc_presubtract_op;
|
||||
|
||||
/* Output modifier selectors (multiply or divide by 2, 4 or 8, or disabled). */
typedef enum {
   RC_OMOD_MUL_1,
   RC_OMOD_MUL_2,
   RC_OMOD_MUL_4,
   RC_OMOD_MUL_8,
   RC_OMOD_DIV_2,
   RC_OMOD_DIV_4,
   RC_OMOD_DIV_8,
   RC_OMOD_DISABLE
} rc_omod_op;
|
||||
|
||||
/* Number of source registers a presubtract operation reads: 1 for BIAS and
 * INV, 2 for ADD and SUB, 0 for anything else (including RC_PRESUB_NONE). */
static inline int
rc_presubtract_src_reg_count(rc_presubtract_op op)
{
   switch (op) {
   case RC_PRESUB_BIAS:
   case RC_PRESUB_INV:
      return 1;
   case RC_PRESUB_ADD:
   case RC_PRESUB_SUB:
      return 2;
   default:
      return 0;
   }
}
|
||||
|
||||
#define RC_SOURCE_NONE 0x0
|
||||
#define RC_SOURCE_RGB 0x1
|
||||
#define RC_SOURCE_ALPHA 0x2
|
||||
|
||||
/* Predication mode selector. */
typedef enum { RC_PRED_DISABLED, RC_PRED_SET, RC_PRED_INV } rc_predicate_mode;
|
||||
|
||||
#endif /* RADEON_PROGRAM_CONSTANTS_H */
|
||||
|
|
|
|||
|
|
@ -13,205 +13,188 @@
|
|||
* Return the source slot where we installed the given register access,
|
||||
* or -1 if no slot was free anymore.
|
||||
*/
|
||||
int rc_pair_alloc_source(struct rc_pair_instruction *pair,
|
||||
unsigned int rgb, unsigned int alpha,
|
||||
rc_register_file file, unsigned int index)
|
||||
int
|
||||
rc_pair_alloc_source(struct rc_pair_instruction *pair, unsigned int rgb, unsigned int alpha,
|
||||
rc_register_file file, unsigned int index)
|
||||
{
|
||||
int candidate = -1;
|
||||
int candidate_quality = -1;
|
||||
unsigned int alpha_used = 0;
|
||||
unsigned int rgb_used = 0;
|
||||
int i;
|
||||
int candidate = -1;
|
||||
int candidate_quality = -1;
|
||||
unsigned int alpha_used = 0;
|
||||
unsigned int rgb_used = 0;
|
||||
int i;
|
||||
|
||||
if ((!rgb && !alpha) || file == RC_FILE_NONE)
|
||||
return 0;
|
||||
if ((!rgb && !alpha) || file == RC_FILE_NONE)
|
||||
return 0;
|
||||
|
||||
/* Make sure only one presubtract operation is used per instruction. */
|
||||
if (file == RC_FILE_PRESUB) {
|
||||
if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used
|
||||
&& index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
return -1;
|
||||
}
|
||||
/* Make sure only one presubtract operation is used per instruction. */
|
||||
if (file == RC_FILE_PRESUB) {
|
||||
if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used &&
|
||||
index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used
|
||||
&& index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used &&
|
||||
index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 0; i < 3; ++i) {
|
||||
int q = 0;
|
||||
if (rgb) {
|
||||
if (pair->RGB.Src[i].Used) {
|
||||
if (pair->RGB.Src[i].File != file ||
|
||||
pair->RGB.Src[i].Index != index) {
|
||||
rgb_used++;
|
||||
continue;
|
||||
}
|
||||
q++;
|
||||
}
|
||||
}
|
||||
if (alpha) {
|
||||
if (pair->Alpha.Src[i].Used) {
|
||||
if (pair->Alpha.Src[i].File != file ||
|
||||
pair->Alpha.Src[i].Index != index) {
|
||||
alpha_used++;
|
||||
continue;
|
||||
}
|
||||
q++;
|
||||
}
|
||||
}
|
||||
if (q > candidate_quality) {
|
||||
candidate_quality = q;
|
||||
candidate = i;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < 3; ++i) {
|
||||
int q = 0;
|
||||
if (rgb) {
|
||||
if (pair->RGB.Src[i].Used) {
|
||||
if (pair->RGB.Src[i].File != file || pair->RGB.Src[i].Index != index) {
|
||||
rgb_used++;
|
||||
continue;
|
||||
}
|
||||
q++;
|
||||
}
|
||||
}
|
||||
if (alpha) {
|
||||
if (pair->Alpha.Src[i].Used) {
|
||||
if (pair->Alpha.Src[i].File != file || pair->Alpha.Src[i].Index != index) {
|
||||
alpha_used++;
|
||||
continue;
|
||||
}
|
||||
q++;
|
||||
}
|
||||
}
|
||||
if (q > candidate_quality) {
|
||||
candidate_quality = q;
|
||||
candidate = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (file == RC_FILE_PRESUB) {
|
||||
candidate = RC_PAIR_PRESUB_SRC;
|
||||
} else if (candidate < 0 || (rgb && rgb_used > 2)
|
||||
|| (alpha && alpha_used > 2)) {
|
||||
return -1;
|
||||
}
|
||||
if (file == RC_FILE_PRESUB) {
|
||||
candidate = RC_PAIR_PRESUB_SRC;
|
||||
} else if (candidate < 0 || (rgb && rgb_used > 2) || (alpha && alpha_used > 2)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* candidate >= 0 */
|
||||
/* candidate >= 0 */
|
||||
|
||||
if (rgb) {
|
||||
pair->RGB.Src[candidate].Used = 1;
|
||||
pair->RGB.Src[candidate].File = file;
|
||||
pair->RGB.Src[candidate].Index = index;
|
||||
if (candidate == RC_PAIR_PRESUB_SRC) {
|
||||
/* For registers with the RC_FILE_PRESUB file,
|
||||
* the index stores the presubtract op. */
|
||||
int src_regs = rc_presubtract_src_reg_count(index);
|
||||
for(i = 0; i < src_regs; i++) {
|
||||
pair->RGB.Src[i].Used = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (alpha) {
|
||||
pair->Alpha.Src[candidate].Used = 1;
|
||||
pair->Alpha.Src[candidate].File = file;
|
||||
pair->Alpha.Src[candidate].Index = index;
|
||||
if (candidate == RC_PAIR_PRESUB_SRC) {
|
||||
/* For registers with the RC_FILE_PRESUB file,
|
||||
* the index stores the presubtract op. */
|
||||
int src_regs = rc_presubtract_src_reg_count(index);
|
||||
for(i=0; i < src_regs; i++) {
|
||||
pair->Alpha.Src[i].Used = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (rgb) {
|
||||
pair->RGB.Src[candidate].Used = 1;
|
||||
pair->RGB.Src[candidate].File = file;
|
||||
pair->RGB.Src[candidate].Index = index;
|
||||
if (candidate == RC_PAIR_PRESUB_SRC) {
|
||||
/* For registers with the RC_FILE_PRESUB file,
|
||||
* the index stores the presubtract op. */
|
||||
int src_regs = rc_presubtract_src_reg_count(index);
|
||||
for (i = 0; i < src_regs; i++) {
|
||||
pair->RGB.Src[i].Used = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (alpha) {
|
||||
pair->Alpha.Src[candidate].Used = 1;
|
||||
pair->Alpha.Src[candidate].File = file;
|
||||
pair->Alpha.Src[candidate].Index = index;
|
||||
if (candidate == RC_PAIR_PRESUB_SRC) {
|
||||
/* For registers with the RC_FILE_PRESUB file,
|
||||
* the index stores the presubtract op. */
|
||||
int src_regs = rc_presubtract_src_reg_count(index);
|
||||
for (i = 0; i < src_regs; i++) {
|
||||
pair->Alpha.Src[i].Used = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return candidate;
|
||||
return candidate;
|
||||
}
|
||||
|
||||
static void pair_foreach_source_callback(
|
||||
struct rc_pair_instruction * pair,
|
||||
void * data,
|
||||
rc_pair_foreach_src_fn cb,
|
||||
unsigned int swz,
|
||||
unsigned int src)
|
||||
static void
|
||||
pair_foreach_source_callback(struct rc_pair_instruction *pair, void *data,
|
||||
rc_pair_foreach_src_fn cb, unsigned int swz, unsigned int src)
|
||||
{
|
||||
/* swz > 3 means that the swizzle is either not used, or a constant
|
||||
* swizzle (e.g. 0, 1, 0.5). */
|
||||
if(swz > 3)
|
||||
return;
|
||||
/* swz > 3 means that the swizzle is either not used, or a constant
|
||||
* swizzle (e.g. 0, 1, 0.5). */
|
||||
if (swz > 3)
|
||||
return;
|
||||
|
||||
if(swz == RC_SWIZZLE_W) {
|
||||
if (src == RC_PAIR_PRESUB_SRC) {
|
||||
unsigned int i;
|
||||
unsigned int src_count = rc_presubtract_src_reg_count(
|
||||
pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
|
||||
for(i = 0; i < src_count; i++) {
|
||||
cb(data, &pair->Alpha.Src[i]);
|
||||
}
|
||||
} else {
|
||||
cb(data, &pair->Alpha.Src[src]);
|
||||
}
|
||||
} else {
|
||||
if (src == RC_PAIR_PRESUB_SRC) {
|
||||
unsigned int i;
|
||||
unsigned int src_count = rc_presubtract_src_reg_count(
|
||||
pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index);
|
||||
for(i = 0; i < src_count; i++) {
|
||||
cb(data, &pair->RGB.Src[i]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
cb(data, &pair->RGB.Src[src]);
|
||||
}
|
||||
}
|
||||
if (swz == RC_SWIZZLE_W) {
|
||||
if (src == RC_PAIR_PRESUB_SRC) {
|
||||
unsigned int i;
|
||||
unsigned int src_count =
|
||||
rc_presubtract_src_reg_count(pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
|
||||
for (i = 0; i < src_count; i++) {
|
||||
cb(data, &pair->Alpha.Src[i]);
|
||||
}
|
||||
} else {
|
||||
cb(data, &pair->Alpha.Src[src]);
|
||||
}
|
||||
} else {
|
||||
if (src == RC_PAIR_PRESUB_SRC) {
|
||||
unsigned int i;
|
||||
unsigned int src_count =
|
||||
rc_presubtract_src_reg_count(pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index);
|
||||
for (i = 0; i < src_count; i++) {
|
||||
cb(data, &pair->RGB.Src[i]);
|
||||
}
|
||||
} else {
|
||||
cb(data, &pair->RGB.Src[src]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void rc_pair_foreach_source_that_alpha_reads(
|
||||
struct rc_pair_instruction * pair,
|
||||
void * data,
|
||||
rc_pair_foreach_src_fn cb)
|
||||
void
|
||||
rc_pair_foreach_source_that_alpha_reads(struct rc_pair_instruction *pair, void *data,
|
||||
rc_pair_foreach_src_fn cb)
|
||||
{
|
||||
unsigned int i;
|
||||
const struct rc_opcode_info * info =
|
||||
rc_get_opcode_info(pair->Alpha.Opcode);
|
||||
for(i = 0; i < info->NumSrcRegs; i++) {
|
||||
pair_foreach_source_callback(pair, data, cb,
|
||||
GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0),
|
||||
pair->Alpha.Arg[i].Source);
|
||||
}
|
||||
unsigned int i;
|
||||
const struct rc_opcode_info *info = rc_get_opcode_info(pair->Alpha.Opcode);
|
||||
for (i = 0; i < info->NumSrcRegs; i++) {
|
||||
pair_foreach_source_callback(pair, data, cb, GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0),
|
||||
pair->Alpha.Arg[i].Source);
|
||||
}
|
||||
}
|
||||
|
||||
void rc_pair_foreach_source_that_rgb_reads(
|
||||
struct rc_pair_instruction * pair,
|
||||
void * data,
|
||||
rc_pair_foreach_src_fn cb)
|
||||
void
|
||||
rc_pair_foreach_source_that_rgb_reads(struct rc_pair_instruction *pair, void *data,
|
||||
rc_pair_foreach_src_fn cb)
|
||||
{
|
||||
unsigned int i;
|
||||
const struct rc_opcode_info * info =
|
||||
rc_get_opcode_info(pair->RGB.Opcode);
|
||||
for(i = 0; i < info->NumSrcRegs; i++) {
|
||||
unsigned int chan;
|
||||
unsigned int swz = RC_SWIZZLE_UNUSED;
|
||||
/* Find a swizzle that is either X,Y,Z,or W. We assume here
|
||||
* that if one channel swizzles X,Y, or Z, then none of the
|
||||
* other channels swizzle W, and vice-versa. */
|
||||
for(chan = 0; chan < 4; chan++) {
|
||||
swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan);
|
||||
if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
|
||||
|| swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W)
|
||||
continue;
|
||||
}
|
||||
pair_foreach_source_callback(pair, data, cb,
|
||||
swz,
|
||||
pair->RGB.Arg[i].Source);
|
||||
}
|
||||
unsigned int i;
|
||||
const struct rc_opcode_info *info = rc_get_opcode_info(pair->RGB.Opcode);
|
||||
for (i = 0; i < info->NumSrcRegs; i++) {
|
||||
unsigned int chan;
|
||||
unsigned int swz = RC_SWIZZLE_UNUSED;
|
||||
/* Find a swizzle that is either X,Y,Z,or W. We assume here
|
||||
* that if one channel swizzles X,Y, or Z, then none of the
|
||||
* other channels swizzle W, and vice-versa. */
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan);
|
||||
if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z ||
|
||||
swz == RC_SWIZZLE_W)
|
||||
continue;
|
||||
}
|
||||
pair_foreach_source_callback(pair, data, cb, swz, pair->RGB.Arg[i].Source);
|
||||
}
|
||||
}
|
||||
|
||||
struct rc_pair_instruction_source * rc_pair_get_src(
|
||||
struct rc_pair_instruction * pair_inst,
|
||||
struct rc_pair_instruction_arg * arg)
|
||||
struct rc_pair_instruction_source *
|
||||
rc_pair_get_src(struct rc_pair_instruction *pair_inst, struct rc_pair_instruction_arg *arg)
|
||||
{
|
||||
unsigned int type;
|
||||
unsigned int type;
|
||||
|
||||
type = rc_source_type_swz(arg->Swizzle);
|
||||
type = rc_source_type_swz(arg->Swizzle);
|
||||
|
||||
if (type & RC_SOURCE_RGB) {
|
||||
return &pair_inst->RGB.Src[arg->Source];
|
||||
} else if (type & RC_SOURCE_ALPHA) {
|
||||
return &pair_inst->Alpha.Src[arg->Source];
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
if (type & RC_SOURCE_RGB) {
|
||||
return &pair_inst->RGB.Src[arg->Source];
|
||||
} else if (type & RC_SOURCE_ALPHA) {
|
||||
return &pair_inst->Alpha.Src[arg->Source];
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int rc_pair_get_src_index(
|
||||
struct rc_pair_instruction * pair_inst,
|
||||
struct rc_pair_instruction_source * src)
|
||||
int
|
||||
rc_pair_get_src_index(struct rc_pair_instruction *pair_inst, struct rc_pair_instruction_source *src)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 3; i++) {
|
||||
if (&pair_inst->RGB.Src[i] == src
|
||||
|| &pair_inst->Alpha.Src[i] == src) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
int i;
|
||||
for (i = 0; i < 3; i++) {
|
||||
if (&pair_inst->RGB.Src[i] == src || &pair_inst->Alpha.Src[i] == src) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@
|
|||
|
||||
struct radeon_compiler;
|
||||
|
||||
|
||||
/**
|
||||
* \file
|
||||
* Represents a paired ALU instruction, as found in R300 and R500
|
||||
|
|
@ -34,73 +33,64 @@ struct radeon_compiler;
|
|||
#define RC_PAIR_PRESUB_SRC 3
|
||||
|
||||
struct rc_pair_instruction_source {
|
||||
unsigned int Used:1;
|
||||
unsigned int File:4;
|
||||
unsigned int Index:RC_REGISTER_INDEX_BITS;
|
||||
unsigned int Used : 1;
|
||||
unsigned int File : 4;
|
||||
unsigned int Index : RC_REGISTER_INDEX_BITS;
|
||||
};
|
||||
|
||||
struct rc_pair_instruction_arg {
|
||||
unsigned int Source:2;
|
||||
unsigned int Swizzle:12;
|
||||
unsigned int Abs:1;
|
||||
unsigned int Negate:1;
|
||||
unsigned int Source : 2;
|
||||
unsigned int Swizzle : 12;
|
||||
unsigned int Abs : 1;
|
||||
unsigned int Negate : 1;
|
||||
};
|
||||
|
||||
struct rc_pair_sub_instruction {
|
||||
unsigned int Opcode:8;
|
||||
unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
|
||||
unsigned int WriteMask:4;
|
||||
unsigned int Target:2;
|
||||
unsigned int OutputWriteMask:3;
|
||||
unsigned int DepthWriteMask:1;
|
||||
unsigned int Saturate:1;
|
||||
unsigned int Omod:3;
|
||||
unsigned int Opcode : 8;
|
||||
unsigned int DestIndex : RC_REGISTER_INDEX_BITS;
|
||||
unsigned int WriteMask : 4;
|
||||
unsigned int Target : 2;
|
||||
unsigned int OutputWriteMask : 3;
|
||||
unsigned int DepthWriteMask : 1;
|
||||
unsigned int Saturate : 1;
|
||||
unsigned int Omod : 3;
|
||||
|
||||
struct rc_pair_instruction_source Src[4];
|
||||
struct rc_pair_instruction_arg Arg[3];
|
||||
struct rc_pair_instruction_source Src[4];
|
||||
struct rc_pair_instruction_arg Arg[3];
|
||||
};
|
||||
|
||||
struct rc_pair_instruction {
|
||||
struct rc_pair_sub_instruction RGB;
|
||||
struct rc_pair_sub_instruction Alpha;
|
||||
struct rc_pair_sub_instruction RGB;
|
||||
struct rc_pair_sub_instruction Alpha;
|
||||
|
||||
unsigned int WriteALUResult:2;
|
||||
unsigned int ALUResultCompare:3;
|
||||
unsigned int Nop:1;
|
||||
unsigned int SemWait:1;
|
||||
unsigned int WriteALUResult : 2;
|
||||
unsigned int ALUResultCompare : 3;
|
||||
unsigned int Nop : 1;
|
||||
unsigned int SemWait : 1;
|
||||
};
|
||||
|
||||
typedef void (*rc_pair_foreach_src_fn)
|
||||
(void *, struct rc_pair_instruction_source *);
|
||||
typedef void (*rc_pair_foreach_src_fn)(void *, struct rc_pair_instruction_source *);
|
||||
|
||||
/**
|
||||
* General helper functions for dealing with the paired instruction format.
|
||||
*/
|
||||
/*@{*/
|
||||
int rc_pair_alloc_source(struct rc_pair_instruction *pair,
|
||||
unsigned int rgb, unsigned int alpha,
|
||||
rc_register_file file, unsigned int index);
|
||||
int rc_pair_alloc_source(struct rc_pair_instruction *pair, unsigned int rgb, unsigned int alpha,
|
||||
rc_register_file file, unsigned int index);
|
||||
|
||||
void rc_pair_foreach_source_that_alpha_reads(
|
||||
struct rc_pair_instruction * pair,
|
||||
void * data,
|
||||
rc_pair_foreach_src_fn cb);
|
||||
void rc_pair_foreach_source_that_alpha_reads(struct rc_pair_instruction *pair, void *data,
|
||||
rc_pair_foreach_src_fn cb);
|
||||
|
||||
void rc_pair_foreach_source_that_rgb_reads(
|
||||
struct rc_pair_instruction * pair,
|
||||
void * data,
|
||||
rc_pair_foreach_src_fn cb);
|
||||
void rc_pair_foreach_source_that_rgb_reads(struct rc_pair_instruction *pair, void *data,
|
||||
rc_pair_foreach_src_fn cb);
|
||||
|
||||
struct rc_pair_instruction_source * rc_pair_get_src(
|
||||
struct rc_pair_instruction * pair_inst,
|
||||
struct rc_pair_instruction_arg * arg);
|
||||
struct rc_pair_instruction_source *rc_pair_get_src(struct rc_pair_instruction *pair_inst,
|
||||
struct rc_pair_instruction_arg *arg);
|
||||
|
||||
int rc_pair_get_src_index(
|
||||
struct rc_pair_instruction * pair_inst,
|
||||
struct rc_pair_instruction_source * src);
|
||||
int rc_pair_get_src_index(struct rc_pair_instruction *pair_inst,
|
||||
struct rc_pair_instruction_source *src);
|
||||
/*@}*/
|
||||
|
||||
|
||||
/**
|
||||
* Compiler passes that operate with the paired format.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,457 +3,511 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "radeon_program.h"
|
||||
#include "radeon_compiler_util.h"
|
||||
#include "radeon_program.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
static const char * textarget_to_string(rc_texture_target target)
|
||||
static const char *
|
||||
textarget_to_string(rc_texture_target target)
|
||||
{
|
||||
switch(target) {
|
||||
case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY";
|
||||
case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY";
|
||||
case RC_TEXTURE_CUBE: return "CUBE";
|
||||
case RC_TEXTURE_3D: return "3D";
|
||||
case RC_TEXTURE_RECT: return "RECT";
|
||||
case RC_TEXTURE_2D: return "2D";
|
||||
case RC_TEXTURE_1D: return "1D";
|
||||
default: return "BAD_TEXTURE_TARGET";
|
||||
}
|
||||
switch (target) {
|
||||
case RC_TEXTURE_2D_ARRAY:
|
||||
return "2D_ARRAY";
|
||||
case RC_TEXTURE_1D_ARRAY:
|
||||
return "1D_ARRAY";
|
||||
case RC_TEXTURE_CUBE:
|
||||
return "CUBE";
|
||||
case RC_TEXTURE_3D:
|
||||
return "3D";
|
||||
case RC_TEXTURE_RECT:
|
||||
return "RECT";
|
||||
case RC_TEXTURE_2D:
|
||||
return "2D";
|
||||
case RC_TEXTURE_1D:
|
||||
return "1D";
|
||||
default:
|
||||
return "BAD_TEXTURE_TARGET";
|
||||
}
|
||||
}
|
||||
|
||||
static const char * presubtract_op_to_string(rc_presubtract_op op)
|
||||
static const char *
|
||||
presubtract_op_to_string(rc_presubtract_op op)
|
||||
{
|
||||
switch(op) {
|
||||
case RC_PRESUB_NONE:
|
||||
return "NONE";
|
||||
case RC_PRESUB_BIAS:
|
||||
return "(1 - 2 * src0)";
|
||||
case RC_PRESUB_SUB:
|
||||
return "(src1 - src0)";
|
||||
case RC_PRESUB_ADD:
|
||||
return "(src1 + src0)";
|
||||
case RC_PRESUB_INV:
|
||||
return "(1 - src0)";
|
||||
default:
|
||||
return "BAD_PRESUBTRACT_OP";
|
||||
}
|
||||
switch (op) {
|
||||
case RC_PRESUB_NONE:
|
||||
return "NONE";
|
||||
case RC_PRESUB_BIAS:
|
||||
return "(1 - 2 * src0)";
|
||||
case RC_PRESUB_SUB:
|
||||
return "(src1 - src0)";
|
||||
case RC_PRESUB_ADD:
|
||||
return "(src1 + src0)";
|
||||
case RC_PRESUB_INV:
|
||||
return "(1 - src0)";
|
||||
default:
|
||||
return "BAD_PRESUBTRACT_OP";
|
||||
}
|
||||
}
|
||||
|
||||
static void print_omod_op(FILE * f, rc_omod_op op)
|
||||
static void
|
||||
print_omod_op(FILE *f, rc_omod_op op)
|
||||
{
|
||||
const char * omod_str;
|
||||
const char *omod_str;
|
||||
|
||||
switch(op) {
|
||||
case RC_OMOD_MUL_1:
|
||||
return;
|
||||
case RC_OMOD_DISABLE:
|
||||
omod_str = "(OMOD DISABLE)";
|
||||
break;
|
||||
case RC_OMOD_MUL_2:
|
||||
omod_str = "* 2";
|
||||
break;
|
||||
case RC_OMOD_MUL_4:
|
||||
omod_str = "* 4";
|
||||
break;
|
||||
case RC_OMOD_MUL_8:
|
||||
omod_str = "* 8";
|
||||
break;
|
||||
case RC_OMOD_DIV_2:
|
||||
omod_str = "/ 2";
|
||||
break;
|
||||
case RC_OMOD_DIV_4:
|
||||
omod_str = "/ 4";
|
||||
break;
|
||||
case RC_OMOD_DIV_8:
|
||||
omod_str = "/ 8";
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
fprintf(f, " %s", omod_str);
|
||||
switch (op) {
|
||||
case RC_OMOD_MUL_1:
|
||||
return;
|
||||
case RC_OMOD_DISABLE:
|
||||
omod_str = "(OMOD DISABLE)";
|
||||
break;
|
||||
case RC_OMOD_MUL_2:
|
||||
omod_str = "* 2";
|
||||
break;
|
||||
case RC_OMOD_MUL_4:
|
||||
omod_str = "* 4";
|
||||
break;
|
||||
case RC_OMOD_MUL_8:
|
||||
omod_str = "* 8";
|
||||
break;
|
||||
case RC_OMOD_DIV_2:
|
||||
omod_str = "/ 2";
|
||||
break;
|
||||
case RC_OMOD_DIV_4:
|
||||
omod_str = "/ 4";
|
||||
break;
|
||||
case RC_OMOD_DIV_8:
|
||||
omod_str = "/ 8";
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
fprintf(f, " %s", omod_str);
|
||||
}
|
||||
|
||||
static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
|
||||
static void
|
||||
rc_print_comparefunc(FILE *f, const char *lhs, rc_compare_func func, const char *rhs)
|
||||
{
|
||||
if (func == RC_COMPARE_FUNC_NEVER) {
|
||||
fprintf(f, "false");
|
||||
} else if (func == RC_COMPARE_FUNC_ALWAYS) {
|
||||
fprintf(f, "true");
|
||||
} else {
|
||||
const char * op;
|
||||
switch(func) {
|
||||
case RC_COMPARE_FUNC_LESS: op = "<"; break;
|
||||
case RC_COMPARE_FUNC_EQUAL: op = "=="; break;
|
||||
case RC_COMPARE_FUNC_LEQUAL: op = "<="; break;
|
||||
case RC_COMPARE_FUNC_GREATER: op = ">"; break;
|
||||
case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break;
|
||||
case RC_COMPARE_FUNC_GEQUAL: op = ">="; break;
|
||||
default: op = "???"; break;
|
||||
}
|
||||
fprintf(f, "%s %s %s", lhs, op, rhs);
|
||||
}
|
||||
if (func == RC_COMPARE_FUNC_NEVER) {
|
||||
fprintf(f, "false");
|
||||
} else if (func == RC_COMPARE_FUNC_ALWAYS) {
|
||||
fprintf(f, "true");
|
||||
} else {
|
||||
const char *op;
|
||||
switch (func) {
|
||||
case RC_COMPARE_FUNC_LESS:
|
||||
op = "<";
|
||||
break;
|
||||
case RC_COMPARE_FUNC_EQUAL:
|
||||
op = "==";
|
||||
break;
|
||||
case RC_COMPARE_FUNC_LEQUAL:
|
||||
op = "<=";
|
||||
break;
|
||||
case RC_COMPARE_FUNC_GREATER:
|
||||
op = ">";
|
||||
break;
|
||||
case RC_COMPARE_FUNC_NOTEQUAL:
|
||||
op = "!=";
|
||||
break;
|
||||
case RC_COMPARE_FUNC_GEQUAL:
|
||||
op = ">=";
|
||||
break;
|
||||
default:
|
||||
op = "???";
|
||||
break;
|
||||
}
|
||||
fprintf(f, "%s %s %s", lhs, op, rhs);
|
||||
}
|
||||
}
|
||||
|
||||
static void rc_print_inline_float(FILE * f, int index)
|
||||
static void
|
||||
rc_print_inline_float(FILE *f, int index)
|
||||
{
|
||||
fprintf(f, "%f (0x%x)", rc_inline_to_float(index), index);
|
||||
fprintf(f, "%f (0x%x)", rc_inline_to_float(index), index);
|
||||
}
|
||||
|
||||
static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
|
||||
static void
|
||||
rc_print_register(FILE *f, rc_register_file file, int index, unsigned int reladdr)
|
||||
{
|
||||
if (file == RC_FILE_NONE) {
|
||||
fprintf(f, "none");
|
||||
} else if (file == RC_FILE_SPECIAL) {
|
||||
switch(index) {
|
||||
case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
|
||||
default: fprintf(f, "special[%i]", index); break;
|
||||
}
|
||||
} else if (file == RC_FILE_INLINE) {
|
||||
rc_print_inline_float(f, index);
|
||||
} else {
|
||||
const char * filename;
|
||||
switch(file) {
|
||||
case RC_FILE_TEMPORARY: filename = "temp"; break;
|
||||
case RC_FILE_INPUT: filename = "input"; break;
|
||||
case RC_FILE_OUTPUT: filename = "output"; break;
|
||||
case RC_FILE_ADDRESS: filename = "addr"; break;
|
||||
case RC_FILE_CONSTANT: filename = "const"; break;
|
||||
default: filename = "BAD FILE"; break;
|
||||
}
|
||||
fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
|
||||
}
|
||||
if (file == RC_FILE_NONE) {
|
||||
fprintf(f, "none");
|
||||
} else if (file == RC_FILE_SPECIAL) {
|
||||
switch (index) {
|
||||
case RC_SPECIAL_ALU_RESULT:
|
||||
fprintf(f, "aluresult");
|
||||
break;
|
||||
default:
|
||||
fprintf(f, "special[%i]", index);
|
||||
break;
|
||||
}
|
||||
} else if (file == RC_FILE_INLINE) {
|
||||
rc_print_inline_float(f, index);
|
||||
} else {
|
||||
const char *filename;
|
||||
switch (file) {
|
||||
case RC_FILE_TEMPORARY:
|
||||
filename = "temp";
|
||||
break;
|
||||
case RC_FILE_INPUT:
|
||||
filename = "input";
|
||||
break;
|
||||
case RC_FILE_OUTPUT:
|
||||
filename = "output";
|
||||
break;
|
||||
case RC_FILE_ADDRESS:
|
||||
filename = "addr";
|
||||
break;
|
||||
case RC_FILE_CONSTANT:
|
||||
filename = "const";
|
||||
break;
|
||||
default:
|
||||
filename = "BAD FILE";
|
||||
break;
|
||||
}
|
||||
fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
|
||||
}
|
||||
}
|
||||
|
||||
static void rc_print_mask(FILE * f, unsigned int mask)
|
||||
static void
|
||||
rc_print_mask(FILE *f, unsigned int mask)
|
||||
{
|
||||
if (mask & RC_MASK_X) fprintf(f, "x");
|
||||
if (mask & RC_MASK_Y) fprintf(f, "y");
|
||||
if (mask & RC_MASK_Z) fprintf(f, "z");
|
||||
if (mask & RC_MASK_W) fprintf(f, "w");
|
||||
if (mask & RC_MASK_X)
|
||||
fprintf(f, "x");
|
||||
if (mask & RC_MASK_Y)
|
||||
fprintf(f, "y");
|
||||
if (mask & RC_MASK_Z)
|
||||
fprintf(f, "z");
|
||||
if (mask & RC_MASK_W)
|
||||
fprintf(f, "w");
|
||||
}
|
||||
|
||||
static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
|
||||
static void
|
||||
rc_print_dst_register(FILE *f, struct rc_dst_register dst)
|
||||
{
|
||||
rc_print_register(f, dst.File, dst.Index, 0);
|
||||
if (dst.WriteMask != RC_MASK_XYZW) {
|
||||
fprintf(f, ".");
|
||||
rc_print_mask(f, dst.WriteMask);
|
||||
}
|
||||
rc_print_register(f, dst.File, dst.Index, 0);
|
||||
if (dst.WriteMask != RC_MASK_XYZW) {
|
||||
fprintf(f, ".");
|
||||
rc_print_mask(f, dst.WriteMask);
|
||||
}
|
||||
}
|
||||
|
||||
static char rc_swizzle_char(unsigned int swz)
|
||||
static char
|
||||
rc_swizzle_char(unsigned int swz)
|
||||
{
|
||||
switch(swz) {
|
||||
case RC_SWIZZLE_X: return 'x';
|
||||
case RC_SWIZZLE_Y: return 'y';
|
||||
case RC_SWIZZLE_Z: return 'z';
|
||||
case RC_SWIZZLE_W: return 'w';
|
||||
case RC_SWIZZLE_ZERO: return '0';
|
||||
case RC_SWIZZLE_ONE: return '1';
|
||||
case RC_SWIZZLE_HALF: return 'H';
|
||||
case RC_SWIZZLE_UNUSED: return '_';
|
||||
}
|
||||
fprintf(stderr, "bad swz: %u\n", swz);
|
||||
return '?';
|
||||
switch (swz) {
|
||||
case RC_SWIZZLE_X:
|
||||
return 'x';
|
||||
case RC_SWIZZLE_Y:
|
||||
return 'y';
|
||||
case RC_SWIZZLE_Z:
|
||||
return 'z';
|
||||
case RC_SWIZZLE_W:
|
||||
return 'w';
|
||||
case RC_SWIZZLE_ZERO:
|
||||
return '0';
|
||||
case RC_SWIZZLE_ONE:
|
||||
return '1';
|
||||
case RC_SWIZZLE_HALF:
|
||||
return 'H';
|
||||
case RC_SWIZZLE_UNUSED:
|
||||
return '_';
|
||||
}
|
||||
fprintf(stderr, "bad swz: %u\n", swz);
|
||||
return '?';
|
||||
}
|
||||
|
||||
static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
|
||||
static void
|
||||
rc_print_swizzle(FILE *f, unsigned int swizzle, unsigned int negate)
|
||||
{
|
||||
unsigned int comp;
|
||||
for(comp = 0; comp < 4; ++comp) {
|
||||
rc_swizzle swz = GET_SWZ(swizzle, comp);
|
||||
if (GET_BIT(negate, comp))
|
||||
fprintf(f, "-");
|
||||
fprintf(f, "%c", rc_swizzle_char(swz));
|
||||
}
|
||||
unsigned int comp;
|
||||
for (comp = 0; comp < 4; ++comp) {
|
||||
rc_swizzle swz = GET_SWZ(swizzle, comp);
|
||||
if (GET_BIT(negate, comp))
|
||||
fprintf(f, "-");
|
||||
fprintf(f, "%c", rc_swizzle_char(swz));
|
||||
}
|
||||
}
|
||||
|
||||
static void rc_print_presub_instruction(FILE * f,
|
||||
struct rc_presub_instruction inst)
|
||||
static void
|
||||
rc_print_presub_instruction(FILE *f, struct rc_presub_instruction inst)
|
||||
{
|
||||
fprintf(f,"(");
|
||||
switch(inst.Opcode){
|
||||
case RC_PRESUB_BIAS:
|
||||
fprintf(f, "1 - 2 * ");
|
||||
rc_print_register(f, inst.SrcReg[0].File,
|
||||
inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
|
||||
break;
|
||||
case RC_PRESUB_SUB:
|
||||
rc_print_register(f, inst.SrcReg[1].File,
|
||||
inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
|
||||
fprintf(f, " - ");
|
||||
rc_print_register(f, inst.SrcReg[0].File,
|
||||
inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
|
||||
break;
|
||||
case RC_PRESUB_ADD:
|
||||
rc_print_register(f, inst.SrcReg[1].File,
|
||||
inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
|
||||
fprintf(f, " + ");
|
||||
rc_print_register(f, inst.SrcReg[0].File,
|
||||
inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
|
||||
break;
|
||||
case RC_PRESUB_INV:
|
||||
fprintf(f, "1 - ");
|
||||
rc_print_register(f, inst.SrcReg[0].File,
|
||||
inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
fprintf(f, ")");
|
||||
fprintf(f, "(");
|
||||
switch (inst.Opcode) {
|
||||
case RC_PRESUB_BIAS:
|
||||
fprintf(f, "1 - 2 * ");
|
||||
rc_print_register(f, inst.SrcReg[0].File, inst.SrcReg[0].Index, inst.SrcReg[0].RelAddr);
|
||||
break;
|
||||
case RC_PRESUB_SUB:
|
||||
rc_print_register(f, inst.SrcReg[1].File, inst.SrcReg[1].Index, inst.SrcReg[1].RelAddr);
|
||||
fprintf(f, " - ");
|
||||
rc_print_register(f, inst.SrcReg[0].File, inst.SrcReg[0].Index, inst.SrcReg[0].RelAddr);
|
||||
break;
|
||||
case RC_PRESUB_ADD:
|
||||
rc_print_register(f, inst.SrcReg[1].File, inst.SrcReg[1].Index, inst.SrcReg[1].RelAddr);
|
||||
fprintf(f, " + ");
|
||||
rc_print_register(f, inst.SrcReg[0].File, inst.SrcReg[0].Index, inst.SrcReg[0].RelAddr);
|
||||
break;
|
||||
case RC_PRESUB_INV:
|
||||
fprintf(f, "1 - ");
|
||||
rc_print_register(f, inst.SrcReg[0].File, inst.SrcReg[0].Index, inst.SrcReg[0].RelAddr);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
fprintf(f, ")");
|
||||
}
|
||||
|
||||
static void rc_print_src_register(FILE * f, struct rc_instruction * inst,
|
||||
struct rc_src_register src)
|
||||
static void
|
||||
rc_print_src_register(FILE *f, struct rc_instruction *inst, struct rc_src_register src)
|
||||
{
|
||||
int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
|
||||
int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
|
||||
|
||||
if (src.Negate == RC_MASK_XYZW)
|
||||
fprintf(f, "-");
|
||||
if (src.Abs)
|
||||
fprintf(f, "|");
|
||||
if (src.Negate == RC_MASK_XYZW)
|
||||
fprintf(f, "-");
|
||||
if (src.Abs)
|
||||
fprintf(f, "|");
|
||||
|
||||
if(src.File == RC_FILE_PRESUB)
|
||||
rc_print_presub_instruction(f, inst->U.I.PreSub);
|
||||
else
|
||||
rc_print_register(f, src.File, src.Index, src.RelAddr);
|
||||
if (src.File == RC_FILE_PRESUB)
|
||||
rc_print_presub_instruction(f, inst->U.I.PreSub);
|
||||
else
|
||||
rc_print_register(f, src.File, src.Index, src.RelAddr);
|
||||
|
||||
if (src.Abs && !trivial_negate)
|
||||
fprintf(f, "|");
|
||||
if (src.Abs && !trivial_negate)
|
||||
fprintf(f, "|");
|
||||
|
||||
if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) {
|
||||
fprintf(f, ".");
|
||||
rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate);
|
||||
}
|
||||
if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) {
|
||||
fprintf(f, ".");
|
||||
rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate);
|
||||
}
|
||||
|
||||
if (src.Abs && trivial_negate)
|
||||
fprintf(f, "|");
|
||||
if (src.Abs && trivial_negate)
|
||||
fprintf(f, "|");
|
||||
}
|
||||
|
||||
static unsigned update_branch_depth(rc_opcode opcode, unsigned *branch_depth)
|
||||
static unsigned
|
||||
update_branch_depth(rc_opcode opcode, unsigned *branch_depth)
|
||||
{
|
||||
switch (opcode) {
|
||||
case RC_OPCODE_IF:
|
||||
case RC_OPCODE_BGNLOOP:
|
||||
return (*branch_depth)++ * 2;
|
||||
switch (opcode) {
|
||||
case RC_OPCODE_IF:
|
||||
case RC_OPCODE_BGNLOOP:
|
||||
return (*branch_depth)++ * 2;
|
||||
|
||||
case RC_OPCODE_ENDIF:
|
||||
case RC_OPCODE_ENDLOOP:
|
||||
assert(*branch_depth > 0);
|
||||
return --(*branch_depth) * 2;
|
||||
case RC_OPCODE_ENDIF:
|
||||
case RC_OPCODE_ENDLOOP:
|
||||
assert(*branch_depth > 0);
|
||||
return --(*branch_depth) * 2;
|
||||
|
||||
case RC_OPCODE_ELSE:
|
||||
assert(*branch_depth > 0);
|
||||
return (*branch_depth - 1) * 2;
|
||||
case RC_OPCODE_ELSE:
|
||||
assert(*branch_depth > 0);
|
||||
return (*branch_depth - 1) * 2;
|
||||
|
||||
default:
|
||||
return *branch_depth * 2;
|
||||
}
|
||||
default:
|
||||
return *branch_depth * 2;
|
||||
}
|
||||
}
|
||||
|
||||
static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth)
|
||||
static void
|
||||
rc_print_normal_instruction(FILE *f, struct rc_instruction *inst, unsigned *branch_depth)
|
||||
{
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
unsigned int reg;
|
||||
unsigned spaces = update_branch_depth(inst->U.I.Opcode, branch_depth);
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
unsigned int reg;
|
||||
unsigned spaces = update_branch_depth(inst->U.I.Opcode, branch_depth);
|
||||
|
||||
for (unsigned i = 0; i < spaces; i++)
|
||||
fprintf(f, " ");
|
||||
for (unsigned i = 0; i < spaces; i++)
|
||||
fprintf(f, " ");
|
||||
|
||||
fprintf(f, "%s", opcode->Name);
|
||||
fprintf(f, "%s", opcode->Name);
|
||||
|
||||
switch(inst->U.I.SaturateMode) {
|
||||
case RC_SATURATE_NONE: break;
|
||||
case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break;
|
||||
case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break;
|
||||
default: fprintf(f, "_BAD_SAT"); break;
|
||||
}
|
||||
switch (inst->U.I.SaturateMode) {
|
||||
case RC_SATURATE_NONE:
|
||||
break;
|
||||
case RC_SATURATE_ZERO_ONE:
|
||||
fprintf(f, "_SAT");
|
||||
break;
|
||||
case RC_SATURATE_MINUS_PLUS_ONE:
|
||||
fprintf(f, "_SAT2");
|
||||
break;
|
||||
default:
|
||||
fprintf(f, "_BAD_SAT");
|
||||
break;
|
||||
}
|
||||
|
||||
if (opcode->HasDstReg) {
|
||||
fprintf(f, " ");
|
||||
rc_print_dst_register(f, inst->U.I.DstReg);
|
||||
print_omod_op(f, inst->U.I.Omod);
|
||||
if (opcode->NumSrcRegs)
|
||||
fprintf(f, ",");
|
||||
}
|
||||
if (opcode->HasDstReg) {
|
||||
fprintf(f, " ");
|
||||
rc_print_dst_register(f, inst->U.I.DstReg);
|
||||
print_omod_op(f, inst->U.I.Omod);
|
||||
if (opcode->NumSrcRegs)
|
||||
fprintf(f, ",");
|
||||
}
|
||||
|
||||
for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
|
||||
if (reg > 0)
|
||||
fprintf(f, ",");
|
||||
fprintf(f, " ");
|
||||
rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]);
|
||||
}
|
||||
for (reg = 0; reg < opcode->NumSrcRegs; ++reg) {
|
||||
if (reg > 0)
|
||||
fprintf(f, ",");
|
||||
fprintf(f, " ");
|
||||
rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]);
|
||||
}
|
||||
|
||||
if (opcode->HasTexture) {
|
||||
fprintf(f, ", %s%s[%u]%s%s",
|
||||
textarget_to_string(inst->U.I.TexSrcTarget),
|
||||
inst->U.I.TexShadow ? "SHADOW" : "",
|
||||
inst->U.I.TexSrcUnit,
|
||||
inst->U.I.TexSemWait ? " SEM_WAIT" : "",
|
||||
inst->U.I.TexSemAcquire ? " SEM_ACQUIRE" : "");
|
||||
}
|
||||
if (opcode->HasTexture) {
|
||||
fprintf(f, ", %s%s[%u]%s%s", textarget_to_string(inst->U.I.TexSrcTarget),
|
||||
inst->U.I.TexShadow ? "SHADOW" : "", inst->U.I.TexSrcUnit,
|
||||
inst->U.I.TexSemWait ? " SEM_WAIT" : "",
|
||||
inst->U.I.TexSemAcquire ? " SEM_ACQUIRE" : "");
|
||||
}
|
||||
|
||||
fprintf(f, ";");
|
||||
fprintf(f, ";");
|
||||
|
||||
if (inst->U.I.WriteALUResult) {
|
||||
fprintf(f, " [aluresult = (");
|
||||
rc_print_comparefunc(f,
|
||||
(inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w",
|
||||
inst->U.I.ALUResultCompare, "0");
|
||||
fprintf(f, ")]");
|
||||
}
|
||||
if (inst->U.I.WriteALUResult) {
|
||||
fprintf(f, " [aluresult = (");
|
||||
rc_print_comparefunc(f, (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w",
|
||||
inst->U.I.ALUResultCompare, "0");
|
||||
fprintf(f, ")]");
|
||||
}
|
||||
|
||||
if (inst->U.I.DstReg.Pred == RC_PRED_SET) {
|
||||
fprintf(f, " PRED_SET");
|
||||
} else if (inst->U.I.DstReg.Pred == RC_PRED_INV) {
|
||||
fprintf(f, " PRED_INV");
|
||||
}
|
||||
if (inst->U.I.DstReg.Pred == RC_PRED_SET) {
|
||||
fprintf(f, " PRED_SET");
|
||||
} else if (inst->U.I.DstReg.Pred == RC_PRED_INV) {
|
||||
fprintf(f, " PRED_INV");
|
||||
}
|
||||
|
||||
fprintf(f, "\n");
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth)
|
||||
static void
|
||||
rc_print_pair_instruction(FILE *f, struct rc_instruction *fullinst, unsigned *branch_depth)
|
||||
{
|
||||
struct rc_pair_instruction * inst = &fullinst->U.P;
|
||||
int printedsrc = 0;
|
||||
unsigned spaces = update_branch_depth(inst->RGB.Opcode != RC_OPCODE_NOP ?
|
||||
inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth);
|
||||
struct rc_pair_instruction *inst = &fullinst->U.P;
|
||||
int printedsrc = 0;
|
||||
unsigned spaces = update_branch_depth(
|
||||
inst->RGB.Opcode != RC_OPCODE_NOP ? inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth);
|
||||
|
||||
for (unsigned i = 0; i < spaces; i++)
|
||||
fprintf(f, " ");
|
||||
for (unsigned i = 0; i < spaces; i++)
|
||||
fprintf(f, " ");
|
||||
|
||||
for(unsigned int src = 0; src < 3; ++src) {
|
||||
if (inst->RGB.Src[src].Used) {
|
||||
if (printedsrc)
|
||||
fprintf(f, ", ");
|
||||
fprintf(f, "src%i.xyz = ", src);
|
||||
rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0);
|
||||
printedsrc = 1;
|
||||
}
|
||||
if (inst->Alpha.Src[src].Used) {
|
||||
if (printedsrc)
|
||||
fprintf(f, ", ");
|
||||
fprintf(f, "src%i.w = ", src);
|
||||
rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0);
|
||||
printedsrc = 1;
|
||||
}
|
||||
}
|
||||
if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
fprintf(f, ", srcp.xyz = %s",
|
||||
presubtract_op_to_string(
|
||||
inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index));
|
||||
}
|
||||
if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
fprintf(f, ", srcp.w = %s",
|
||||
presubtract_op_to_string(
|
||||
inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
|
||||
}
|
||||
if (inst->SemWait) {
|
||||
fprintf(f, " SEM_WAIT");
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
for (unsigned int src = 0; src < 3; ++src) {
|
||||
if (inst->RGB.Src[src].Used) {
|
||||
if (printedsrc)
|
||||
fprintf(f, ", ");
|
||||
fprintf(f, "src%i.xyz = ", src);
|
||||
rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0);
|
||||
printedsrc = 1;
|
||||
}
|
||||
if (inst->Alpha.Src[src].Used) {
|
||||
if (printedsrc)
|
||||
fprintf(f, ", ");
|
||||
fprintf(f, "src%i.w = ", src);
|
||||
rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0);
|
||||
printedsrc = 1;
|
||||
}
|
||||
}
|
||||
if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
fprintf(f, ", srcp.xyz = %s",
|
||||
presubtract_op_to_string(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index));
|
||||
}
|
||||
if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
fprintf(f, ", srcp.w = %s",
|
||||
presubtract_op_to_string(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
|
||||
}
|
||||
if (inst->SemWait) {
|
||||
fprintf(f, " SEM_WAIT");
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
|
||||
if (inst->RGB.Opcode != RC_OPCODE_NOP) {
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
|
||||
if (inst->RGB.Opcode != RC_OPCODE_NOP) {
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->RGB.Opcode);
|
||||
|
||||
for (unsigned i = 0; i < spaces; i++)
|
||||
fprintf(f, " ");
|
||||
for (unsigned i = 0; i < spaces; i++)
|
||||
fprintf(f, " ");
|
||||
|
||||
fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : "");
|
||||
if (inst->RGB.WriteMask)
|
||||
fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex,
|
||||
(inst->RGB.WriteMask & 1) ? "x" : "",
|
||||
(inst->RGB.WriteMask & 2) ? "y" : "",
|
||||
(inst->RGB.WriteMask & 4) ? "z" : "");
|
||||
if (inst->RGB.OutputWriteMask)
|
||||
fprintf(f, " color[%i].%s%s%s", inst->RGB.Target,
|
||||
(inst->RGB.OutputWriteMask & 1) ? "x" : "",
|
||||
(inst->RGB.OutputWriteMask & 2) ? "y" : "",
|
||||
(inst->RGB.OutputWriteMask & 4) ? "z" : "");
|
||||
if (inst->WriteALUResult == RC_ALURESULT_X)
|
||||
fprintf(f, " aluresult");
|
||||
fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : "");
|
||||
if (inst->RGB.WriteMask)
|
||||
fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex, (inst->RGB.WriteMask & 1) ? "x" : "",
|
||||
(inst->RGB.WriteMask & 2) ? "y" : "", (inst->RGB.WriteMask & 4) ? "z" : "");
|
||||
if (inst->RGB.OutputWriteMask)
|
||||
fprintf(
|
||||
f, " color[%i].%s%s%s", inst->RGB.Target, (inst->RGB.OutputWriteMask & 1) ? "x" : "",
|
||||
(inst->RGB.OutputWriteMask & 2) ? "y" : "", (inst->RGB.OutputWriteMask & 4) ? "z" : "");
|
||||
if (inst->WriteALUResult == RC_ALURESULT_X)
|
||||
fprintf(f, " aluresult");
|
||||
|
||||
print_omod_op(f, inst->RGB.Omod);
|
||||
print_omod_op(f, inst->RGB.Omod);
|
||||
|
||||
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
|
||||
const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
|
||||
const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
|
||||
fprintf(f, ", %s%ssrc", neg, abs);
|
||||
if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
|
||||
fprintf(f,"p");
|
||||
else
|
||||
fprintf(f,"%d", inst->RGB.Arg[arg].Source);
|
||||
fprintf(f,".%c%c%c%s",
|
||||
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
|
||||
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
|
||||
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
|
||||
abs);
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
for (unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
|
||||
const char *abs = inst->RGB.Arg[arg].Abs ? "|" : "";
|
||||
const char *neg = inst->RGB.Arg[arg].Negate ? "-" : "";
|
||||
fprintf(f, ", %s%ssrc", neg, abs);
|
||||
if (inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
|
||||
fprintf(f, "p");
|
||||
else
|
||||
fprintf(f, "%d", inst->RGB.Arg[arg].Source);
|
||||
fprintf(f, ".%c%c%c%s", rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
|
||||
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
|
||||
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)), abs);
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
|
||||
if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Alpha.Opcode);
|
||||
|
||||
for (unsigned i = 0; i < spaces; i++)
|
||||
fprintf(f, " ");
|
||||
for (unsigned i = 0; i < spaces; i++)
|
||||
fprintf(f, " ");
|
||||
|
||||
fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : "");
|
||||
if (inst->Alpha.WriteMask)
|
||||
fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
|
||||
if (inst->Alpha.OutputWriteMask)
|
||||
fprintf(f, " color[%i].w", inst->Alpha.Target);
|
||||
if (inst->Alpha.DepthWriteMask)
|
||||
fprintf(f, " depth.w");
|
||||
if (inst->WriteALUResult == RC_ALURESULT_W)
|
||||
fprintf(f, " aluresult");
|
||||
fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : "");
|
||||
if (inst->Alpha.WriteMask)
|
||||
fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
|
||||
if (inst->Alpha.OutputWriteMask)
|
||||
fprintf(f, " color[%i].w", inst->Alpha.Target);
|
||||
if (inst->Alpha.DepthWriteMask)
|
||||
fprintf(f, " depth.w");
|
||||
if (inst->WriteALUResult == RC_ALURESULT_W)
|
||||
fprintf(f, " aluresult");
|
||||
|
||||
print_omod_op(f, inst->Alpha.Omod);
|
||||
print_omod_op(f, inst->Alpha.Omod);
|
||||
|
||||
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
|
||||
const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
|
||||
const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
|
||||
fprintf(f, ", %s%ssrc", neg, abs);
|
||||
if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
|
||||
fprintf(f,"p");
|
||||
else
|
||||
fprintf(f,"%d", inst->Alpha.Arg[arg].Source);
|
||||
fprintf(f,".%c%s",
|
||||
rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs);
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
for (unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
|
||||
const char *abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
|
||||
const char *neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
|
||||
fprintf(f, ", %s%ssrc", neg, abs);
|
||||
if (inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
|
||||
fprintf(f, "p");
|
||||
else
|
||||
fprintf(f, "%d", inst->Alpha.Arg[arg].Source);
|
||||
fprintf(f, ".%c%s", rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs);
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
if (inst->WriteALUResult) {
|
||||
for (unsigned i = 0; i < spaces; i++)
|
||||
fprintf(f, " ");
|
||||
if (inst->WriteALUResult) {
|
||||
for (unsigned i = 0; i < spaces; i++)
|
||||
fprintf(f, " ");
|
||||
|
||||
fprintf(f, " [aluresult = (");
|
||||
rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
|
||||
fprintf(f, ")]\n");
|
||||
}
|
||||
fprintf(f, " [aluresult = (");
|
||||
rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
|
||||
fprintf(f, ")]\n");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Print program to stderr, default options.
|
||||
*/
|
||||
void rc_print_program(const struct rc_program *prog)
|
||||
void
|
||||
rc_print_program(const struct rc_program *prog)
|
||||
{
|
||||
unsigned int linenum = 0;
|
||||
unsigned branch_depth = 0;
|
||||
struct rc_instruction *inst;
|
||||
unsigned int linenum = 0;
|
||||
unsigned branch_depth = 0;
|
||||
struct rc_instruction *inst;
|
||||
|
||||
fprintf(stderr, "# Radeon Compiler Program\n");
|
||||
fprintf(stderr, "# Radeon Compiler Program\n");
|
||||
|
||||
for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
|
||||
fprintf(stderr, "%3d: ", linenum);
|
||||
for (inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
|
||||
fprintf(stderr, "%3d: ", linenum);
|
||||
|
||||
if (inst->Type == RC_INSTRUCTION_PAIR)
|
||||
rc_print_pair_instruction(stderr, inst, &branch_depth);
|
||||
else
|
||||
rc_print_normal_instruction(stderr, inst, &branch_depth);
|
||||
if (inst->Type == RC_INSTRUCTION_PAIR)
|
||||
rc_print_pair_instruction(stderr, inst, &branch_depth);
|
||||
else
|
||||
rc_print_normal_instruction(stderr, inst, &branch_depth);
|
||||
|
||||
linenum++;
|
||||
}
|
||||
linenum++;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,83 +10,79 @@
|
|||
|
||||
/* Series of transformations to be done on textures. */
|
||||
|
||||
static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler,
|
||||
int tmu)
|
||||
static struct rc_src_register
|
||||
shadow_fail_value(struct r300_fragment_program_compiler *compiler, int tmu)
|
||||
{
|
||||
struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 };
|
||||
struct rc_src_register reg = {0, 0, 0, 0, 0, 0};
|
||||
|
||||
reg.File = RC_FILE_NONE;
|
||||
reg.Swizzle = combine_swizzles(RC_SWIZZLE_0000,
|
||||
compiler->state.unit[tmu].texture_swizzle);
|
||||
return reg;
|
||||
reg.File = RC_FILE_NONE;
|
||||
reg.Swizzle = combine_swizzles(RC_SWIZZLE_0000, compiler->state.unit[tmu].texture_swizzle);
|
||||
return reg;
|
||||
}
|
||||
|
||||
static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler,
|
||||
int tmu)
|
||||
static struct rc_src_register
|
||||
shadow_pass_value(struct r300_fragment_program_compiler *compiler, int tmu)
|
||||
{
|
||||
struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 };
|
||||
struct rc_src_register reg = {0, 0, 0, 0, 0, 0};
|
||||
|
||||
reg.File = RC_FILE_NONE;
|
||||
reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111,
|
||||
compiler->state.unit[tmu].texture_swizzle);
|
||||
return reg;
|
||||
reg.File = RC_FILE_NONE;
|
||||
reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111, compiler->state.unit[tmu].texture_swizzle);
|
||||
return reg;
|
||||
}
|
||||
|
||||
static void scale_texcoords(struct r300_fragment_program_compiler *compiler,
|
||||
struct rc_instruction *inst,
|
||||
unsigned state_constant)
|
||||
static void
|
||||
scale_texcoords(struct r300_fragment_program_compiler *compiler, struct rc_instruction *inst,
|
||||
unsigned state_constant)
|
||||
{
|
||||
struct rc_instruction *inst_mov;
|
||||
struct rc_instruction *inst_mov;
|
||||
|
||||
unsigned temp = rc_find_free_temporary(&compiler->Base);
|
||||
unsigned temp = rc_find_free_temporary(&compiler->Base);
|
||||
|
||||
inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev);
|
||||
inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev);
|
||||
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MUL;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.DstReg.Index = temp;
|
||||
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
|
||||
inst_mov->U.I.SrcReg[1].Index =
|
||||
rc_constants_add_state(&compiler->Base.Program.Constants,
|
||||
state_constant, inst->U.I.TexSrcUnit);
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MUL;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.DstReg.Index = temp;
|
||||
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
|
||||
inst_mov->U.I.SrcReg[1].Index = rc_constants_add_state(&compiler->Base.Program.Constants,
|
||||
state_constant, inst->U.I.TexSrcUnit);
|
||||
|
||||
reset_srcreg(&inst->U.I.SrcReg[0]);
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = temp;
|
||||
reset_srcreg(&inst->U.I.SrcReg[0]);
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = temp;
|
||||
}
|
||||
|
||||
static void projective_divide(struct r300_fragment_program_compiler *compiler,
|
||||
struct rc_instruction *inst)
|
||||
static void
|
||||
projective_divide(struct r300_fragment_program_compiler *compiler, struct rc_instruction *inst)
|
||||
{
|
||||
struct rc_instruction *inst_mul, *inst_rcp;
|
||||
struct rc_instruction *inst_mul, *inst_rcp;
|
||||
|
||||
unsigned temp = rc_find_free_temporary(&compiler->Base);
|
||||
unsigned temp = rc_find_free_temporary(&compiler->Base);
|
||||
|
||||
inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev);
|
||||
inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
|
||||
inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_rcp->U.I.DstReg.Index = temp;
|
||||
inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
/* Because the input can be arbitrarily swizzled,
|
||||
* read the component mapped to W. */
|
||||
inst_rcp->U.I.SrcReg[0].Swizzle =
|
||||
RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
|
||||
inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev);
|
||||
inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
|
||||
inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_rcp->U.I.DstReg.Index = temp;
|
||||
inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
/* Because the input can be arbitrarily swizzled,
|
||||
* read the component mapped to W. */
|
||||
inst_rcp->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
|
||||
|
||||
inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev);
|
||||
inst_mul->U.I.Opcode = RC_OPCODE_MUL;
|
||||
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.DstReg.Index = temp;
|
||||
inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.SrcReg[1].Index = temp;
|
||||
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
|
||||
inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev);
|
||||
inst_mul->U.I.Opcode = RC_OPCODE_MUL;
|
||||
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.DstReg.Index = temp;
|
||||
inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.SrcReg[1].Index = temp;
|
||||
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
|
||||
|
||||
reset_srcreg(&inst->U.I.SrcReg[0]);
|
||||
inst->U.I.Opcode = RC_OPCODE_TEX;
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = temp;
|
||||
reset_srcreg(&inst->U.I.SrcReg[0]);
|
||||
inst->U.I.Opcode = RC_OPCODE_TEX;
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = temp;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -97,353 +93,343 @@ static void projective_divide(struct r300_fragment_program_compiler *compiler,
|
|||
* - extract operand swizzles
|
||||
* - introduce a temporary register when write masks are needed
|
||||
*/
|
||||
int radeonTransformTEX(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst,
|
||||
void* data)
|
||||
int
|
||||
radeonTransformTEX(struct radeon_compiler *c, struct rc_instruction *inst, void *data)
|
||||
{
|
||||
struct r300_fragment_program_compiler *compiler =
|
||||
(struct r300_fragment_program_compiler*)data;
|
||||
rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
|
||||
int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT;
|
||||
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler *)data;
|
||||
rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
|
||||
int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT;
|
||||
|
||||
if (inst->U.I.Opcode != RC_OPCODE_TEX &&
|
||||
inst->U.I.Opcode != RC_OPCODE_TXB &&
|
||||
inst->U.I.Opcode != RC_OPCODE_TXP &&
|
||||
inst->U.I.Opcode != RC_OPCODE_TXD &&
|
||||
inst->U.I.Opcode != RC_OPCODE_TXL &&
|
||||
inst->U.I.Opcode != RC_OPCODE_KIL)
|
||||
return 0;
|
||||
if (inst->U.I.Opcode != RC_OPCODE_TEX && inst->U.I.Opcode != RC_OPCODE_TXB &&
|
||||
inst->U.I.Opcode != RC_OPCODE_TXP && inst->U.I.Opcode != RC_OPCODE_TXD &&
|
||||
inst->U.I.Opcode != RC_OPCODE_TXL && inst->U.I.Opcode != RC_OPCODE_KIL)
|
||||
return 0;
|
||||
|
||||
/* ARB_shadow & EXT_shadow_funcs */
|
||||
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
|
||||
((c->Program.ShadowSamplers & (1U << inst->U.I.TexSrcUnit)) ||
|
||||
(compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
|
||||
rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
|
||||
/* ARB_shadow & EXT_shadow_funcs */
|
||||
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
|
||||
((c->Program.ShadowSamplers & (1U << inst->U.I.TexSrcUnit)) ||
|
||||
(compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
|
||||
rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
|
||||
|
||||
if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
|
||||
inst->U.I.Opcode = RC_OPCODE_MOV;
|
||||
if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
|
||||
inst->U.I.Opcode = RC_OPCODE_MOV;
|
||||
|
||||
if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
|
||||
inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
|
||||
} else {
|
||||
inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
|
||||
}
|
||||
if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
|
||||
inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
|
||||
} else {
|
||||
inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
|
||||
}
|
||||
|
||||
return 1;
|
||||
} else {
|
||||
struct rc_instruction * inst_rcp = NULL;
|
||||
struct rc_instruction *inst_mul, *inst_add, *inst_cmp;
|
||||
unsigned tmp_texsample;
|
||||
unsigned tmp_sum;
|
||||
int pass, fail;
|
||||
return 1;
|
||||
} else {
|
||||
struct rc_instruction *inst_rcp = NULL;
|
||||
struct rc_instruction *inst_mul, *inst_add, *inst_cmp;
|
||||
unsigned tmp_texsample;
|
||||
unsigned tmp_sum;
|
||||
int pass, fail;
|
||||
|
||||
/* Save the output register. */
|
||||
struct rc_dst_register output_reg = inst->U.I.DstReg;
|
||||
unsigned saturate_mode = inst->U.I.SaturateMode;
|
||||
/* Save the output register. */
|
||||
struct rc_dst_register output_reg = inst->U.I.DstReg;
|
||||
unsigned saturate_mode = inst->U.I.SaturateMode;
|
||||
|
||||
/* Redirect TEX to a new temp. */
|
||||
tmp_texsample = rc_find_free_temporary(c);
|
||||
inst->U.I.SaturateMode = 0;
|
||||
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.DstReg.Index = tmp_texsample;
|
||||
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
|
||||
/* Redirect TEX to a new temp. */
|
||||
tmp_texsample = rc_find_free_temporary(c);
|
||||
inst->U.I.SaturateMode = 0;
|
||||
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.DstReg.Index = tmp_texsample;
|
||||
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
|
||||
|
||||
tmp_sum = rc_find_free_temporary(c);
|
||||
tmp_sum = rc_find_free_temporary(c);
|
||||
|
||||
if (inst->U.I.Opcode == RC_OPCODE_TXP) {
|
||||
/* Compute 1/W. */
|
||||
inst_rcp = rc_insert_new_instruction(c, inst);
|
||||
inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
|
||||
inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_rcp->U.I.DstReg.Index = tmp_sum;
|
||||
inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_rcp->U.I.SrcReg[0].Swizzle =
|
||||
RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
|
||||
}
|
||||
if (inst->U.I.Opcode == RC_OPCODE_TXP) {
|
||||
/* Compute 1/W. */
|
||||
inst_rcp = rc_insert_new_instruction(c, inst);
|
||||
inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
|
||||
inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_rcp->U.I.DstReg.Index = tmp_sum;
|
||||
inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_rcp->U.I.SrcReg[0].Swizzle =
|
||||
RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
|
||||
}
|
||||
|
||||
/* Divide Z by W (if it's TXP) and saturate. */
|
||||
inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
|
||||
inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV;
|
||||
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.DstReg.Index = tmp_sum;
|
||||
inst_mul->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
|
||||
inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_mul->U.I.SrcReg[0].Swizzle =
|
||||
RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
|
||||
if (inst->U.I.Opcode == RC_OPCODE_TXP) {
|
||||
inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.SrcReg[1].Index = tmp_sum;
|
||||
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
|
||||
}
|
||||
/* Divide Z by W (if it's TXP) and saturate. */
|
||||
inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
|
||||
inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV;
|
||||
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.DstReg.Index = tmp_sum;
|
||||
inst_mul->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
|
||||
inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_mul->U.I.SrcReg[0].Swizzle =
|
||||
RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
|
||||
if (inst->U.I.Opcode == RC_OPCODE_TXP) {
|
||||
inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.SrcReg[1].Index = tmp_sum;
|
||||
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
|
||||
}
|
||||
|
||||
/* Add the depth texture value. */
|
||||
inst_add = rc_insert_new_instruction(c, inst_mul);
|
||||
inst_add->U.I.Opcode = RC_OPCODE_ADD;
|
||||
inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_add->U.I.DstReg.Index = tmp_sum;
|
||||
inst_add->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst_add->U.I.SrcReg[0].Index = tmp_sum;
|
||||
inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
|
||||
inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
|
||||
inst_add->U.I.SrcReg[1].Index = tmp_texsample;
|
||||
inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
|
||||
/* Add the depth texture value. */
|
||||
inst_add = rc_insert_new_instruction(c, inst_mul);
|
||||
inst_add->U.I.Opcode = RC_OPCODE_ADD;
|
||||
inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_add->U.I.DstReg.Index = tmp_sum;
|
||||
inst_add->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst_add->U.I.SrcReg[0].Index = tmp_sum;
|
||||
inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
|
||||
inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
|
||||
inst_add->U.I.SrcReg[1].Index = tmp_texsample;
|
||||
inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
|
||||
|
||||
/* Note that SrcReg[0] is r, SrcReg[1] is tex and:
|
||||
* LESS: r < tex <=> -tex+r < 0
|
||||
* GEQUAL: r >= tex <=> not (-tex+r < 0)
|
||||
* GREATER: r > tex <=> tex-r < 0
|
||||
* LEQUAL: r <= tex <=> not ( tex-r < 0)
|
||||
* EQUAL: GEQUAL
|
||||
* NOTEQUAL:LESS
|
||||
*/
|
||||
/* Note that SrcReg[0] is r, SrcReg[1] is tex and:
|
||||
* LESS: r < tex <=> -tex+r < 0
|
||||
* GEQUAL: r >= tex <=> not (-tex+r < 0)
|
||||
* GREATER: r > tex <=> tex-r < 0
|
||||
* LEQUAL: r <= tex <=> not ( tex-r < 0)
|
||||
* EQUAL: GEQUAL
|
||||
* NOTEQUAL:LESS
|
||||
*/
|
||||
|
||||
/* This negates either r or tex: */
|
||||
if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL ||
|
||||
comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL)
|
||||
inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;
|
||||
else
|
||||
inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
|
||||
/* This negates either r or tex: */
|
||||
if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL ||
|
||||
comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL)
|
||||
inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;
|
||||
else
|
||||
inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
|
||||
|
||||
/* This negates the whole expression: */
|
||||
if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER ||
|
||||
comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
|
||||
pass = 1;
|
||||
fail = 2;
|
||||
} else {
|
||||
pass = 2;
|
||||
fail = 1;
|
||||
}
|
||||
/* This negates the whole expression: */
|
||||
if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER ||
|
||||
comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
|
||||
pass = 1;
|
||||
fail = 2;
|
||||
} else {
|
||||
pass = 2;
|
||||
fail = 1;
|
||||
}
|
||||
|
||||
inst_cmp = rc_insert_new_instruction(c, inst_add);
|
||||
inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
|
||||
inst_cmp->U.I.SaturateMode = saturate_mode;
|
||||
inst_cmp->U.I.DstReg = output_reg;
|
||||
inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
|
||||
inst_cmp->U.I.SrcReg[0].Swizzle =
|
||||
combine_swizzles(RC_SWIZZLE_WWWW,
|
||||
compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle);
|
||||
inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
|
||||
inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
|
||||
inst_cmp = rc_insert_new_instruction(c, inst_add);
|
||||
inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
|
||||
inst_cmp->U.I.SaturateMode = saturate_mode;
|
||||
inst_cmp->U.I.DstReg = output_reg;
|
||||
inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
|
||||
inst_cmp->U.I.SrcReg[0].Swizzle = combine_swizzles(
|
||||
RC_SWIZZLE_WWWW, compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle);
|
||||
inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
|
||||
inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
|
||||
|
||||
assert(tmp_texsample != tmp_sum);
|
||||
}
|
||||
}
|
||||
assert(tmp_texsample != tmp_sum);
|
||||
}
|
||||
}
|
||||
|
||||
/* R300 cannot sample from rectangles and the wrap mode fallback needs
|
||||
* normalized coordinates anyway. */
|
||||
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
|
||||
is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {
|
||||
scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);
|
||||
inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
|
||||
}
|
||||
/* R300 cannot sample from rectangles and the wrap mode fallback needs
|
||||
* normalized coordinates anyway. */
|
||||
if (inst->U.I.Opcode != RC_OPCODE_KIL && is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {
|
||||
scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);
|
||||
inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
|
||||
}
|
||||
|
||||
/* Divide by W if needed. */
|
||||
if (inst->U.I.Opcode == RC_OPCODE_TXP &&
|
||||
(wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT ||
|
||||
compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) {
|
||||
projective_divide(compiler, inst);
|
||||
}
|
||||
/* Divide by W if needed. */
|
||||
if (inst->U.I.Opcode == RC_OPCODE_TXP &&
|
||||
(wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT ||
|
||||
compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) {
|
||||
projective_divide(compiler, inst);
|
||||
}
|
||||
|
||||
/* Texture wrap modes don't work on NPOT textures.
|
||||
*
|
||||
* Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
|
||||
* mirroring are not. If we need to repeat, we do:
|
||||
*
|
||||
* MUL temp, texcoord, <scaling factor constant>
|
||||
* FRC temp, temp ; Discard integer portion of coords
|
||||
*
|
||||
* This gives us coords in [0, 1].
|
||||
*
|
||||
* Mirroring is trickier. We're going to start out like repeat:
|
||||
*
|
||||
* MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
|
||||
* MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
|
||||
* ; so scale to [0, 1]
|
||||
* FRC temp, temp ; Make the pattern repeat
|
||||
* MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
|
||||
* ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
|
||||
* ; The pattern is backwards, so reverse it (1-x).
|
||||
*
|
||||
* This gives us coords in [0, 1].
|
||||
*
|
||||
* ~ C & M. ;)
|
||||
*/
|
||||
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
|
||||
wrapmode != RC_WRAP_NONE) {
|
||||
struct rc_instruction *inst_mov;
|
||||
unsigned temp = rc_find_free_temporary(c);
|
||||
/* Texture wrap modes don't work on NPOT textures.
|
||||
*
|
||||
* Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
|
||||
* mirroring are not. If we need to repeat, we do:
|
||||
*
|
||||
* MUL temp, texcoord, <scaling factor constant>
|
||||
* FRC temp, temp ; Discard integer portion of coords
|
||||
*
|
||||
* This gives us coords in [0, 1].
|
||||
*
|
||||
* Mirroring is trickier. We're going to start out like repeat:
|
||||
*
|
||||
* MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
|
||||
* MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
|
||||
* ; so scale to [0, 1]
|
||||
* FRC temp, temp ; Make the pattern repeat
|
||||
* MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
|
||||
* ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
|
||||
* ; The pattern is backwards, so reverse it (1-x).
|
||||
*
|
||||
* This gives us coords in [0, 1].
|
||||
*
|
||||
* ~ C & M. ;)
|
||||
*/
|
||||
if (inst->U.I.Opcode != RC_OPCODE_KIL && wrapmode != RC_WRAP_NONE) {
|
||||
struct rc_instruction *inst_mov;
|
||||
unsigned temp = rc_find_free_temporary(c);
|
||||
|
||||
if (wrapmode == RC_WRAP_REPEAT) {
|
||||
/* Both instructions will be paired up. */
|
||||
struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
|
||||
if (wrapmode == RC_WRAP_REPEAT) {
|
||||
/* Both instructions will be paired up. */
|
||||
struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
|
||||
|
||||
inst_frc->U.I.Opcode = RC_OPCODE_FRC;
|
||||
inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_frc->U.I.DstReg.Index = temp;
|
||||
inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
|
||||
/*
|
||||
* Function:
|
||||
* f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
|
||||
*
|
||||
* Code:
|
||||
* MUL temp, src0, 0.5
|
||||
* FRC temp, temp
|
||||
* MAD temp, temp, 2, -1
|
||||
* ADD temp, 1, -abs(temp)
|
||||
*/
|
||||
inst_frc->U.I.Opcode = RC_OPCODE_FRC;
|
||||
inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_frc->U.I.DstReg.Index = temp;
|
||||
inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
|
||||
/*
|
||||
* Function:
|
||||
* f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
|
||||
*
|
||||
* Code:
|
||||
* MUL temp, src0, 0.5
|
||||
* FRC temp, temp
|
||||
* MAD temp, temp, 2, -1
|
||||
* ADD temp, 1, -abs(temp)
|
||||
*/
|
||||
|
||||
struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
|
||||
unsigned two, two_swizzle;
|
||||
struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
|
||||
unsigned two, two_swizzle;
|
||||
|
||||
inst_mul = rc_insert_new_instruction(c, inst->Prev);
|
||||
inst_mul = rc_insert_new_instruction(c, inst->Prev);
|
||||
|
||||
inst_mul->U.I.Opcode = RC_OPCODE_MUL;
|
||||
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.DstReg.Index = temp;
|
||||
inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
|
||||
inst_mul->U.I.Opcode = RC_OPCODE_MUL;
|
||||
inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mul->U.I.DstReg.Index = temp;
|
||||
inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
|
||||
|
||||
inst_frc = rc_insert_new_instruction(c, inst->Prev);
|
||||
inst_frc = rc_insert_new_instruction(c, inst->Prev);
|
||||
|
||||
inst_frc->U.I.Opcode = RC_OPCODE_FRC;
|
||||
inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_frc->U.I.DstReg.Index = temp;
|
||||
inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst_frc->U.I.SrcReg[0].Index = temp;
|
||||
inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
|
||||
inst_frc->U.I.Opcode = RC_OPCODE_FRC;
|
||||
inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_frc->U.I.DstReg.Index = temp;
|
||||
inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst_frc->U.I.SrcReg[0].Index = temp;
|
||||
inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
|
||||
|
||||
two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
|
||||
inst_mad = rc_insert_new_instruction(c, inst->Prev);
|
||||
two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
|
||||
inst_mad = rc_insert_new_instruction(c, inst->Prev);
|
||||
|
||||
inst_mad->U.I.Opcode = RC_OPCODE_MAD;
|
||||
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mad->U.I.DstReg.Index = temp;
|
||||
inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst_mad->U.I.SrcReg[0].Index = temp;
|
||||
inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
|
||||
inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
|
||||
inst_mad->U.I.SrcReg[1].Index = two;
|
||||
inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
|
||||
inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
|
||||
inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
|
||||
inst_mad->U.I.Opcode = RC_OPCODE_MAD;
|
||||
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mad->U.I.DstReg.Index = temp;
|
||||
inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst_mad->U.I.SrcReg[0].Index = temp;
|
||||
inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
|
||||
inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
|
||||
inst_mad->U.I.SrcReg[1].Index = two;
|
||||
inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
|
||||
inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
|
||||
inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
|
||||
|
||||
inst_add = rc_insert_new_instruction(c, inst->Prev);
|
||||
inst_add = rc_insert_new_instruction(c, inst->Prev);
|
||||
|
||||
inst_add->U.I.Opcode = RC_OPCODE_ADD;
|
||||
inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_add->U.I.DstReg.Index = temp;
|
||||
inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
|
||||
inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
|
||||
inst_add->U.I.SrcReg[1].Index = temp;
|
||||
inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
|
||||
inst_add->U.I.SrcReg[1].Abs = 1;
|
||||
inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
|
||||
} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
|
||||
/*
|
||||
* Mirrored clamp modes are bloody simple, we just use abs
|
||||
* to mirror [0, 1] into [-1, 0]. This works for
|
||||
* all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
|
||||
*/
|
||||
struct rc_instruction *inst_mov;
|
||||
inst_add->U.I.Opcode = RC_OPCODE_ADD;
|
||||
inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_add->U.I.DstReg.Index = temp;
|
||||
inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
|
||||
inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
|
||||
inst_add->U.I.SrcReg[1].Index = temp;
|
||||
inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
|
||||
inst_add->U.I.SrcReg[1].Abs = 1;
|
||||
inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
|
||||
} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
|
||||
/*
|
||||
* Mirrored clamp modes are bloody simple, we just use abs
|
||||
* to mirror [0, 1] into [-1, 0]. This works for
|
||||
* all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
|
||||
*/
|
||||
struct rc_instruction *inst_mov;
|
||||
|
||||
inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.DstReg.Index = temp;
|
||||
inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_mov->U.I.SrcReg[0].Abs = 1;
|
||||
}
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.DstReg.Index = temp;
|
||||
inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_mov->U.I.SrcReg[0].Abs = 1;
|
||||
}
|
||||
|
||||
/* Preserve W for TXP/TXB. */
|
||||
inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
/* Preserve W for TXP/TXB. */
|
||||
inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.DstReg.Index = temp;
|
||||
inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.DstReg.Index = temp;
|
||||
inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
|
||||
reset_srcreg(&inst->U.I.SrcReg[0]);
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = temp;
|
||||
}
|
||||
reset_srcreg(&inst->U.I.SrcReg[0]);
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = temp;
|
||||
}
|
||||
|
||||
/* NPOT -> POT conversion for 3D textures. */
|
||||
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
|
||||
compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {
|
||||
struct rc_instruction *inst_mov;
|
||||
unsigned temp = rc_find_free_temporary(c);
|
||||
/* NPOT -> POT conversion for 3D textures. */
|
||||
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
|
||||
compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {
|
||||
struct rc_instruction *inst_mov;
|
||||
unsigned temp = rc_find_free_temporary(c);
|
||||
|
||||
/* Saturate XYZ. */
|
||||
inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.DstReg.Index = temp;
|
||||
inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
/* Saturate XYZ. */
|
||||
inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.DstReg.Index = temp;
|
||||
inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
|
||||
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
|
||||
/* Copy W. */
|
||||
inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.DstReg.Index = temp;
|
||||
inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
/* Copy W. */
|
||||
inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.DstReg.Index = temp;
|
||||
inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
|
||||
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
|
||||
reset_srcreg(&inst->U.I.SrcReg[0]);
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = temp;
|
||||
reset_srcreg(&inst->U.I.SrcReg[0]);
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = temp;
|
||||
|
||||
scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
|
||||
}
|
||||
scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
|
||||
}
|
||||
|
||||
/* Cannot write texture to output registers or with saturate (all chips),
|
||||
* or with masks (non-r500). */
|
||||
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
|
||||
(inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
|
||||
inst->U.I.SaturateMode ||
|
||||
(!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
|
||||
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
|
||||
/* Cannot write texture to output registers or with saturate (all chips),
|
||||
* or with masks (non-r500). */
|
||||
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
|
||||
(inst->U.I.DstReg.File != RC_FILE_TEMPORARY || inst->U.I.SaturateMode ||
|
||||
(!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
|
||||
struct rc_instruction *inst_mov = rc_insert_new_instruction(c, inst);
|
||||
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;
|
||||
inst_mov->U.I.DstReg = inst->U.I.DstReg;
|
||||
inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;
|
||||
inst_mov->U.I.DstReg = inst->U.I.DstReg;
|
||||
inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
|
||||
|
||||
inst->U.I.SaturateMode = 0;
|
||||
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
|
||||
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
|
||||
}
|
||||
inst->U.I.SaturateMode = 0;
|
||||
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
|
||||
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
|
||||
}
|
||||
|
||||
/* Cannot read texture coordinate from constants file */
|
||||
if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
|
||||
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
/* Cannot read texture coordinate from constants file */
|
||||
if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
|
||||
struct rc_instruction *inst_mov = rc_insert_new_instruction(c, inst->Prev);
|
||||
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
|
||||
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
|
||||
inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
|
||||
inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
|
||||
inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
|
||||
|
||||
reset_srcreg(&inst->U.I.SrcReg[0]);
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
|
||||
}
|
||||
reset_srcreg(&inst->U.I.SrcReg[0]);
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
|
||||
inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
|
||||
}
|
||||
|
||||
return 1;
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,9 +9,6 @@
|
|||
#include "radeon_compiler.h"
|
||||
#include "radeon_program.h"
|
||||
|
||||
int radeonTransformTEX(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst,
|
||||
void* data);
|
||||
int radeonTransformTEX(struct radeon_compiler *c, struct rc_instruction *inst, void *data);
|
||||
|
||||
#endif /* __RADEON_PROGRAM_TEX_H_ */
|
||||
|
|
|
|||
|
|
@ -9,435 +9,494 @@
|
|||
|
||||
#define VERBOSE 0
|
||||
|
||||
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
|
||||
#define DBG(...) \
|
||||
do { \
|
||||
if (VERBOSE) \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
const struct rc_class rc_class_list_vp [] = {
|
||||
{RC_REG_CLASS_VP_SINGLE, 4,
|
||||
{RC_MASK_X,
|
||||
RC_MASK_Y,
|
||||
RC_MASK_Z,
|
||||
RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_VP_DOUBLE, 6,
|
||||
{RC_MASK_X | RC_MASK_Y,
|
||||
RC_MASK_X | RC_MASK_Z,
|
||||
RC_MASK_X | RC_MASK_W,
|
||||
RC_MASK_Y | RC_MASK_Z,
|
||||
RC_MASK_Y | RC_MASK_W,
|
||||
RC_MASK_Z | RC_MASK_W}},
|
||||
{RC_REG_CLASS_VP_TRIPLE, 4,
|
||||
{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
|
||||
RC_MASK_X | RC_MASK_Y | RC_MASK_W,
|
||||
RC_MASK_X | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_VP_QUADRUPLE, 1,
|
||||
{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}}
|
||||
};
|
||||
const struct rc_class rc_class_list_vp[] = {
|
||||
{
|
||||
RC_REG_CLASS_VP_SINGLE,
|
||||
4,
|
||||
{RC_MASK_X,
|
||||
RC_MASK_Y,
|
||||
RC_MASK_Z,
|
||||
RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_VP_DOUBLE,
|
||||
6,
|
||||
{RC_MASK_X | RC_MASK_Y,
|
||||
RC_MASK_X | RC_MASK_Z,
|
||||
RC_MASK_X | RC_MASK_W,
|
||||
RC_MASK_Y | RC_MASK_Z,
|
||||
RC_MASK_Y | RC_MASK_W,
|
||||
RC_MASK_Z | RC_MASK_W},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_VP_TRIPLE,
|
||||
4,
|
||||
{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
|
||||
RC_MASK_X | RC_MASK_Y | RC_MASK_W,
|
||||
RC_MASK_X | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_VP_QUADRUPLE,
|
||||
1,
|
||||
{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
}};
|
||||
|
||||
const struct rc_class rc_class_list_fp [] = {
|
||||
{RC_REG_CLASS_FP_SINGLE, 3,
|
||||
{RC_MASK_X,
|
||||
RC_MASK_Y,
|
||||
RC_MASK_Z,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_DOUBLE, 3,
|
||||
{RC_MASK_X | RC_MASK_Y,
|
||||
RC_MASK_X | RC_MASK_Z,
|
||||
RC_MASK_Y | RC_MASK_Z,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_TRIPLE, 1,
|
||||
{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_ALPHA, 1,
|
||||
{RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA, 3,
|
||||
{RC_MASK_X | RC_MASK_W,
|
||||
RC_MASK_Y | RC_MASK_W,
|
||||
RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA, 3,
|
||||
{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
|
||||
RC_MASK_X | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA, 1,
|
||||
{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_X, 1,
|
||||
{RC_MASK_X,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_Y, 1,
|
||||
{RC_MASK_Y,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_Z, 1,
|
||||
{RC_MASK_Z,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_XY, 1,
|
||||
{RC_MASK_X | RC_MASK_Y,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_YZ, 1,
|
||||
{RC_MASK_Y | RC_MASK_Z,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_XZ, 1,
|
||||
{RC_MASK_X | RC_MASK_Z,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_XW, 1,
|
||||
{RC_MASK_X | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_YW, 1,
|
||||
{RC_MASK_Y | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_ZW, 1,
|
||||
{RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_XYW, 1,
|
||||
{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_YZW, 1,
|
||||
{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}},
|
||||
{RC_REG_CLASS_FP_XZW, 1,
|
||||
{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE}}
|
||||
};
|
||||
const struct rc_class rc_class_list_fp[] = {
|
||||
{
|
||||
RC_REG_CLASS_FP_SINGLE,
|
||||
3,
|
||||
{RC_MASK_X,
|
||||
RC_MASK_Y,
|
||||
RC_MASK_Z,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_DOUBLE,
|
||||
3,
|
||||
{RC_MASK_X | RC_MASK_Y,
|
||||
RC_MASK_X | RC_MASK_Z,
|
||||
RC_MASK_Y | RC_MASK_Z,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_TRIPLE,
|
||||
1,
|
||||
{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_ALPHA,
|
||||
1,
|
||||
{RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA,
|
||||
3,
|
||||
{RC_MASK_X | RC_MASK_W,
|
||||
RC_MASK_Y | RC_MASK_W,
|
||||
RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA,
|
||||
3,
|
||||
{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
|
||||
RC_MASK_X | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA,
|
||||
1,
|
||||
{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_X,
|
||||
1,
|
||||
{RC_MASK_X,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_Y,
|
||||
1,
|
||||
{RC_MASK_Y,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_Z,
|
||||
1,
|
||||
{RC_MASK_Z,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_XY,
|
||||
1,
|
||||
{RC_MASK_X | RC_MASK_Y,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_YZ,
|
||||
1,
|
||||
{RC_MASK_Y | RC_MASK_Z,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_XZ,
|
||||
1,
|
||||
{RC_MASK_X | RC_MASK_Z,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_XW,
|
||||
1,
|
||||
{RC_MASK_X | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_YW,
|
||||
1,
|
||||
{RC_MASK_Y | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_ZW,
|
||||
1,
|
||||
{RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_XYW,
|
||||
1,
|
||||
{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_YZW,
|
||||
1,
|
||||
{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
},
|
||||
{
|
||||
RC_REG_CLASS_FP_XZW,
|
||||
1,
|
||||
{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE,
|
||||
RC_MASK_NONE},
|
||||
}};
|
||||
|
||||
static void print_live_intervals(struct live_intervals * src)
|
||||
static void
|
||||
print_live_intervals(struct live_intervals *src)
|
||||
{
|
||||
if (!src || !src->Used) {
|
||||
DBG("(null)");
|
||||
return;
|
||||
}
|
||||
if (!src || !src->Used) {
|
||||
DBG("(null)");
|
||||
return;
|
||||
}
|
||||
|
||||
DBG("(%i,%i)", src->Start, src->End);
|
||||
DBG("(%i,%i)", src->Start, src->End);
|
||||
}
|
||||
|
||||
static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
|
||||
static int
|
||||
overlap_live_intervals(struct live_intervals *a, struct live_intervals *b)
|
||||
{
|
||||
if (VERBOSE) {
|
||||
DBG("overlap_live_intervals: ");
|
||||
print_live_intervals(a);
|
||||
DBG(" to ");
|
||||
print_live_intervals(b);
|
||||
DBG("\n");
|
||||
}
|
||||
if (VERBOSE) {
|
||||
DBG("overlap_live_intervals: ");
|
||||
print_live_intervals(a);
|
||||
DBG(" to ");
|
||||
print_live_intervals(b);
|
||||
DBG("\n");
|
||||
}
|
||||
|
||||
if (!a->Used || !b->Used) {
|
||||
DBG(" unused interval\n");
|
||||
return 0;
|
||||
}
|
||||
if (!a->Used || !b->Used) {
|
||||
DBG(" unused interval\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (a->Start > b->Start) {
|
||||
if (a->Start < b->End) {
|
||||
DBG(" overlap\n");
|
||||
return 1;
|
||||
}
|
||||
} else if (b->Start > a->Start) {
|
||||
if (b->Start < a->End) {
|
||||
DBG(" overlap\n");
|
||||
return 1;
|
||||
}
|
||||
} else { /* a->Start == b->Start */
|
||||
if (a->Start != a->End && b->Start != b->End) {
|
||||
DBG(" overlap\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if (a->Start > b->Start) {
|
||||
if (a->Start < b->End) {
|
||||
DBG(" overlap\n");
|
||||
return 1;
|
||||
}
|
||||
} else if (b->Start > a->Start) {
|
||||
if (b->Start < a->End) {
|
||||
DBG(" overlap\n");
|
||||
return 1;
|
||||
}
|
||||
} else { /* a->Start == b->Start */
|
||||
if (a->Start != a->End && b->Start != b->End) {
|
||||
DBG(" overlap\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
DBG(" no overlap\n");
|
||||
DBG(" no overlap\n");
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int rc_find_class(
|
||||
const struct rc_class * classes,
|
||||
unsigned int writemask,
|
||||
unsigned int max_writemask_count)
|
||||
int
|
||||
rc_find_class(const struct rc_class *classes, unsigned int writemask,
|
||||
unsigned int max_writemask_count)
|
||||
{
|
||||
unsigned int i;
|
||||
for (i = 0; i < RC_REG_CLASS_FP_COUNT; i++) {
|
||||
unsigned int j;
|
||||
if (classes[i].WritemaskCount > max_writemask_count) {
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < classes[i].WritemaskCount; j++) {
|
||||
if (classes[i].Writemasks[j] == writemask) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
unsigned int i;
|
||||
for (i = 0; i < RC_REG_CLASS_FP_COUNT; i++) {
|
||||
unsigned int j;
|
||||
if (classes[i].WritemaskCount > max_writemask_count) {
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < classes[i].WritemaskCount; j++) {
|
||||
if (classes[i].Writemasks[j] == writemask) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
unsigned int rc_overlap_live_intervals_array(
|
||||
struct live_intervals * a,
|
||||
struct live_intervals * b)
|
||||
unsigned int
|
||||
rc_overlap_live_intervals_array(struct live_intervals *a, struct live_intervals *b)
|
||||
{
|
||||
unsigned int a_chan, b_chan;
|
||||
for (a_chan = 0; a_chan < 4; a_chan++) {
|
||||
for (b_chan = 0; b_chan < 4; b_chan++) {
|
||||
if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
unsigned int a_chan, b_chan;
|
||||
for (a_chan = 0; a_chan < 4; a_chan++) {
|
||||
for (b_chan = 0; b_chan < 4; b_chan++) {
|
||||
if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if VERBOSE
|
||||
static void print_reg(int reg)
|
||||
static void
|
||||
print_reg(int reg)
|
||||
{
|
||||
unsigned int index = reg_get_index(reg);
|
||||
unsigned int mask = reg_get_writemask(reg);
|
||||
fprintf(stderr, "Temp[%u].%c%c%c%c", index,
|
||||
mask & RC_MASK_X ? 'x' : '_',
|
||||
mask & RC_MASK_Y ? 'y' : '_',
|
||||
mask & RC_MASK_Z ? 'z' : '_',
|
||||
mask & RC_MASK_W ? 'w' : '_');
|
||||
unsigned int index = reg_get_index(reg);
|
||||
unsigned int mask = reg_get_writemask(reg);
|
||||
fprintf(stderr, "Temp[%u].%c%c%c%c", index, mask & RC_MASK_X ? 'x' : '_',
|
||||
mask & RC_MASK_Y ? 'y' : '_', mask & RC_MASK_Z ? 'z' : '_',
|
||||
mask & RC_MASK_W ? 'w' : '_');
|
||||
}
|
||||
#endif
|
||||
|
||||
static void add_register_conflicts(
|
||||
struct ra_regs * regs,
|
||||
unsigned int max_temp_regs)
|
||||
static void
|
||||
add_register_conflicts(struct ra_regs *regs, unsigned int max_temp_regs)
|
||||
{
|
||||
unsigned int index, a_mask, b_mask;
|
||||
for (index = 0; index < max_temp_regs; index++) {
|
||||
for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
|
||||
for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
|
||||
b_mask++) {
|
||||
if (a_mask & b_mask) {
|
||||
ra_add_reg_conflict(regs,
|
||||
get_reg_id(index, a_mask),
|
||||
get_reg_id(index, b_mask));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
unsigned int index, a_mask, b_mask;
|
||||
for (index = 0; index < max_temp_regs; index++) {
|
||||
for (a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
|
||||
for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; b_mask++) {
|
||||
if (a_mask & b_mask) {
|
||||
ra_add_reg_conflict(regs, get_reg_id(index, a_mask), get_reg_id(index, b_mask));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void rc_build_interference_graph(
|
||||
struct ra_graph * graph,
|
||||
struct rc_list * variables)
|
||||
void
|
||||
rc_build_interference_graph(struct ra_graph *graph, struct rc_list *variables)
|
||||
{
|
||||
unsigned node_index;
|
||||
struct rc_list * var_ptr;
|
||||
unsigned node_index;
|
||||
struct rc_list *var_ptr;
|
||||
|
||||
/* Build the interference graph */
|
||||
for (var_ptr = variables, node_index = 0; var_ptr;
|
||||
var_ptr = var_ptr->Next, node_index++) {
|
||||
struct rc_list * a, * b;
|
||||
unsigned int b_index;
|
||||
/* Build the interference graph */
|
||||
for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) {
|
||||
struct rc_list *a, *b;
|
||||
unsigned int b_index;
|
||||
|
||||
for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
|
||||
b; b = b->Next, b_index++) {
|
||||
struct rc_variable * var_a = a->Item;
|
||||
while (var_a) {
|
||||
struct rc_variable * var_b = b->Item;
|
||||
while (var_b) {
|
||||
if (rc_overlap_live_intervals_array(var_a->Live, var_b->Live)) {
|
||||
ra_add_node_interference(graph,
|
||||
node_index, b_index);
|
||||
}
|
||||
var_b = var_b->Friend;
|
||||
}
|
||||
var_a = var_a->Friend;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; b; b = b->Next, b_index++) {
|
||||
struct rc_variable *var_a = a->Item;
|
||||
while (var_a) {
|
||||
struct rc_variable *var_b = b->Item;
|
||||
while (var_b) {
|
||||
if (rc_overlap_live_intervals_array(var_a->Live, var_b->Live)) {
|
||||
ra_add_node_interference(graph, node_index, b_index);
|
||||
}
|
||||
var_b = var_b->Friend;
|
||||
}
|
||||
var_a = var_a->Friend;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void rc_init_regalloc_state(struct rc_regalloc_state *s, enum rc_program_type prog)
|
||||
void
|
||||
rc_init_regalloc_state(struct rc_regalloc_state *s, enum rc_program_type prog)
|
||||
{
|
||||
unsigned i, j, index, class_count, max_temps;
|
||||
unsigned **ra_q_values;
|
||||
unsigned i, j, index, class_count, max_temps;
|
||||
unsigned **ra_q_values;
|
||||
|
||||
/* Pre-computed q values. This array describes the maximum number of
|
||||
* a class's [row] registers that are in conflict with a single
|
||||
* register from another class [column].
|
||||
*
|
||||
* For example:
|
||||
* q_values[0][2] is 3, because a register from class 2
|
||||
* (RC_REG_CLASS_FP_TRIPLE) may conflict with at most 3 registers from
|
||||
* class 0 (RC_REG_CLASS_FP_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
|
||||
* and T0.z.
|
||||
*
|
||||
* q_values[2][0] is 1, because a register from class 0
|
||||
* (RC_REG_CLASS_FP_SINGLE) may conflict with at most 1 register from
|
||||
* class 2 (RC_REG_CLASS_FP_TRIPLE) e.g. T0.x conflicts with T0.xyz
|
||||
*
|
||||
* The q values for each register class [row] will never be greater
|
||||
* than the maximum number of writemask combinations for that class.
|
||||
*
|
||||
* For example:
|
||||
*
|
||||
* Class 2 (RC_REG_CLASS_FP_TRIPLE) only has 1 writemask combination,
|
||||
* so no value in q_values[2][0..RC_REG_CLASS_FP_COUNT] will be greater
|
||||
* than 1.
|
||||
*/
|
||||
const unsigned q_values_fp[RC_REG_CLASS_FP_COUNT][RC_REG_CLASS_FP_COUNT] = {
|
||||
{1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
|
||||
{2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
|
||||
{1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
|
||||
{1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
|
||||
{2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
|
||||
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
|
||||
{1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
|
||||
{1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
|
||||
{1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
|
||||
{1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
|
||||
};
|
||||
/* Pre-computed q values. This array describes the maximum number of
|
||||
* a class's [row] registers that are in conflict with a single
|
||||
* register from another class [column].
|
||||
*
|
||||
* For example:
|
||||
* q_values[0][2] is 3, because a register from class 2
|
||||
* (RC_REG_CLASS_FP_TRIPLE) may conflict with at most 3 registers from
|
||||
* class 0 (RC_REG_CLASS_FP_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
|
||||
* and T0.z.
|
||||
*
|
||||
* q_values[2][0] is 1, because a register from class 0
|
||||
* (RC_REG_CLASS_FP_SINGLE) may conflict with at most 1 register from
|
||||
* class 2 (RC_REG_CLASS_FP_TRIPLE) e.g. T0.x conflicts with T0.xyz
|
||||
*
|
||||
* The q values for each register class [row] will never be greater
|
||||
* than the maximum number of writemask combinations for that class.
|
||||
*
|
||||
* For example:
|
||||
*
|
||||
* Class 2 (RC_REG_CLASS_FP_TRIPLE) only has 1 writemask combination,
|
||||
* so no value in q_values[2][0..RC_REG_CLASS_FP_COUNT] will be greater
|
||||
* than 1.
|
||||
*/
|
||||
const unsigned q_values_fp[RC_REG_CLASS_FP_COUNT][RC_REG_CLASS_FP_COUNT] = {
|
||||
{1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
|
||||
{2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
|
||||
{1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
|
||||
{1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
|
||||
{2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
|
||||
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
|
||||
{1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
|
||||
{1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
|
||||
{1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
|
||||
{1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
|
||||
{1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}};
|
||||
|
||||
const unsigned q_values_vp[RC_REG_CLASS_VP_COUNT][RC_REG_CLASS_VP_COUNT] = {
|
||||
{1, 2, 3, 4},
|
||||
{3, 5, 6, 6},
|
||||
{3, 4, 4, 4},
|
||||
{1, 1, 1, 1}
|
||||
};
|
||||
const unsigned q_values_vp[RC_REG_CLASS_VP_COUNT][RC_REG_CLASS_VP_COUNT] = {{1, 2, 3, 4},
|
||||
{3, 5, 6, 6},
|
||||
{3, 4, 4, 4},
|
||||
{1, 1, 1, 1}};
|
||||
|
||||
if (prog == RC_FRAGMENT_PROGRAM) {
|
||||
s->class_list = rc_class_list_fp;
|
||||
class_count = RC_REG_CLASS_FP_COUNT;
|
||||
max_temps = R500_PFS_NUM_TEMP_REGS;
|
||||
} else {
|
||||
s->class_list = rc_class_list_vp;
|
||||
class_count = RC_REG_CLASS_VP_COUNT;
|
||||
max_temps = R300_VS_MAX_TEMPS;
|
||||
}
|
||||
if (prog == RC_FRAGMENT_PROGRAM) {
|
||||
s->class_list = rc_class_list_fp;
|
||||
class_count = RC_REG_CLASS_FP_COUNT;
|
||||
max_temps = R500_PFS_NUM_TEMP_REGS;
|
||||
} else {
|
||||
s->class_list = rc_class_list_vp;
|
||||
class_count = RC_REG_CLASS_VP_COUNT;
|
||||
max_temps = R300_VS_MAX_TEMPS;
|
||||
}
|
||||
|
||||
/* Allocate the main ra data structure */
|
||||
s->regs = ra_alloc_reg_set(NULL, max_temps * RC_MASK_XYZW,
|
||||
true);
|
||||
/* Allocate the main ra data structure */
|
||||
s->regs = ra_alloc_reg_set(NULL, max_temps * RC_MASK_XYZW, true);
|
||||
|
||||
/* Create the register classes */
|
||||
for (i = 0; i < class_count; i++) {
|
||||
const struct rc_class *class = &s->class_list[i];
|
||||
s->classes[class->ID] = ra_alloc_reg_class(s->regs);
|
||||
/* Create the register classes */
|
||||
for (i = 0; i < class_count; i++) {
|
||||
const struct rc_class *class = &s->class_list[i];
|
||||
s->classes[class->ID] = ra_alloc_reg_class(s->regs);
|
||||
|
||||
/* Assign registers to the classes */
|
||||
for (index = 0; index < max_temps; index++) {
|
||||
for (j = 0; j < class->WritemaskCount; j++) {
|
||||
int reg_id = get_reg_id(index,
|
||||
class->Writemasks[j]);
|
||||
ra_class_add_reg(s->classes[class->ID], reg_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Assign registers to the classes */
|
||||
for (index = 0; index < max_temps; index++) {
|
||||
for (j = 0; j < class->WritemaskCount; j++) {
|
||||
int reg_id = get_reg_id(index, class->Writemasks[j]);
|
||||
ra_class_add_reg(s->classes[class->ID], reg_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Set the q values. The q_values array is indexed based on
|
||||
* the rc_reg_class ID (RC_REG_CLASS_FP_*) which might be
|
||||
* different than the ID assigned to that class by ra.
|
||||
* This is why we need to manually construct this list.
|
||||
*/
|
||||
ra_q_values = MALLOC(class_count * sizeof(unsigned *));
|
||||
/* Set the q values. The q_values array is indexed based on
|
||||
* the rc_reg_class ID (RC_REG_CLASS_FP_*) which might be
|
||||
* different than the ID assigned to that class by ra.
|
||||
* This is why we need to manually construct this list.
|
||||
*/
|
||||
ra_q_values = MALLOC(class_count * sizeof(unsigned *));
|
||||
|
||||
for (i = 0; i < class_count; i++) {
|
||||
ra_q_values[i] = MALLOC(class_count * sizeof(unsigned));
|
||||
for (j = 0; j < class_count; j++) {
|
||||
if (prog == RC_FRAGMENT_PROGRAM)
|
||||
ra_q_values[i][j] = q_values_fp[i][j];
|
||||
else
|
||||
ra_q_values[i][j] = q_values_vp[i][j];
|
||||
}
|
||||
}
|
||||
for (i = 0; i < class_count; i++) {
|
||||
ra_q_values[i] = MALLOC(class_count * sizeof(unsigned));
|
||||
for (j = 0; j < class_count; j++) {
|
||||
if (prog == RC_FRAGMENT_PROGRAM)
|
||||
ra_q_values[i][j] = q_values_fp[i][j];
|
||||
else
|
||||
ra_q_values[i][j] = q_values_vp[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
/* Add register conflicts */
|
||||
add_register_conflicts(s->regs, max_temps);
|
||||
/* Add register conflicts */
|
||||
add_register_conflicts(s->regs, max_temps);
|
||||
|
||||
ra_set_finalize(s->regs, ra_q_values);
|
||||
ra_set_finalize(s->regs, ra_q_values);
|
||||
|
||||
for (i = 0; i < class_count; i++) {
|
||||
FREE(ra_q_values[i]);
|
||||
}
|
||||
FREE(ra_q_values);
|
||||
for (i = 0; i < class_count; i++) {
|
||||
FREE(ra_q_values[i]);
|
||||
}
|
||||
FREE(ra_q_values);
|
||||
}
|
||||
|
||||
void rc_destroy_regalloc_state(struct rc_regalloc_state *s)
|
||||
void
|
||||
rc_destroy_regalloc_state(struct rc_regalloc_state *s)
|
||||
{
|
||||
ralloc_free(s->regs);
|
||||
ralloc_free(s->regs);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,114 +9,111 @@
|
|||
#ifndef RADEON_REGALLOC_H
|
||||
#define RADEON_REGALLOC_H
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "util/register_allocate.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/ralloc.h"
|
||||
|
||||
#include "radeon_variable.h"
|
||||
|
||||
struct ra_regs;
|
||||
|
||||
enum rc_reg_class {
|
||||
RC_REG_CLASS_FP_SINGLE,
|
||||
RC_REG_CLASS_FP_DOUBLE,
|
||||
RC_REG_CLASS_FP_TRIPLE,
|
||||
RC_REG_CLASS_FP_ALPHA,
|
||||
RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA,
|
||||
RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA,
|
||||
RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA,
|
||||
RC_REG_CLASS_FP_X,
|
||||
RC_REG_CLASS_FP_Y,
|
||||
RC_REG_CLASS_FP_Z,
|
||||
RC_REG_CLASS_FP_XY,
|
||||
RC_REG_CLASS_FP_YZ,
|
||||
RC_REG_CLASS_FP_XZ,
|
||||
RC_REG_CLASS_FP_XW,
|
||||
RC_REG_CLASS_FP_YW,
|
||||
RC_REG_CLASS_FP_ZW,
|
||||
RC_REG_CLASS_FP_XYW,
|
||||
RC_REG_CLASS_FP_YZW,
|
||||
RC_REG_CLASS_FP_XZW,
|
||||
RC_REG_CLASS_FP_COUNT
|
||||
RC_REG_CLASS_FP_SINGLE,
|
||||
RC_REG_CLASS_FP_DOUBLE,
|
||||
RC_REG_CLASS_FP_TRIPLE,
|
||||
RC_REG_CLASS_FP_ALPHA,
|
||||
RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA,
|
||||
RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA,
|
||||
RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA,
|
||||
RC_REG_CLASS_FP_X,
|
||||
RC_REG_CLASS_FP_Y,
|
||||
RC_REG_CLASS_FP_Z,
|
||||
RC_REG_CLASS_FP_XY,
|
||||
RC_REG_CLASS_FP_YZ,
|
||||
RC_REG_CLASS_FP_XZ,
|
||||
RC_REG_CLASS_FP_XW,
|
||||
RC_REG_CLASS_FP_YW,
|
||||
RC_REG_CLASS_FP_ZW,
|
||||
RC_REG_CLASS_FP_XYW,
|
||||
RC_REG_CLASS_FP_YZW,
|
||||
RC_REG_CLASS_FP_XZW,
|
||||
RC_REG_CLASS_FP_COUNT
|
||||
};
|
||||
|
||||
enum rc_reg_class_vp {
|
||||
RC_REG_CLASS_VP_SINGLE,
|
||||
RC_REG_CLASS_VP_DOUBLE,
|
||||
RC_REG_CLASS_VP_TRIPLE,
|
||||
RC_REG_CLASS_VP_QUADRUPLE,
|
||||
RC_REG_CLASS_VP_COUNT
|
||||
RC_REG_CLASS_VP_SINGLE,
|
||||
RC_REG_CLASS_VP_DOUBLE,
|
||||
RC_REG_CLASS_VP_TRIPLE,
|
||||
RC_REG_CLASS_VP_QUADRUPLE,
|
||||
RC_REG_CLASS_VP_COUNT
|
||||
};
|
||||
|
||||
struct rc_regalloc_state {
|
||||
struct ra_regs *regs;
|
||||
struct ra_class *classes[RC_REG_CLASS_FP_COUNT];
|
||||
const struct rc_class *class_list;
|
||||
struct ra_regs *regs;
|
||||
struct ra_class *classes[RC_REG_CLASS_FP_COUNT];
|
||||
const struct rc_class *class_list;
|
||||
};
|
||||
|
||||
struct register_info {
|
||||
struct live_intervals Live[4];
|
||||
struct live_intervals Live[4];
|
||||
|
||||
unsigned int Used:1;
|
||||
unsigned int Allocated:1;
|
||||
unsigned int File:3;
|
||||
unsigned int Index:RC_REGISTER_INDEX_BITS;
|
||||
unsigned int Writemask;
|
||||
unsigned int Used : 1;
|
||||
unsigned int Allocated : 1;
|
||||
unsigned int File : 3;
|
||||
unsigned int Index : RC_REGISTER_INDEX_BITS;
|
||||
unsigned int Writemask;
|
||||
};
|
||||
|
||||
struct regalloc_state {
|
||||
struct radeon_compiler * C;
|
||||
struct radeon_compiler *C;
|
||||
|
||||
struct register_info * Input;
|
||||
unsigned int NumInputs;
|
||||
struct register_info *Input;
|
||||
unsigned int NumInputs;
|
||||
|
||||
struct register_info * Temporary;
|
||||
unsigned int NumTemporaries;
|
||||
struct register_info *Temporary;
|
||||
unsigned int NumTemporaries;
|
||||
|
||||
unsigned int Simple;
|
||||
int LoopEnd;
|
||||
unsigned int Simple;
|
||||
int LoopEnd;
|
||||
};
|
||||
|
||||
struct rc_class {
|
||||
enum rc_reg_class ID;
|
||||
enum rc_reg_class ID;
|
||||
|
||||
unsigned int WritemaskCount;
|
||||
unsigned int WritemaskCount;
|
||||
|
||||
/** List of writemasks that belong to this class */
|
||||
unsigned int Writemasks[6];
|
||||
/** List of writemasks that belong to this class */
|
||||
unsigned int Writemasks[6];
|
||||
};
|
||||
|
||||
int rc_find_class(
|
||||
const struct rc_class * classes,
|
||||
unsigned int writemask,
|
||||
unsigned int max_writemask_count);
|
||||
int rc_find_class(const struct rc_class *classes, unsigned int writemask,
|
||||
unsigned int max_writemask_count);
|
||||
|
||||
unsigned int rc_overlap_live_intervals_array(
|
||||
struct live_intervals * a,
|
||||
struct live_intervals * b);
|
||||
unsigned int rc_overlap_live_intervals_array(struct live_intervals *a, struct live_intervals *b);
|
||||
|
||||
static inline unsigned int reg_get_index(int reg)
|
||||
static inline unsigned int
|
||||
reg_get_index(int reg)
|
||||
{
|
||||
return reg / RC_MASK_XYZW;
|
||||
return reg / RC_MASK_XYZW;
|
||||
};
|
||||
|
||||
static inline unsigned int reg_get_writemask(int reg)
|
||||
static inline unsigned int
|
||||
reg_get_writemask(int reg)
|
||||
{
|
||||
return (reg % RC_MASK_XYZW) + 1;
|
||||
return (reg % RC_MASK_XYZW) + 1;
|
||||
};
|
||||
|
||||
static inline int get_reg_id(unsigned int index, unsigned int writemask)
|
||||
static inline int
|
||||
get_reg_id(unsigned int index, unsigned int writemask)
|
||||
{
|
||||
assert(writemask);
|
||||
if (writemask == 0) {
|
||||
return 0;
|
||||
}
|
||||
return (index * RC_MASK_XYZW) + (writemask - 1);
|
||||
assert(writemask);
|
||||
if (writemask == 0) {
|
||||
return 0;
|
||||
}
|
||||
return (index * RC_MASK_XYZW) + (writemask - 1);
|
||||
}
|
||||
|
||||
void rc_build_interference_graph(
|
||||
struct ra_graph * graph,
|
||||
struct rc_list * variables);
|
||||
void rc_build_interference_graph(struct ra_graph *graph, struct rc_list *variables);
|
||||
|
||||
void rc_init_regalloc_state(struct rc_regalloc_state *s, enum rc_program_type prog);
|
||||
void rc_destroy_regalloc_state(struct rc_regalloc_state *s);
|
||||
|
|
|
|||
|
|
@ -3,278 +3,279 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include "radeon_remove_constants.h"
|
||||
#include "radeon_dataflow.h"
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include "util/bitscan.h"
|
||||
#include "radeon_dataflow.h"
|
||||
|
||||
struct const_remap_state {
|
||||
/* Used when emitting shader constants. */
|
||||
struct const_remap *remap_table;
|
||||
/* Used when rewriting registers */
|
||||
struct const_remap *inv_remap_table;
|
||||
/* Old constant layout. */
|
||||
struct rc_constant *constants;
|
||||
/* New constant layout. */
|
||||
struct rc_constant_list new_constants;
|
||||
/* Marks immediates that are used as a vector. Those will be just copied. */
|
||||
uint8_t *is_used_as_vector;
|
||||
bool has_rel_addr;
|
||||
bool are_externals_remapped;
|
||||
bool is_identity;
|
||||
/* Used when emitting shader constants. */
|
||||
struct const_remap *remap_table;
|
||||
/* Used when rewriting registers */
|
||||
struct const_remap *inv_remap_table;
|
||||
/* Old constant layout. */
|
||||
struct rc_constant *constants;
|
||||
/* New constant layout. */
|
||||
struct rc_constant_list new_constants;
|
||||
/* Marks immediates that are used as a vector. Those will be just copied. */
|
||||
uint8_t *is_used_as_vector;
|
||||
bool has_rel_addr;
|
||||
bool are_externals_remapped;
|
||||
bool is_identity;
|
||||
};
|
||||
|
||||
static void remap_regs(struct rc_instruction *inst,
|
||||
struct const_remap *inv_remap_table)
|
||||
static void
|
||||
remap_regs(struct rc_instruction *inst, struct const_remap *inv_remap_table)
|
||||
{
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
for(unsigned src = 0; src < opcode->NumSrcRegs; ++src) {
|
||||
if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT)
|
||||
continue;
|
||||
unsigned old_index = inst->U.I.SrcReg[src].Index;
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
unsigned old_swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
|
||||
if (old_swz <= RC_SWIZZLE_W) {
|
||||
inst->U.I.SrcReg[src].Index = inv_remap_table[old_index].index[old_swz];
|
||||
SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
|
||||
inv_remap_table[old_index].swizzle[old_swz]);
|
||||
}
|
||||
}
|
||||
}
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
for (unsigned src = 0; src < opcode->NumSrcRegs; ++src) {
|
||||
if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT)
|
||||
continue;
|
||||
unsigned old_index = inst->U.I.SrcReg[src].Index;
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
unsigned old_swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
|
||||
if (old_swz <= RC_SWIZZLE_W) {
|
||||
inst->U.I.SrcReg[src].Index = inv_remap_table[old_index].index[old_swz];
|
||||
SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
|
||||
inv_remap_table[old_index].swizzle[old_swz]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void mark_used(void * userdata, struct rc_instruction * inst,
|
||||
struct rc_src_register * src)
|
||||
static void
|
||||
mark_used(void *userdata, struct rc_instruction *inst, struct rc_src_register *src)
|
||||
{
|
||||
struct const_remap_state* d = userdata;
|
||||
struct const_remap_state *d = userdata;
|
||||
|
||||
if (src->File == RC_FILE_CONSTANT) {
|
||||
uint8_t mask = 0;
|
||||
if (src->RelAddr) {
|
||||
d->has_rel_addr = true;
|
||||
} else {
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
char swz = GET_SWZ(src->Swizzle, chan);
|
||||
if (swz > RC_SWIZZLE_W)
|
||||
continue;
|
||||
mask |= 1 << swz;
|
||||
}
|
||||
}
|
||||
d->constants[src->Index].UseMask |= mask;
|
||||
if (d->constants[src->Index].Type == RC_CONSTANT_IMMEDIATE &&
|
||||
util_bitcount(mask) > 1) {
|
||||
d->is_used_as_vector[src->Index] |= mask;
|
||||
}
|
||||
}
|
||||
if (src->File == RC_FILE_CONSTANT) {
|
||||
uint8_t mask = 0;
|
||||
if (src->RelAddr) {
|
||||
d->has_rel_addr = true;
|
||||
} else {
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
char swz = GET_SWZ(src->Swizzle, chan);
|
||||
if (swz > RC_SWIZZLE_W)
|
||||
continue;
|
||||
mask |= 1 << swz;
|
||||
}
|
||||
}
|
||||
d->constants[src->Index].UseMask |= mask;
|
||||
if (d->constants[src->Index].Type == RC_CONSTANT_IMMEDIATE && util_bitcount(mask) > 1) {
|
||||
d->is_used_as_vector[src->Index] |= mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void place_constant_in_free_slot(struct const_remap_state *s, unsigned i)
|
||||
static void
|
||||
place_constant_in_free_slot(struct const_remap_state *s, unsigned i)
|
||||
{
|
||||
unsigned count = s->new_constants.Count;
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
s->inv_remap_table[i].index[chan] = count;
|
||||
s->inv_remap_table[i].swizzle[chan] = chan;
|
||||
if (s->constants[i].UseMask & (1 << chan)) {
|
||||
s->remap_table[count].index[chan] = i;
|
||||
s->remap_table[count].swizzle[chan] = chan;
|
||||
}
|
||||
}
|
||||
s->new_constants.Constants[count] = s->constants[i];
|
||||
unsigned count = s->new_constants.Count;
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
s->inv_remap_table[i].index[chan] = count;
|
||||
s->inv_remap_table[i].swizzle[chan] = chan;
|
||||
if (s->constants[i].UseMask & (1 << chan)) {
|
||||
s->remap_table[count].index[chan] = i;
|
||||
s->remap_table[count].swizzle[chan] = chan;
|
||||
}
|
||||
}
|
||||
s->new_constants.Constants[count] = s->constants[i];
|
||||
|
||||
if (count != i) {
|
||||
if (s->constants[i].Type == RC_CONSTANT_EXTERNAL)
|
||||
s->are_externals_remapped = true;
|
||||
s->is_identity = false;
|
||||
}
|
||||
s->new_constants.Count++;
|
||||
if (count != i) {
|
||||
if (s->constants[i].Type == RC_CONSTANT_EXTERNAL)
|
||||
s->are_externals_remapped = true;
|
||||
s->is_identity = false;
|
||||
}
|
||||
s->new_constants.Count++;
|
||||
}
|
||||
|
||||
static void place_immediate_in_free_slot(struct const_remap_state *s, unsigned i)
|
||||
static void
|
||||
place_immediate_in_free_slot(struct const_remap_state *s, unsigned i)
|
||||
{
|
||||
assert(util_bitcount(s->is_used_as_vector[i]) > 1);
|
||||
assert(util_bitcount(s->is_used_as_vector[i]) > 1);
|
||||
|
||||
unsigned count = s->new_constants.Count;
|
||||
unsigned count = s->new_constants.Count;
|
||||
|
||||
s->new_constants.Constants[count] = s->constants[i];
|
||||
s->new_constants.Constants[count].UseMask = s->is_used_as_vector[i];
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
if (s->constants[i].UseMask & 1 << chan & s->is_used_as_vector[i]) {
|
||||
s->inv_remap_table[i].index[chan] = count;
|
||||
s->inv_remap_table[i].swizzle[chan] = chan;
|
||||
}
|
||||
}
|
||||
if (count != i) {
|
||||
s->is_identity = false;
|
||||
}
|
||||
s->new_constants.Count++;
|
||||
s->new_constants.Constants[count] = s->constants[i];
|
||||
s->new_constants.Constants[count].UseMask = s->is_used_as_vector[i];
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
if (s->constants[i].UseMask & 1 << chan & s->is_used_as_vector[i]) {
|
||||
s->inv_remap_table[i].index[chan] = count;
|
||||
s->inv_remap_table[i].swizzle[chan] = chan;
|
||||
}
|
||||
}
|
||||
if (count != i) {
|
||||
s->is_identity = false;
|
||||
}
|
||||
s->new_constants.Count++;
|
||||
}
|
||||
|
||||
static void try_merge_constants_external(struct const_remap_state *s, unsigned i)
|
||||
static void
|
||||
try_merge_constants_external(struct const_remap_state *s, unsigned i)
|
||||
{
|
||||
assert(util_bitcount(s->constants[i].UseMask) == 1);
|
||||
for (unsigned j = 0; j < s->new_constants.Count; j++) {
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
if (s->remap_table[j].swizzle[chan] == RC_SWIZZLE_UNUSED) {
|
||||
/* Writemask to swizzle */
|
||||
unsigned swizzle = 0;
|
||||
for (; swizzle < 4; swizzle++)
|
||||
if (s->constants[i].UseMask >> swizzle == 1)
|
||||
break;
|
||||
/* Update the remap tables. */
|
||||
s->remap_table[j].index[chan] = i;
|
||||
s->remap_table[j].swizzle[chan] = swizzle;
|
||||
s->inv_remap_table[i].index[swizzle] = j;
|
||||
s->inv_remap_table[i].swizzle[swizzle] = chan;
|
||||
s->are_externals_remapped = true;
|
||||
s->is_identity = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
place_constant_in_free_slot(s, i);
|
||||
assert(util_bitcount(s->constants[i].UseMask) == 1);
|
||||
for (unsigned j = 0; j < s->new_constants.Count; j++) {
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
if (s->remap_table[j].swizzle[chan] == RC_SWIZZLE_UNUSED) {
|
||||
/* Writemask to swizzle */
|
||||
unsigned swizzle = 0;
|
||||
for (; swizzle < 4; swizzle++)
|
||||
if (s->constants[i].UseMask >> swizzle == 1)
|
||||
break;
|
||||
/* Update the remap tables. */
|
||||
s->remap_table[j].index[chan] = i;
|
||||
s->remap_table[j].swizzle[chan] = swizzle;
|
||||
s->inv_remap_table[i].index[swizzle] = j;
|
||||
s->inv_remap_table[i].swizzle[swizzle] = chan;
|
||||
s->are_externals_remapped = true;
|
||||
s->is_identity = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
place_constant_in_free_slot(s, i);
|
||||
}
|
||||
|
||||
static void init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s)
|
||||
static void
|
||||
init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s)
|
||||
{
|
||||
s->is_identity = true;
|
||||
s->is_used_as_vector = malloc(c->Program.Constants.Count);
|
||||
s->new_constants.Constants =
|
||||
malloc(sizeof(struct rc_constant) * c->Program.Constants.Count);
|
||||
s->new_constants._Reserved = c->Program.Constants.Count;
|
||||
s->constants = c->Program.Constants.Constants;
|
||||
memset(s->is_used_as_vector, 0, c->Program.Constants.Count);
|
||||
s->is_identity = true;
|
||||
s->is_used_as_vector = malloc(c->Program.Constants.Count);
|
||||
s->new_constants.Constants = malloc(sizeof(struct rc_constant) * c->Program.Constants.Count);
|
||||
s->new_constants._Reserved = c->Program.Constants.Count;
|
||||
s->constants = c->Program.Constants.Constants;
|
||||
memset(s->is_used_as_vector, 0, c->Program.Constants.Count);
|
||||
|
||||
s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
|
||||
s->inv_remap_table =
|
||||
malloc(c->Program.Constants.Count * sizeof(struct const_remap));
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
/* Clear the UseMask, we will update it later. */
|
||||
s->constants[i].UseMask = 0;
|
||||
for (unsigned swz = 0; swz < 4; swz++) {
|
||||
s->remap_table[i].index[swz] = -1;
|
||||
s->remap_table[i].swizzle[swz] = RC_SWIZZLE_UNUSED;
|
||||
}
|
||||
}
|
||||
s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
|
||||
s->inv_remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
/* Clear the UseMask, we will update it later. */
|
||||
s->constants[i].UseMask = 0;
|
||||
for (unsigned swz = 0; swz < 4; swz++) {
|
||||
s->remap_table[i].index[swz] = -1;
|
||||
s->remap_table[i].swizzle[swz] = RC_SWIZZLE_UNUSED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
|
||||
void
|
||||
rc_remove_unused_constants(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct const_remap **out_remap_table = (struct const_remap **)user;
|
||||
struct rc_constant *constants = c->Program.Constants.Constants;
|
||||
struct const_remap_state remap_state = {};
|
||||
struct const_remap_state *s = &remap_state;
|
||||
struct const_remap **out_remap_table = (struct const_remap **)user;
|
||||
struct rc_constant *constants = c->Program.Constants.Constants;
|
||||
struct const_remap_state remap_state = {};
|
||||
struct const_remap_state *s = &remap_state;
|
||||
|
||||
if (!c->Program.Constants.Count) {
|
||||
*out_remap_table = NULL;
|
||||
return;
|
||||
}
|
||||
if (!c->Program.Constants.Count) {
|
||||
*out_remap_table = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
init_constant_remap_state(c, s);
|
||||
init_constant_remap_state(c, s);
|
||||
|
||||
/* Pass 1: Mark used constants. */
|
||||
for (struct rc_instruction *inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
rc_for_all_reads_src(inst, mark_used, s);
|
||||
}
|
||||
/* Pass 1: Mark used constants. */
|
||||
for (struct rc_instruction *inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
rc_for_all_reads_src(inst, mark_used, s);
|
||||
}
|
||||
|
||||
/* Pass 2: If there is relative addressing or dead constant elimination
|
||||
* is disabled, mark all externals as used. */
|
||||
if (s->has_rel_addr || !c->remove_unused_constants) {
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++)
|
||||
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
|
||||
s->constants[i].UseMask = RC_MASK_XYZW;
|
||||
}
|
||||
/* Pass 2: If there is relative addressing or dead constant elimination
|
||||
* is disabled, mark all externals as used. */
|
||||
if (s->has_rel_addr || !c->remove_unused_constants) {
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++)
|
||||
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
|
||||
s->constants[i].UseMask = RC_MASK_XYZW;
|
||||
}
|
||||
|
||||
/* Pass 3: Make the remapping table and remap constants.
|
||||
* First iterate over used vec2, vec3 and vec4 externals and place them in free
|
||||
* slots. While we could in theory merge 2 vec2 together, it's not worth it
|
||||
* as we would have to a) check that the swizzle is valid, b) transforming
|
||||
* xy to zw would mean we need rgb and alpha source slot, thus it would hurt
|
||||
* us potentially during pair scheduling. */
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type != RC_CONSTANT_EXTERNAL)
|
||||
continue;
|
||||
if (util_bitcount(s->constants[i].UseMask) > 1) {
|
||||
place_constant_in_free_slot(s, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Pass 3: Make the remapping table and remap constants.
|
||||
* First iterate over used vec2, vec3 and vec4 externals and place them in free
|
||||
* slots. While we could in theory merge 2 vec2 together, it's not worth it
|
||||
* as we would have to a) check that the swizzle is valid, b) transforming
|
||||
* xy to zw would mean we need rgb and alpha source slot, thus it would hurt
|
||||
* us potentially during pair scheduling. */
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type != RC_CONSTANT_EXTERNAL)
|
||||
continue;
|
||||
if (util_bitcount(s->constants[i].UseMask) > 1) {
|
||||
place_constant_in_free_slot(s, i);
|
||||
}
|
||||
}
|
||||
/* Now iterate over scalar externals and put them into empty slots. */
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type != RC_CONSTANT_EXTERNAL)
|
||||
continue;
|
||||
if (util_bitcount(s->constants[i].UseMask) == 1)
|
||||
try_merge_constants_external(s, i);
|
||||
}
|
||||
|
||||
/* Now iterate over scalar externals and put them into empty slots. */
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type != RC_CONSTANT_EXTERNAL)
|
||||
continue;
|
||||
if (util_bitcount(s->constants[i].UseMask) == 1)
|
||||
try_merge_constants_external(s, i);
|
||||
}
|
||||
/* Now put immediates which are used as vectors. */
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type == RC_CONSTANT_IMMEDIATE &&
|
||||
util_bitcount(s->constants[i].UseMask) > 0 &&
|
||||
util_bitcount(s->is_used_as_vector[i]) > 0) {
|
||||
place_immediate_in_free_slot(s, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Now put immediates which are used as vectors. */
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type == RC_CONSTANT_IMMEDIATE &&
|
||||
util_bitcount(s->constants[i].UseMask) > 0 &&
|
||||
util_bitcount(s->is_used_as_vector[i]) > 0) {
|
||||
place_immediate_in_free_slot(s, i);
|
||||
}
|
||||
}
|
||||
/* Now walk over scalar immediates and try to:
|
||||
* a) check for duplicates,
|
||||
* b) find free slot.
|
||||
* All of this is already done by rc_constants_add_immediate_scalar,
|
||||
* so just use it.
|
||||
*/
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type != RC_CONSTANT_IMMEDIATE)
|
||||
continue;
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
if ((s->constants[i].UseMask) & (1 << chan) &&
|
||||
(~(s->is_used_as_vector[i]) & (1 << chan))) {
|
||||
unsigned swz;
|
||||
s->inv_remap_table[i].index[chan] = rc_constants_add_immediate_scalar(
|
||||
&s->new_constants, constants[i].u.Immediate[chan], &swz);
|
||||
s->inv_remap_table[i].swizzle[chan] = GET_SWZ(swz, 0);
|
||||
s->is_identity = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Now walk over scalar immediates and try to:
|
||||
* a) check for duplicates,
|
||||
* b) find free slot.
|
||||
* All of this is already done by rc_constants_add_immediate_scalar,
|
||||
* so just use it.
|
||||
*/
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type != RC_CONSTANT_IMMEDIATE)
|
||||
continue;
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
if ((s->constants[i].UseMask) & (1 << chan) &&
|
||||
(~(s->is_used_as_vector[i]) & (1 << chan))) {
|
||||
unsigned swz;
|
||||
s->inv_remap_table[i].index[chan] =
|
||||
rc_constants_add_immediate_scalar(&s->new_constants, constants[i].u.Immediate[chan], &swz);
|
||||
s->inv_remap_table[i].swizzle[chan] = GET_SWZ(swz, 0);
|
||||
s->is_identity = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Finally place state constants. */
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type != RC_CONSTANT_STATE)
|
||||
continue;
|
||||
if (util_bitcount(s->constants[i].UseMask) > 0) {
|
||||
place_constant_in_free_slot(s, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Finally place state constants. */
|
||||
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
|
||||
if (constants[i].Type != RC_CONSTANT_STATE)
|
||||
continue;
|
||||
if (util_bitcount(s->constants[i].UseMask) > 0) {
|
||||
place_constant_in_free_slot(s, i);
|
||||
}
|
||||
}
|
||||
/* is_identity ==> new_count == old_count
|
||||
* !is_identity ==> new_count < old_count */
|
||||
assert(!((s->has_rel_addr || !c->remove_unused_constants) && s->are_externals_remapped));
|
||||
|
||||
/* is_identity ==> new_count == old_count
|
||||
* !is_identity ==> new_count < old_count */
|
||||
assert(!((s->has_rel_addr || !c->remove_unused_constants) && s->are_externals_remapped));
|
||||
/* Pass 4: Redirect reads of all constants to their new locations. */
|
||||
if (!s->is_identity) {
|
||||
for (struct rc_instruction *inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
remap_regs(inst, s->inv_remap_table);
|
||||
}
|
||||
}
|
||||
|
||||
/* Pass 4: Redirect reads of all constants to their new locations. */
|
||||
if (!s->is_identity) {
|
||||
for (struct rc_instruction *inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
remap_regs(inst, s->inv_remap_table);
|
||||
}
|
||||
}
|
||||
/* Set the new constant count. Note that new_count may be less than
|
||||
* Count even though the remapping function is identity. In that case,
|
||||
* the constants have been removed at the end of the array. */
|
||||
rc_constants_destroy(&c->Program.Constants);
|
||||
c->Program.Constants = s->new_constants;
|
||||
|
||||
/* Set the new constant count. Note that new_count may be less than
|
||||
* Count even though the remapping function is identity. In that case,
|
||||
* the constants have been removed at the end of the array. */
|
||||
rc_constants_destroy(&c->Program.Constants);
|
||||
c->Program.Constants = s->new_constants;
|
||||
if (s->are_externals_remapped) {
|
||||
*out_remap_table = s->remap_table;
|
||||
} else {
|
||||
*out_remap_table = NULL;
|
||||
free(s->remap_table);
|
||||
}
|
||||
|
||||
if (s->are_externals_remapped) {
|
||||
*out_remap_table = s->remap_table;
|
||||
} else {
|
||||
*out_remap_table = NULL;
|
||||
free(s->remap_table);
|
||||
}
|
||||
free(s->inv_remap_table);
|
||||
|
||||
free(s->inv_remap_table);
|
||||
|
||||
if (c->Debug & RC_DBG_LOG)
|
||||
rc_constants_print(&c->Program.Constants, s->remap_table);
|
||||
if (c->Debug & RC_DBG_LOG)
|
||||
rc_constants_print(&c->Program.Constants, s->remap_table);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,38 +20,37 @@
|
|||
* This function assumes all the instructions are still of type
|
||||
* RC_INSTRUCTION_NORMAL.
|
||||
*/
|
||||
void rc_rename_regs(struct radeon_compiler *c, void *user)
|
||||
void
|
||||
rc_rename_regs(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct rc_instruction * inst;
|
||||
struct rc_list * variables;
|
||||
struct rc_list * var_ptr;
|
||||
struct rc_instruction *inst;
|
||||
struct rc_list *variables;
|
||||
struct rc_list *var_ptr;
|
||||
|
||||
/* XXX Remove this once the register allocation works with flow control. */
|
||||
for(inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
|
||||
return;
|
||||
}
|
||||
/* XXX Remove this once the register allocation works with flow control. */
|
||||
for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
|
||||
return;
|
||||
}
|
||||
|
||||
variables = rc_get_variables(c);
|
||||
variables = rc_get_variables(c);
|
||||
|
||||
for (var_ptr = variables; var_ptr; var_ptr = var_ptr->Next) {
|
||||
int new_index;
|
||||
unsigned writemask;
|
||||
struct rc_variable * var = var_ptr->Item;
|
||||
for (var_ptr = variables; var_ptr; var_ptr = var_ptr->Next) {
|
||||
int new_index;
|
||||
unsigned writemask;
|
||||
struct rc_variable *var = var_ptr->Item;
|
||||
|
||||
if (var->Inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
|
||||
continue;
|
||||
}
|
||||
if (var->Inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
|
||||
continue;
|
||||
}
|
||||
|
||||
new_index = rc_find_free_temporary(c);
|
||||
if (new_index < 0) {
|
||||
rc_error(c, "Ran out of temporary registers\n");
|
||||
return;
|
||||
}
|
||||
new_index = rc_find_free_temporary(c);
|
||||
if (new_index < 0) {
|
||||
rc_error(c, "Ran out of temporary registers\n");
|
||||
return;
|
||||
}
|
||||
|
||||
writemask = rc_variable_writemask_sum(var);
|
||||
rc_variable_change_dst(var, new_index, writemask);
|
||||
}
|
||||
writemask = rc_variable_writemask_sum(var);
|
||||
rc_variable_change_dst(var, new_index, writemask);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,27 +9,27 @@
|
|||
#include "radeon_program.h"
|
||||
|
||||
struct rc_swizzle_split {
|
||||
unsigned char NumPhases;
|
||||
unsigned char Phase[4];
|
||||
unsigned char NumPhases;
|
||||
unsigned char Phase[4];
|
||||
};
|
||||
|
||||
/**
|
||||
* Describe the swizzling capability of target hardware.
|
||||
*/
|
||||
struct rc_swizzle_caps {
|
||||
/**
|
||||
* Check whether the given swizzle, absolute and negate combination
|
||||
* can be implemented natively by the hardware for this opcode.
|
||||
*
|
||||
* \return 1 if the swizzle is native for the given opcode
|
||||
*/
|
||||
int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);
|
||||
/**
|
||||
* Check whether the given swizzle, absolute and negate combination
|
||||
* can be implemented natively by the hardware for this opcode.
|
||||
*
|
||||
* \return 1 if the swizzle is native for the given opcode
|
||||
*/
|
||||
int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);
|
||||
|
||||
/**
|
||||
* Determine how to split access to the masked channels of the
|
||||
* given source register to obtain ALU-native swizzles.
|
||||
*/
|
||||
void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split);
|
||||
/**
|
||||
* Determine how to split access to the masked channels of the
|
||||
* given source register to obtain ALU-native swizzles.
|
||||
*/
|
||||
void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split *split);
|
||||
};
|
||||
|
||||
extern const struct rc_swizzle_caps r300_vertprog_swizzle_caps;
|
||||
|
|
|
|||
|
|
@ -3,9 +3,9 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "radeon_variable.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "radeon_variable.h"
|
||||
|
||||
#include "memory_pool.h"
|
||||
#include "radeon_compiler_util.h"
|
||||
|
|
@ -19,314 +19,283 @@
|
|||
* and its friends to new_index and new_writemask. This function also takes
|
||||
* care of rewriting the swizzles for the sources of var.
|
||||
*/
|
||||
void rc_variable_change_dst(
|
||||
struct rc_variable * var,
|
||||
unsigned int new_index,
|
||||
unsigned int new_writemask)
|
||||
void
|
||||
rc_variable_change_dst(struct rc_variable *var, unsigned int new_index, unsigned int new_writemask)
|
||||
{
|
||||
struct rc_variable * var_ptr;
|
||||
struct rc_list * readers;
|
||||
unsigned int old_mask = rc_variable_writemask_sum(var);
|
||||
unsigned int conversion_swizzle =
|
||||
rc_make_conversion_swizzle(old_mask, new_writemask);
|
||||
struct rc_variable *var_ptr;
|
||||
struct rc_list *readers;
|
||||
unsigned int old_mask = rc_variable_writemask_sum(var);
|
||||
unsigned int conversion_swizzle = rc_make_conversion_swizzle(old_mask, new_writemask);
|
||||
|
||||
for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) {
|
||||
if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
rc_normal_rewrite_writemask(var_ptr->Inst,
|
||||
conversion_swizzle);
|
||||
var_ptr->Inst->U.I.DstReg.Index = new_index;
|
||||
} else {
|
||||
struct rc_pair_sub_instruction * sub;
|
||||
if (var_ptr->Dst.WriteMask == RC_MASK_W) {
|
||||
assert(new_writemask & RC_MASK_W);
|
||||
sub = &var_ptr->Inst->U.P.Alpha;
|
||||
} else {
|
||||
sub = &var_ptr->Inst->U.P.RGB;
|
||||
rc_pair_rewrite_writemask(sub,
|
||||
conversion_swizzle);
|
||||
}
|
||||
sub->DestIndex = new_index;
|
||||
}
|
||||
}
|
||||
for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) {
|
||||
if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
rc_normal_rewrite_writemask(var_ptr->Inst, conversion_swizzle);
|
||||
var_ptr->Inst->U.I.DstReg.Index = new_index;
|
||||
} else {
|
||||
struct rc_pair_sub_instruction *sub;
|
||||
if (var_ptr->Dst.WriteMask == RC_MASK_W) {
|
||||
assert(new_writemask & RC_MASK_W);
|
||||
sub = &var_ptr->Inst->U.P.Alpha;
|
||||
} else {
|
||||
sub = &var_ptr->Inst->U.P.RGB;
|
||||
rc_pair_rewrite_writemask(sub, conversion_swizzle);
|
||||
}
|
||||
sub->DestIndex = new_index;
|
||||
}
|
||||
}
|
||||
|
||||
readers = rc_variable_readers_union(var);
|
||||
readers = rc_variable_readers_union(var);
|
||||
|
||||
for ( ; readers; readers = readers->Next) {
|
||||
struct rc_reader * reader = readers->Item;
|
||||
if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
reader->U.I.Src->Index = new_index;
|
||||
reader->U.I.Src->Swizzle = rc_rewrite_swizzle(
|
||||
reader->U.I.Src->Swizzle, conversion_swizzle);
|
||||
} else {
|
||||
struct rc_pair_instruction * pair_inst =
|
||||
&reader->Inst->U.P;
|
||||
unsigned int src_type = rc_source_type_swz(
|
||||
reader->U.P.Arg->Swizzle);
|
||||
for (; readers; readers = readers->Next) {
|
||||
struct rc_reader *reader = readers->Item;
|
||||
if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
reader->U.I.Src->Index = new_index;
|
||||
reader->U.I.Src->Swizzle =
|
||||
rc_rewrite_swizzle(reader->U.I.Src->Swizzle, conversion_swizzle);
|
||||
} else {
|
||||
struct rc_pair_instruction *pair_inst = &reader->Inst->U.P;
|
||||
unsigned int src_type = rc_source_type_swz(reader->U.P.Arg->Swizzle);
|
||||
|
||||
int src_index = reader->U.P.Arg->Source;
|
||||
if (src_index == RC_PAIR_PRESUB_SRC) {
|
||||
src_index = rc_pair_get_src_index(
|
||||
pair_inst, reader->U.P.Src);
|
||||
}
|
||||
rc_pair_remove_src(reader->Inst, src_type,
|
||||
src_index);
|
||||
/* Reuse the source index of the source that
|
||||
* was just deleted and set its register
|
||||
* index. We can't use rc_pair_alloc_source
|
||||
* for this because it might return a source
|
||||
* index that is already being used. */
|
||||
if (src_type & RC_SOURCE_RGB) {
|
||||
pair_inst->RGB.Src[src_index]
|
||||
.Used = 1;
|
||||
pair_inst->RGB.Src[src_index]
|
||||
.Index = new_index;
|
||||
pair_inst->RGB.Src[src_index]
|
||||
.File = RC_FILE_TEMPORARY;
|
||||
}
|
||||
if (src_type & RC_SOURCE_ALPHA) {
|
||||
pair_inst->Alpha.Src[src_index]
|
||||
.Used = 1;
|
||||
pair_inst->Alpha.Src[src_index]
|
||||
.Index = new_index;
|
||||
pair_inst->Alpha.Src[src_index]
|
||||
.File = RC_FILE_TEMPORARY;
|
||||
}
|
||||
reader->U.P.Arg->Swizzle = rc_rewrite_swizzle(
|
||||
reader->U.P.Arg->Swizzle, conversion_swizzle);
|
||||
if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) {
|
||||
reader->U.P.Arg->Source = src_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
int src_index = reader->U.P.Arg->Source;
|
||||
if (src_index == RC_PAIR_PRESUB_SRC) {
|
||||
src_index = rc_pair_get_src_index(pair_inst, reader->U.P.Src);
|
||||
}
|
||||
rc_pair_remove_src(reader->Inst, src_type, src_index);
|
||||
/* Reuse the source index of the source that
|
||||
* was just deleted and set its register
|
||||
* index. We can't use rc_pair_alloc_source
|
||||
* for this because it might return a source
|
||||
* index that is already being used. */
|
||||
if (src_type & RC_SOURCE_RGB) {
|
||||
pair_inst->RGB.Src[src_index].Used = 1;
|
||||
pair_inst->RGB.Src[src_index].Index = new_index;
|
||||
pair_inst->RGB.Src[src_index].File = RC_FILE_TEMPORARY;
|
||||
}
|
||||
if (src_type & RC_SOURCE_ALPHA) {
|
||||
pair_inst->Alpha.Src[src_index].Used = 1;
|
||||
pair_inst->Alpha.Src[src_index].Index = new_index;
|
||||
pair_inst->Alpha.Src[src_index].File = RC_FILE_TEMPORARY;
|
||||
}
|
||||
reader->U.P.Arg->Swizzle =
|
||||
rc_rewrite_swizzle(reader->U.P.Arg->Swizzle, conversion_swizzle);
|
||||
if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) {
|
||||
reader->U.P.Arg->Source = src_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the live intervals for var and its friends.
|
||||
*/
|
||||
void rc_variable_compute_live_intervals(struct rc_variable * var)
|
||||
void
|
||||
rc_variable_compute_live_intervals(struct rc_variable *var)
|
||||
{
|
||||
while(var) {
|
||||
unsigned int i;
|
||||
unsigned int start = var->Inst->IP;
|
||||
while (var) {
|
||||
unsigned int i;
|
||||
unsigned int start = var->Inst->IP;
|
||||
|
||||
for (i = 0; i < var->ReaderCount; i++) {
|
||||
unsigned int chan;
|
||||
unsigned int chan_start = start;
|
||||
unsigned int chan_end = var->Readers[i].Inst->IP;
|
||||
unsigned int mask = var->Readers[i].WriteMask;
|
||||
struct rc_instruction * inst;
|
||||
for (i = 0; i < var->ReaderCount; i++) {
|
||||
unsigned int chan;
|
||||
unsigned int chan_start = start;
|
||||
unsigned int chan_end = var->Readers[i].Inst->IP;
|
||||
unsigned int mask = var->Readers[i].WriteMask;
|
||||
struct rc_instruction *inst;
|
||||
|
||||
/* Extend the live interval of T0 to the start of the
|
||||
* loop for sequences like:
|
||||
* BGNLOOP
|
||||
* read T0
|
||||
* ...
|
||||
* write T0
|
||||
* ENDLOOP
|
||||
*/
|
||||
if (var->Readers[i].Inst->IP < start) {
|
||||
struct rc_instruction * bgnloop =
|
||||
rc_match_endloop(var->Readers[i].Inst);
|
||||
chan_start = bgnloop->IP;
|
||||
}
|
||||
/* Extend the live interval of T0 to the start of the
|
||||
* loop for sequences like:
|
||||
* BGNLOOP
|
||||
* read T0
|
||||
* ...
|
||||
* write T0
|
||||
* ENDLOOP
|
||||
*/
|
||||
if (var->Readers[i].Inst->IP < start) {
|
||||
struct rc_instruction *bgnloop = rc_match_endloop(var->Readers[i].Inst);
|
||||
chan_start = bgnloop->IP;
|
||||
}
|
||||
|
||||
/* Extend the live interval of T0 to the start of the
|
||||
* loop in case there is a BRK instruction in the loop
|
||||
* (we don't actually check for a BRK instruction; we
|
||||
* assume there is one somewhere in the loop, which
|
||||
* there usually is) for sequences like:
|
||||
* BGNLOOP
|
||||
* ...
|
||||
* conditional BRK
|
||||
* ...
|
||||
* write T0
|
||||
* ENDLOOP
|
||||
* read T0
|
||||
***************************************************
|
||||
* Extend the live interval of T0 to the end of the
|
||||
* loop for sequences like:
|
||||
* write T0
|
||||
* BGNLOOP
|
||||
* ...
|
||||
* read T0
|
||||
* ENDLOOP
|
||||
*/
|
||||
for (inst = var->Inst; inst != var->Readers[i].Inst;
|
||||
inst = inst->Next) {
|
||||
rc_opcode op = rc_get_flow_control_inst(inst);
|
||||
if (op == RC_OPCODE_ENDLOOP) {
|
||||
struct rc_instruction * bgnloop =
|
||||
rc_match_endloop(inst);
|
||||
if (bgnloop->IP < chan_start) {
|
||||
chan_start = bgnloop->IP;
|
||||
}
|
||||
} else if (op == RC_OPCODE_BGNLOOP) {
|
||||
struct rc_instruction * endloop =
|
||||
rc_match_bgnloop(inst);
|
||||
if (endloop->IP > chan_end) {
|
||||
chan_end = endloop->IP;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Extend the live interval of T0 to the start of the
|
||||
* loop in case there is a BRK instruction in the loop
|
||||
* (we don't actually check for a BRK instruction; we
|
||||
* assume there is one somewhere in the loop, which
|
||||
* there usually is) for sequences like:
|
||||
* BGNLOOP
|
||||
* ...
|
||||
* conditional BRK
|
||||
* ...
|
||||
* write T0
|
||||
* ENDLOOP
|
||||
* read T0
|
||||
***************************************************
|
||||
* Extend the live interval of T0 to the end of the
|
||||
* loop for sequences like:
|
||||
* write T0
|
||||
* BGNLOOP
|
||||
* ...
|
||||
* read T0
|
||||
* ENDLOOP
|
||||
*/
|
||||
for (inst = var->Inst; inst != var->Readers[i].Inst; inst = inst->Next) {
|
||||
rc_opcode op = rc_get_flow_control_inst(inst);
|
||||
if (op == RC_OPCODE_ENDLOOP) {
|
||||
struct rc_instruction *bgnloop = rc_match_endloop(inst);
|
||||
if (bgnloop->IP < chan_start) {
|
||||
chan_start = bgnloop->IP;
|
||||
}
|
||||
} else if (op == RC_OPCODE_BGNLOOP) {
|
||||
struct rc_instruction *endloop = rc_match_bgnloop(inst);
|
||||
if (endloop->IP > chan_end) {
|
||||
chan_end = endloop->IP;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
if ((mask >> chan) & 0x1) {
|
||||
if (!var->Live[chan].Used
|
||||
|| chan_start < var->Live[chan].Start) {
|
||||
var->Live[chan].Start =
|
||||
chan_start;
|
||||
}
|
||||
if (!var->Live[chan].Used
|
||||
|| chan_end > var->Live[chan].End) {
|
||||
var->Live[chan].End = chan_end;
|
||||
}
|
||||
var->Live[chan].Used = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
var = var->Friend;
|
||||
}
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
if ((mask >> chan) & 0x1) {
|
||||
if (!var->Live[chan].Used || chan_start < var->Live[chan].Start) {
|
||||
var->Live[chan].Start = chan_start;
|
||||
}
|
||||
if (!var->Live[chan].Used || chan_end > var->Live[chan].End) {
|
||||
var->Live[chan].End = chan_end;
|
||||
}
|
||||
var->Live[chan].Used = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
var = var->Friend;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return 1 if a and b share a reader
|
||||
* @return 0 if they do not
|
||||
*/
|
||||
static unsigned int readers_intersect(
|
||||
struct rc_variable * a,
|
||||
struct rc_variable * b)
|
||||
static unsigned int
|
||||
readers_intersect(struct rc_variable *a, struct rc_variable *b)
|
||||
{
|
||||
unsigned int a_index, b_index;
|
||||
for (a_index = 0; a_index < a->ReaderCount; a_index++) {
|
||||
struct rc_reader reader_a = a->Readers[a_index];
|
||||
for (b_index = 0; b_index < b->ReaderCount; b_index++) {
|
||||
struct rc_reader reader_b = b->Readers[b_index];
|
||||
if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL
|
||||
&& reader_b.Inst->Type == RC_INSTRUCTION_NORMAL
|
||||
&& reader_a.U.I.Src == reader_b.U.I.Src) {
|
||||
unsigned int a_index, b_index;
|
||||
for (a_index = 0; a_index < a->ReaderCount; a_index++) {
|
||||
struct rc_reader reader_a = a->Readers[a_index];
|
||||
for (b_index = 0; b_index < b->ReaderCount; b_index++) {
|
||||
struct rc_reader reader_b = b->Readers[b_index];
|
||||
if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL &&
|
||||
reader_b.Inst->Type == RC_INSTRUCTION_NORMAL && reader_a.U.I.Src == reader_b.U.I.Src) {
|
||||
|
||||
return 1;
|
||||
}
|
||||
if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR
|
||||
&& reader_b.Inst->Type == RC_INSTRUCTION_PAIR
|
||||
&& reader_a.U.P.Src == reader_b.U.P.Src) {
|
||||
return 1;
|
||||
}
|
||||
if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR &&
|
||||
reader_b.Inst->Type == RC_INSTRUCTION_PAIR && reader_a.U.P.Src == reader_b.U.P.Src) {
|
||||
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void rc_variable_add_friend(
|
||||
struct rc_variable * var,
|
||||
struct rc_variable * friend)
|
||||
void
|
||||
rc_variable_add_friend(struct rc_variable *var, struct rc_variable *friend)
|
||||
{
|
||||
assert(var->Dst.Index == friend->Dst.Index);
|
||||
while(var->Friend) {
|
||||
var = var->Friend;
|
||||
}
|
||||
var->Friend = friend;
|
||||
assert(var->Dst.Index == friend->Dst.Index);
|
||||
while (var->Friend) {
|
||||
var = var->Friend;
|
||||
}
|
||||
var->Friend = friend;
|
||||
}
|
||||
|
||||
struct rc_variable * rc_variable(
|
||||
struct radeon_compiler * c,
|
||||
unsigned int DstFile,
|
||||
unsigned int DstIndex,
|
||||
unsigned int DstWriteMask,
|
||||
struct rc_reader_data * reader_data)
|
||||
struct rc_variable *
|
||||
rc_variable(struct radeon_compiler *c, unsigned int DstFile, unsigned int DstIndex,
|
||||
unsigned int DstWriteMask, struct rc_reader_data *reader_data)
|
||||
{
|
||||
struct rc_variable * new =
|
||||
memory_pool_malloc(&c->Pool, sizeof(struct rc_variable));
|
||||
memset(new, 0, sizeof(struct rc_variable));
|
||||
new->C = c;
|
||||
new->Dst.File = DstFile;
|
||||
new->Dst.Index = DstIndex;
|
||||
new->Dst.WriteMask = DstWriteMask;
|
||||
if (reader_data) {
|
||||
new->Inst = reader_data->Writer;
|
||||
new->ReaderCount = reader_data->ReaderCount;
|
||||
new->Readers = reader_data->Readers;
|
||||
}
|
||||
return new;
|
||||
struct rc_variable *new = memory_pool_malloc(&c->Pool, sizeof(struct rc_variable));
|
||||
memset(new, 0, sizeof(struct rc_variable));
|
||||
new->C = c;
|
||||
new->Dst.File = DstFile;
|
||||
new->Dst.Index = DstIndex;
|
||||
new->Dst.WriteMask = DstWriteMask;
|
||||
if (reader_data) {
|
||||
new->Inst = reader_data->Writer;
|
||||
new->ReaderCount = reader_data->ReaderCount;
|
||||
new->Readers = reader_data->Readers;
|
||||
}
|
||||
return new;
|
||||
}
|
||||
|
||||
static void get_variable_helper(
|
||||
struct rc_list ** variable_list,
|
||||
struct rc_variable * variable)
|
||||
static void
|
||||
get_variable_helper(struct rc_list **variable_list, struct rc_variable *variable)
|
||||
{
|
||||
struct rc_list * list_ptr;
|
||||
for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) {
|
||||
struct rc_variable * var;
|
||||
for (var = list_ptr->Item; var; var = var->Friend) {
|
||||
if (readers_intersect(var, variable)) {
|
||||
rc_variable_add_friend(var, variable);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
rc_list_add(variable_list, rc_list(&variable->C->Pool, variable));
|
||||
struct rc_list *list_ptr;
|
||||
for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) {
|
||||
struct rc_variable *var;
|
||||
for (var = list_ptr->Item; var; var = var->Friend) {
|
||||
if (readers_intersect(var, variable)) {
|
||||
rc_variable_add_friend(var, variable);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
rc_list_add(variable_list, rc_list(&variable->C->Pool, variable));
|
||||
}
|
||||
|
||||
static void get_variable_pair_helper(
|
||||
struct rc_list ** variable_list,
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst,
|
||||
struct rc_pair_sub_instruction * sub_inst)
|
||||
static void
|
||||
get_variable_pair_helper(struct rc_list **variable_list, struct radeon_compiler *c,
|
||||
struct rc_instruction *inst, struct rc_pair_sub_instruction *sub_inst)
|
||||
{
|
||||
struct rc_reader_data reader_data;
|
||||
struct rc_variable * new_var;
|
||||
rc_register_file file;
|
||||
unsigned int writemask;
|
||||
struct rc_reader_data reader_data;
|
||||
struct rc_variable *new_var;
|
||||
rc_register_file file;
|
||||
unsigned int writemask;
|
||||
|
||||
if (sub_inst->Opcode == RC_OPCODE_NOP) {
|
||||
return;
|
||||
}
|
||||
memset(&reader_data, 0, sizeof(struct rc_reader_data));
|
||||
rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL);
|
||||
if (sub_inst->Opcode == RC_OPCODE_NOP) {
|
||||
return;
|
||||
}
|
||||
memset(&reader_data, 0, sizeof(struct rc_reader_data));
|
||||
rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL);
|
||||
|
||||
if (reader_data.ReaderCount == 0) {
|
||||
return;
|
||||
}
|
||||
if (reader_data.ReaderCount == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (sub_inst->WriteMask) {
|
||||
file = RC_FILE_TEMPORARY;
|
||||
writemask = sub_inst->WriteMask;
|
||||
} else if (sub_inst->OutputWriteMask) {
|
||||
file = RC_FILE_OUTPUT;
|
||||
writemask = sub_inst->OutputWriteMask;
|
||||
} else {
|
||||
writemask = 0;
|
||||
file = RC_FILE_NONE;
|
||||
}
|
||||
new_var = rc_variable(c, file, sub_inst->DestIndex, writemask,
|
||||
&reader_data);
|
||||
get_variable_helper(variable_list, new_var);
|
||||
if (sub_inst->WriteMask) {
|
||||
file = RC_FILE_TEMPORARY;
|
||||
writemask = sub_inst->WriteMask;
|
||||
} else if (sub_inst->OutputWriteMask) {
|
||||
file = RC_FILE_OUTPUT;
|
||||
writemask = sub_inst->OutputWriteMask;
|
||||
} else {
|
||||
writemask = 0;
|
||||
file = RC_FILE_NONE;
|
||||
}
|
||||
new_var = rc_variable(c, file, sub_inst->DestIndex, writemask, &reader_data);
|
||||
get_variable_helper(variable_list, new_var);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare function for sorting variable pointers by the lowest instruction
|
||||
* IP from it and its friends.
|
||||
*/
|
||||
static int cmpfunc_variable_by_ip (const void * a, const void * b) {
|
||||
struct rc_variable * var_a = *(struct rc_variable **)a;
|
||||
struct rc_variable * var_b = *(struct rc_variable **)b;
|
||||
unsigned int min_ip_a = var_a->Inst->IP;
|
||||
unsigned int min_ip_b = var_b->Inst->IP;
|
||||
static int
|
||||
cmpfunc_variable_by_ip(const void *a, const void *b)
|
||||
{
|
||||
struct rc_variable *var_a = *(struct rc_variable **)a;
|
||||
struct rc_variable *var_b = *(struct rc_variable **)b;
|
||||
unsigned int min_ip_a = var_a->Inst->IP;
|
||||
unsigned int min_ip_b = var_b->Inst->IP;
|
||||
|
||||
/* Find the minimal IP of a variable and its friends */
|
||||
while (var_a->Friend) {
|
||||
var_a = var_a->Friend;
|
||||
if (var_a->Inst->IP < min_ip_a)
|
||||
min_ip_a = var_a->Inst->IP;
|
||||
}
|
||||
while (var_b->Friend) {
|
||||
var_b = var_b->Friend;
|
||||
if (var_b->Inst->IP < min_ip_b)
|
||||
min_ip_b = var_b->Inst->IP;
|
||||
}
|
||||
/* Find the minimal IP of a variable and its friends */
|
||||
while (var_a->Friend) {
|
||||
var_a = var_a->Friend;
|
||||
if (var_a->Inst->IP < min_ip_a)
|
||||
min_ip_a = var_a->Inst->IP;
|
||||
}
|
||||
while (var_b->Friend) {
|
||||
var_b = var_b->Friend;
|
||||
if (var_b->Inst->IP < min_ip_b)
|
||||
min_ip_b = var_b->Inst->IP;
|
||||
}
|
||||
|
||||
return (int)min_ip_a - (int)min_ip_b;
|
||||
return (int)min_ip_a - (int)min_ip_b;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -336,117 +305,110 @@ static int cmpfunc_variable_by_ip (const void * a, const void * b) {
|
|||
* definition-use chain. Any two variables that share a reader are considered
|
||||
* "friends" and they are linked together via the Friend attribute.
|
||||
*/
|
||||
struct rc_list * rc_get_variables(struct radeon_compiler * c)
|
||||
struct rc_list *
|
||||
rc_get_variables(struct radeon_compiler *c)
|
||||
{
|
||||
struct rc_instruction * inst;
|
||||
struct rc_list * variable_list = NULL;
|
||||
struct rc_instruction *inst;
|
||||
struct rc_list *variable_list = NULL;
|
||||
|
||||
/* We search for the variables in two loops in order to get it right in
|
||||
* the following specific case
|
||||
*
|
||||
* IF aluresult.x___;
|
||||
* ...
|
||||
* MAD temp[0].xyz, src0.000, src0.111, src0.000
|
||||
* MAD temp[0].w, src0.0, src0.1, src0.0
|
||||
* ELSE;
|
||||
* ...
|
||||
* TXB temp[0], temp[1].xy_w, 2D[0] SEM_WAIT SEM_ACQUIRE;
|
||||
* ENDIF;
|
||||
* src0.xyz = input[0], src0.w = input[0], src1.xyz = temp[0], src1.w = temp[0] SEM_WAIT
|
||||
* MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000
|
||||
* MAD temp[1].w, src0.w, src1.w, src0.0
|
||||
*
|
||||
* If we go just in one loop, we will first create two variables for the
|
||||
* temp[0].xyz and temp[0].w. This happens because they don't share a reader
|
||||
* as the src1.xyz and src1.w of the instruction where the value is used are
|
||||
* in theory independent. They are not because the same register is written
|
||||
* also by the texture instruction in the other branch and TEX can't write xyz
|
||||
* and w separately.
|
||||
*
|
||||
* Therefore first search for RC_INSTRUCTION_NORMAL to create variables from
|
||||
* the texture instruction and then the pair instructions will be properly
|
||||
* marked as friends. So we will end with only one variable here as we should.
|
||||
*
|
||||
* This doesn't matter before the pair translation, because everything is
|
||||
* RC_INSTRUCTION_NORMAL.
|
||||
*/
|
||||
for (inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
if (inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
struct rc_reader_data reader_data;
|
||||
struct rc_variable * new_var;
|
||||
memset(&reader_data, 0, sizeof(reader_data));
|
||||
rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
|
||||
if (reader_data.ReaderCount == 0) {
|
||||
/* Variable is only returned if there is both writer
|
||||
* and reader. This means dead writes will not get
|
||||
* register allocated as a result and can overwrite random
|
||||
* registers. Assert on dead writes instead so we can improve
|
||||
* the DCE.
|
||||
*/
|
||||
const struct rc_opcode_info *opcode =
|
||||
rc_get_opcode_info(inst->U.I.Opcode);
|
||||
assert(c->type == RC_FRAGMENT_PROGRAM ||
|
||||
!opcode->HasDstReg ||
|
||||
inst->U.I.DstReg.File == RC_FILE_OUTPUT ||
|
||||
inst->U.I.DstReg.File == RC_FILE_ADDRESS);
|
||||
continue;
|
||||
}
|
||||
new_var = rc_variable(c, inst->U.I.DstReg.File,
|
||||
inst->U.I.DstReg.Index,
|
||||
inst->U.I.DstReg.WriteMask, &reader_data);
|
||||
get_variable_helper(&variable_list, new_var);
|
||||
}
|
||||
}
|
||||
/* We search for the variables in two loops in order to get it right in
|
||||
* the following specific case
|
||||
*
|
||||
* IF aluresult.x___;
|
||||
* ...
|
||||
* MAD temp[0].xyz, src0.000, src0.111, src0.000
|
||||
* MAD temp[0].w, src0.0, src0.1, src0.0
|
||||
* ELSE;
|
||||
* ...
|
||||
* TXB temp[0], temp[1].xy_w, 2D[0] SEM_WAIT SEM_ACQUIRE;
|
||||
* ENDIF;
|
||||
* src0.xyz = input[0], src0.w = input[0], src1.xyz = temp[0], src1.w = temp[0] SEM_WAIT
|
||||
* MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000
|
||||
* MAD temp[1].w, src0.w, src1.w, src0.0
|
||||
*
|
||||
* If we go just in one loop, we will first create two variables for the
|
||||
* temp[0].xyz and temp[0].w. This happens because they don't share a reader
|
||||
* as the src1.xyz and src1.w of the instruction where the value is used are
|
||||
* in theory independent. They are not because the same register is written
|
||||
* also by the texture instruction in the other branch and TEX can't write xyz
|
||||
* and w separately.
|
||||
*
|
||||
* Therefore first search for RC_INSTRUCTION_NORMAL to create variables from
|
||||
* the texture instruction and then the pair instructions will be properly
|
||||
* marked as friends. So we will end with only one variable here as we should.
|
||||
*
|
||||
* This doesn't matter before the pair translation, because everything is
|
||||
* RC_INSTRUCTION_NORMAL.
|
||||
*/
|
||||
for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
if (inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
struct rc_reader_data reader_data;
|
||||
struct rc_variable *new_var;
|
||||
memset(&reader_data, 0, sizeof(reader_data));
|
||||
rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
|
||||
if (reader_data.ReaderCount == 0) {
|
||||
/* Variable is only returned if there is both writer
|
||||
* and reader. This means dead writes will not get
|
||||
* register allocated as a result and can overwrite random
|
||||
* registers. Assert on dead writes instead so we can improve
|
||||
* the DCE.
|
||||
*/
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
assert(c->type == RC_FRAGMENT_PROGRAM || !opcode->HasDstReg ||
|
||||
inst->U.I.DstReg.File == RC_FILE_OUTPUT ||
|
||||
inst->U.I.DstReg.File == RC_FILE_ADDRESS);
|
||||
continue;
|
||||
}
|
||||
new_var = rc_variable(c, inst->U.I.DstReg.File, inst->U.I.DstReg.Index,
|
||||
inst->U.I.DstReg.WriteMask, &reader_data);
|
||||
get_variable_helper(&variable_list, new_var);
|
||||
}
|
||||
}
|
||||
|
||||
bool needs_sorting = false;
|
||||
for (inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
if (inst->Type != RC_INSTRUCTION_NORMAL) {
|
||||
needs_sorting = true;
|
||||
get_variable_pair_helper(&variable_list, c, inst,
|
||||
&inst->U.P.RGB);
|
||||
get_variable_pair_helper(&variable_list, c, inst,
|
||||
&inst->U.P.Alpha);
|
||||
}
|
||||
}
|
||||
bool needs_sorting = false;
|
||||
for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
if (inst->Type != RC_INSTRUCTION_NORMAL) {
|
||||
needs_sorting = true;
|
||||
get_variable_pair_helper(&variable_list, c, inst, &inst->U.P.RGB);
|
||||
get_variable_pair_helper(&variable_list, c, inst, &inst->U.P.Alpha);
|
||||
}
|
||||
}
|
||||
|
||||
if (variable_list && needs_sorting) {
|
||||
unsigned int count = rc_list_count(variable_list);
|
||||
struct rc_variable **variables = memory_pool_malloc(&c->Pool,
|
||||
sizeof(struct rc_variable *) * count);
|
||||
if (variable_list && needs_sorting) {
|
||||
unsigned int count = rc_list_count(variable_list);
|
||||
struct rc_variable **variables =
|
||||
memory_pool_malloc(&c->Pool, sizeof(struct rc_variable *) * count);
|
||||
|
||||
struct rc_list * current = variable_list;
|
||||
for(unsigned int i = 0; current; i++, current = current->Next) {
|
||||
struct rc_variable * var = current->Item;
|
||||
variables[i] = var;
|
||||
}
|
||||
struct rc_list *current = variable_list;
|
||||
for (unsigned int i = 0; current; i++, current = current->Next) {
|
||||
struct rc_variable *var = current->Item;
|
||||
variables[i] = var;
|
||||
}
|
||||
|
||||
qsort(variables, count, sizeof(struct rc_variable *), cmpfunc_variable_by_ip);
|
||||
qsort(variables, count, sizeof(struct rc_variable *), cmpfunc_variable_by_ip);
|
||||
|
||||
current = variable_list;
|
||||
for(unsigned int i = 0; current; i++, current = current->Next) {
|
||||
current->Item = variables[i];
|
||||
}
|
||||
}
|
||||
current = variable_list;
|
||||
for (unsigned int i = 0; current; i++, current = current->Next) {
|
||||
current->Item = variables[i];
|
||||
}
|
||||
}
|
||||
|
||||
return variable_list;
|
||||
return variable_list;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The bitwise or of the writemasks of a variable and all of its
|
||||
* friends.
|
||||
*/
|
||||
unsigned int rc_variable_writemask_sum(struct rc_variable * var)
|
||||
unsigned int
|
||||
rc_variable_writemask_sum(struct rc_variable *var)
|
||||
{
|
||||
unsigned int writemask = 0;
|
||||
while(var) {
|
||||
writemask |= var->Dst.WriteMask;
|
||||
var = var->Friend;
|
||||
}
|
||||
return writemask;
|
||||
unsigned int writemask = 0;
|
||||
while (var) {
|
||||
writemask |= var->Dst.WriteMask;
|
||||
var = var->Friend;
|
||||
}
|
||||
return writemask;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -454,135 +416,121 @@ unsigned int rc_variable_writemask_sum(struct rc_variable * var)
|
|||
* that read from two different variable friends are only included once in
|
||||
* this list.
|
||||
*/
|
||||
struct rc_list * rc_variable_readers_union(struct rc_variable * var)
|
||||
struct rc_list *
|
||||
rc_variable_readers_union(struct rc_variable *var)
|
||||
{
|
||||
struct rc_list * list = NULL;
|
||||
while (var) {
|
||||
unsigned int i;
|
||||
for (i = 0; i < var->ReaderCount; i++) {
|
||||
struct rc_list * temp;
|
||||
struct rc_reader * a = &var->Readers[i];
|
||||
unsigned int match = 0;
|
||||
for (temp = list; temp; temp = temp->Next) {
|
||||
struct rc_reader * b = temp->Item;
|
||||
if (a->Inst->Type != b->Inst->Type) {
|
||||
continue;
|
||||
}
|
||||
if (a->Inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
if (a->U.I.Src == b->U.I.Src) {
|
||||
match = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (a->Inst->Type == RC_INSTRUCTION_PAIR) {
|
||||
if (a->U.P.Arg == b->U.P.Arg
|
||||
&& a->U.P.Src == b->U.P.Src) {
|
||||
match = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (match) {
|
||||
continue;
|
||||
}
|
||||
rc_list_add(&list, rc_list(&var->C->Pool, a));
|
||||
}
|
||||
var = var->Friend;
|
||||
}
|
||||
return list;
|
||||
struct rc_list *list = NULL;
|
||||
while (var) {
|
||||
unsigned int i;
|
||||
for (i = 0; i < var->ReaderCount; i++) {
|
||||
struct rc_list *temp;
|
||||
struct rc_reader *a = &var->Readers[i];
|
||||
unsigned int match = 0;
|
||||
for (temp = list; temp; temp = temp->Next) {
|
||||
struct rc_reader *b = temp->Item;
|
||||
if (a->Inst->Type != b->Inst->Type) {
|
||||
continue;
|
||||
}
|
||||
if (a->Inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
if (a->U.I.Src == b->U.I.Src) {
|
||||
match = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (a->Inst->Type == RC_INSTRUCTION_PAIR) {
|
||||
if (a->U.P.Arg == b->U.P.Arg && a->U.P.Src == b->U.P.Src) {
|
||||
match = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (match) {
|
||||
continue;
|
||||
}
|
||||
rc_list_add(&list, rc_list(&var->C->Pool, a));
|
||||
}
|
||||
var = var->Friend;
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
static unsigned int reader_equals_src(
|
||||
struct rc_reader reader,
|
||||
unsigned int src_type,
|
||||
void * src)
|
||||
static unsigned int
|
||||
reader_equals_src(struct rc_reader reader, unsigned int src_type, void *src)
|
||||
{
|
||||
if (reader.Inst->Type != src_type) {
|
||||
return 0;
|
||||
}
|
||||
if (src_type == RC_INSTRUCTION_NORMAL) {
|
||||
return reader.U.I.Src == src;
|
||||
} else {
|
||||
return reader.U.P.Src == src;
|
||||
}
|
||||
if (reader.Inst->Type != src_type) {
|
||||
return 0;
|
||||
}
|
||||
if (src_type == RC_INSTRUCTION_NORMAL) {
|
||||
return reader.U.I.Src == src;
|
||||
} else {
|
||||
return reader.U.P.Src == src;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int variable_writes_src(
|
||||
struct rc_variable * var,
|
||||
unsigned int src_type,
|
||||
void * src)
|
||||
static unsigned int
|
||||
variable_writes_src(struct rc_variable *var, unsigned int src_type, void *src)
|
||||
{
|
||||
unsigned int i;
|
||||
for (i = 0; i < var->ReaderCount; i++) {
|
||||
if (reader_equals_src(var->Readers[i], src_type, src)) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
unsigned int i;
|
||||
for (i = 0; i < var->ReaderCount; i++) {
|
||||
if (reader_equals_src(var->Readers[i], src_type, src)) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
struct rc_list * rc_variable_list_get_writers(
|
||||
struct rc_list * var_list,
|
||||
unsigned int src_type,
|
||||
void * src)
|
||||
struct rc_list *
|
||||
rc_variable_list_get_writers(struct rc_list *var_list, unsigned int src_type, void *src)
|
||||
{
|
||||
struct rc_list * list_ptr;
|
||||
struct rc_list * writer_list = NULL;
|
||||
for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) {
|
||||
struct rc_variable * var = list_ptr->Item;
|
||||
if (variable_writes_src(var, src_type, src)) {
|
||||
struct rc_variable * friend;
|
||||
rc_list_add(&writer_list, rc_list(&var->C->Pool, var));
|
||||
for (friend = var->Friend; friend;
|
||||
friend = friend->Friend) {
|
||||
if (variable_writes_src(friend, src_type, src)) {
|
||||
rc_list_add(&writer_list,
|
||||
rc_list(&var->C->Pool, friend));
|
||||
}
|
||||
}
|
||||
/* Once we have identified the variable and its
|
||||
* friends that write this source, we can stop
|
||||
* searching, because we know none of the
|
||||
* other variables in the list will write this source.
|
||||
* If they did they would be friends of var.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
return writer_list;
|
||||
struct rc_list *list_ptr;
|
||||
struct rc_list *writer_list = NULL;
|
||||
for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) {
|
||||
struct rc_variable *var = list_ptr->Item;
|
||||
if (variable_writes_src(var, src_type, src)) {
|
||||
struct rc_variable *friend;
|
||||
rc_list_add(&writer_list, rc_list(&var->C->Pool, var));
|
||||
for (friend = var->Friend; friend; friend = friend->Friend) {
|
||||
if (variable_writes_src(friend, src_type, src)) {
|
||||
rc_list_add(&writer_list, rc_list(&var->C->Pool, friend));
|
||||
}
|
||||
}
|
||||
/* Once we have identified the variable and its
|
||||
* friends that write this source, we can stop
|
||||
* searching, because we know none of the
|
||||
* other variables in the list will write this source.
|
||||
* If they did they would be friends of var.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
return writer_list;
|
||||
}
|
||||
|
||||
struct rc_list * rc_variable_list_get_writers_one_reader(
|
||||
struct rc_list * var_list,
|
||||
unsigned int src_type,
|
||||
void * src)
|
||||
struct rc_list *
|
||||
rc_variable_list_get_writers_one_reader(struct rc_list *var_list, unsigned int src_type, void *src)
|
||||
{
|
||||
struct rc_list * writer_list =
|
||||
rc_variable_list_get_writers(var_list, src_type, src);
|
||||
struct rc_list * reader_list =
|
||||
rc_variable_readers_union(writer_list->Item);
|
||||
if (rc_list_count(reader_list) > 1) {
|
||||
return NULL;
|
||||
} else {
|
||||
return writer_list;
|
||||
}
|
||||
struct rc_list *writer_list = rc_variable_list_get_writers(var_list, src_type, src);
|
||||
struct rc_list *reader_list = rc_variable_readers_union(writer_list->Item);
|
||||
if (rc_list_count(reader_list) > 1) {
|
||||
return NULL;
|
||||
} else {
|
||||
return writer_list;
|
||||
}
|
||||
}
|
||||
|
||||
void rc_variable_print(struct rc_variable * var)
|
||||
void
|
||||
rc_variable_print(struct rc_variable *var)
|
||||
{
|
||||
unsigned int i;
|
||||
while (var) {
|
||||
fprintf(stderr, "%u: TEMP[%u].%u: ",
|
||||
var->Inst->IP, var->Dst.Index, var->Dst.WriteMask);
|
||||
for (i = 0; i < 4; i++) {
|
||||
fprintf(stderr, "chan %u: start=%u end=%u ", i,
|
||||
var->Live[i].Start, var->Live[i].End);
|
||||
}
|
||||
fprintf(stderr, "%u readers\n", var->ReaderCount);
|
||||
if (var->Friend) {
|
||||
fprintf(stderr, "Friend: \n\t");
|
||||
}
|
||||
var = var->Friend;
|
||||
}
|
||||
unsigned int i;
|
||||
while (var) {
|
||||
fprintf(stderr, "%u: TEMP[%u].%u: ", var->Inst->IP, var->Dst.Index, var->Dst.WriteMask);
|
||||
for (i = 0; i < 4; i++) {
|
||||
fprintf(stderr, "chan %u: start=%u end=%u ", i, var->Live[i].Start, var->Live[i].End);
|
||||
}
|
||||
fprintf(stderr, "%u readers\n", var->ReaderCount);
|
||||
if (var->Friend) {
|
||||
fprintf(stderr, "Friend: \n\t");
|
||||
}
|
||||
var = var->Friend;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,59 +14,48 @@ struct rc_reader_data;
|
|||
struct rc_readers;
|
||||
|
||||
struct live_intervals {
|
||||
int Start;
|
||||
int End;
|
||||
int Used;
|
||||
int Start;
|
||||
int End;
|
||||
int Used;
|
||||
};
|
||||
|
||||
struct rc_variable {
|
||||
struct radeon_compiler * C;
|
||||
struct rc_dst_register Dst;
|
||||
struct radeon_compiler *C;
|
||||
struct rc_dst_register Dst;
|
||||
|
||||
struct rc_instruction * Inst;
|
||||
unsigned int ReaderCount;
|
||||
struct rc_reader * Readers;
|
||||
struct live_intervals Live[4];
|
||||
struct rc_instruction *Inst;
|
||||
unsigned int ReaderCount;
|
||||
struct rc_reader *Readers;
|
||||
struct live_intervals Live[4];
|
||||
|
||||
/* A friend is a variable that shares a reader with another variable.
|
||||
*/
|
||||
struct rc_variable * Friend;
|
||||
/* A friend is a variable that shares a reader with another variable.
|
||||
*/
|
||||
struct rc_variable *Friend;
|
||||
};
|
||||
|
||||
void rc_variable_change_dst(
|
||||
struct rc_variable * var,
|
||||
unsigned int new_index,
|
||||
unsigned int new_writemask);
|
||||
void rc_variable_change_dst(struct rc_variable *var, unsigned int new_index,
|
||||
unsigned int new_writemask);
|
||||
|
||||
void rc_variable_compute_live_intervals(struct rc_variable * var);
|
||||
void rc_variable_compute_live_intervals(struct rc_variable *var);
|
||||
|
||||
void rc_variable_add_friend(
|
||||
struct rc_variable * var,
|
||||
struct rc_variable * friend);
|
||||
void rc_variable_add_friend(struct rc_variable *var, struct rc_variable *friend);
|
||||
|
||||
struct rc_variable * rc_variable(
|
||||
struct radeon_compiler * c,
|
||||
unsigned int DstFile,
|
||||
unsigned int DstIndex,
|
||||
unsigned int DstWriteMask,
|
||||
struct rc_reader_data * reader_data);
|
||||
struct rc_variable *rc_variable(struct radeon_compiler *c, unsigned int DstFile,
|
||||
unsigned int DstIndex, unsigned int DstWriteMask,
|
||||
struct rc_reader_data *reader_data);
|
||||
|
||||
struct rc_list * rc_get_variables(struct radeon_compiler * c);
|
||||
struct rc_list *rc_get_variables(struct radeon_compiler *c);
|
||||
|
||||
unsigned int rc_variable_writemask_sum(struct rc_variable * var);
|
||||
unsigned int rc_variable_writemask_sum(struct rc_variable *var);
|
||||
|
||||
struct rc_list * rc_variable_readers_union(struct rc_variable * var);
|
||||
struct rc_list *rc_variable_readers_union(struct rc_variable *var);
|
||||
|
||||
struct rc_list * rc_variable_list_get_writers(
|
||||
struct rc_list * var_list,
|
||||
unsigned int src_type,
|
||||
void * src);
|
||||
struct rc_list *rc_variable_list_get_writers(struct rc_list *var_list, unsigned int src_type,
|
||||
void *src);
|
||||
|
||||
struct rc_list * rc_variable_list_get_writers_one_reader(
|
||||
struct rc_list * var_list,
|
||||
unsigned int src_type,
|
||||
void * src);
|
||||
struct rc_list *rc_variable_list_get_writers_one_reader(struct rc_list *var_list,
|
||||
unsigned int src_type, void *src);
|
||||
|
||||
void rc_variable_print(struct rc_variable * var);
|
||||
void rc_variable_print(struct rc_variable *var);
|
||||
|
||||
#endif /* RADEON_VARIABLE_H */
|
||||
|
|
|
|||
|
|
@ -11,259 +11,246 @@
|
|||
#include "radeon_program_constants.h"
|
||||
|
||||
struct vert_fc_state {
|
||||
struct radeon_compiler *C;
|
||||
unsigned BranchDepth;
|
||||
unsigned LoopDepth;
|
||||
unsigned LoopsReserved;
|
||||
int PredStack[R500_PVS_MAX_LOOP_DEPTH];
|
||||
int PredicateReg;
|
||||
struct radeon_compiler *C;
|
||||
unsigned BranchDepth;
|
||||
unsigned LoopDepth;
|
||||
unsigned LoopsReserved;
|
||||
int PredStack[R500_PVS_MAX_LOOP_DEPTH];
|
||||
int PredicateReg;
|
||||
};
|
||||
|
||||
static void build_pred_src(
|
||||
struct rc_src_register * src,
|
||||
struct vert_fc_state * fc_state)
|
||||
static void
|
||||
build_pred_src(struct rc_src_register *src, struct vert_fc_state *fc_state)
|
||||
{
|
||||
src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
|
||||
RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
|
||||
src->File = RC_FILE_TEMPORARY;
|
||||
src->Index = fc_state->PredicateReg;
|
||||
src->Swizzle =
|
||||
RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
|
||||
src->File = RC_FILE_TEMPORARY;
|
||||
src->Index = fc_state->PredicateReg;
|
||||
}
|
||||
|
||||
static void build_pred_dst(
|
||||
struct rc_dst_register * dst,
|
||||
struct vert_fc_state * fc_state)
|
||||
static void
|
||||
build_pred_dst(struct rc_dst_register *dst, struct vert_fc_state *fc_state)
|
||||
{
|
||||
dst->WriteMask = RC_MASK_W;
|
||||
dst->File = RC_FILE_TEMPORARY;
|
||||
dst->Index = fc_state->PredicateReg;
|
||||
dst->WriteMask = RC_MASK_W;
|
||||
dst->File = RC_FILE_TEMPORARY;
|
||||
dst->Index = fc_state->PredicateReg;
|
||||
}
|
||||
|
||||
static void mark_write(void * userdata, struct rc_instruction * inst,
|
||||
rc_register_file file, unsigned int index, unsigned int mask)
|
||||
static void
|
||||
mark_write(void *userdata, struct rc_instruction *inst, rc_register_file file, unsigned int index,
|
||||
unsigned int mask)
|
||||
{
|
||||
unsigned int * writemasks = userdata;
|
||||
unsigned int *writemasks = userdata;
|
||||
|
||||
if (file != RC_FILE_TEMPORARY)
|
||||
return;
|
||||
if (file != RC_FILE_TEMPORARY)
|
||||
return;
|
||||
|
||||
if (index >= R300_VS_MAX_TEMPS)
|
||||
return;
|
||||
if (index >= R300_VS_MAX_TEMPS)
|
||||
return;
|
||||
|
||||
writemasks[index] |= mask;
|
||||
writemasks[index] |= mask;
|
||||
}
|
||||
|
||||
static int reserve_predicate_reg(struct vert_fc_state * fc_state)
|
||||
static int
|
||||
reserve_predicate_reg(struct vert_fc_state *fc_state)
|
||||
{
|
||||
int i;
|
||||
unsigned int writemasks[RC_REGISTER_MAX_INDEX];
|
||||
struct rc_instruction * inst;
|
||||
memset(writemasks, 0, sizeof(writemasks));
|
||||
for(inst = fc_state->C->Program.Instructions.Next;
|
||||
inst != &fc_state->C->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
rc_for_all_writes_mask(inst, mark_write, writemasks);
|
||||
}
|
||||
int i;
|
||||
unsigned int writemasks[RC_REGISTER_MAX_INDEX];
|
||||
struct rc_instruction *inst;
|
||||
memset(writemasks, 0, sizeof(writemasks));
|
||||
for (inst = fc_state->C->Program.Instructions.Next; inst != &fc_state->C->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
rc_for_all_writes_mask(inst, mark_write, writemasks);
|
||||
}
|
||||
|
||||
for(i = 0; i < fc_state->C->max_temp_regs; i++) {
|
||||
/* Most of the control flow instructions only write the
|
||||
* W component of the Predicate Register, but
|
||||
* the docs say that ME_PRED_SET_CLR and
|
||||
* ME_PRED_SET_RESTORE write all components of the
|
||||
* register, so we must reserve a register that has
|
||||
* all its components free. */
|
||||
if (!writemasks[i]) {
|
||||
fc_state->PredicateReg = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == fc_state->C->max_temp_regs) {
|
||||
rc_error(fc_state->C, "No free temporary to use for"
|
||||
" predicate stack counter.\n");
|
||||
return -1;
|
||||
}
|
||||
return 1;
|
||||
for (i = 0; i < fc_state->C->max_temp_regs; i++) {
|
||||
/* Most of the control flow instructions only write the
|
||||
* W component of the Predicate Register, but
|
||||
* the docs say that ME_PRED_SET_CLR and
|
||||
* ME_PRED_SET_RESTORE write all components of the
|
||||
* register, so we must reserve a register that has
|
||||
* all its components free. */
|
||||
if (!writemasks[i]) {
|
||||
fc_state->PredicateReg = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == fc_state->C->max_temp_regs) {
|
||||
rc_error(fc_state->C, "No free temporary to use for"
|
||||
" predicate stack counter.\n");
|
||||
return -1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void lower_bgnloop(
|
||||
struct rc_instruction * inst,
|
||||
struct vert_fc_state * fc_state)
|
||||
static void
|
||||
lower_bgnloop(struct rc_instruction *inst, struct vert_fc_state *fc_state)
|
||||
{
|
||||
struct rc_instruction * new_inst =
|
||||
rc_insert_new_instruction(fc_state->C, inst->Prev);
|
||||
struct rc_instruction *new_inst = rc_insert_new_instruction(fc_state->C, inst->Prev);
|
||||
|
||||
if ((!fc_state->C->is_r500
|
||||
&& fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
|
||||
|| fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
|
||||
rc_error(fc_state->C, "Loops are nested too deep.");
|
||||
return;
|
||||
}
|
||||
if ((!fc_state->C->is_r500 && fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH) ||
|
||||
fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
|
||||
rc_error(fc_state->C, "Loops are nested too deep.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
|
||||
if (fc_state->PredicateReg == -1) {
|
||||
if (reserve_predicate_reg(fc_state) == -1) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
|
||||
if (fc_state->PredicateReg == -1) {
|
||||
if (reserve_predicate_reg(fc_state) == -1) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize the predicate bit to true. */
|
||||
new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
|
||||
build_pred_dst(&new_inst->U.I.DstReg, fc_state);
|
||||
new_inst->U.I.SrcReg[0].Index = 0;
|
||||
new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
|
||||
new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
|
||||
} else {
|
||||
fc_state->PredStack[fc_state->LoopDepth] =
|
||||
fc_state->PredicateReg;
|
||||
/* Copy the current predicate value to this loop's
|
||||
* predicate register */
|
||||
/* Initialize the predicate bit to true. */
|
||||
new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
|
||||
build_pred_dst(&new_inst->U.I.DstReg, fc_state);
|
||||
new_inst->U.I.SrcReg[0].Index = 0;
|
||||
new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
|
||||
new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
|
||||
} else {
|
||||
fc_state->PredStack[fc_state->LoopDepth] = fc_state->PredicateReg;
|
||||
/* Copy the current predicate value to this loop's
|
||||
* predicate register */
|
||||
|
||||
/* Use the old predicate value for src0 */
|
||||
build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
|
||||
/* Use the old predicate value for src0 */
|
||||
build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
|
||||
|
||||
/* Reserve this loop's predicate register */
|
||||
if (reserve_predicate_reg(fc_state) == -1) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Copy the old predicate value to the new register */
|
||||
new_inst->U.I.Opcode = RC_OPCODE_ADD;
|
||||
build_pred_dst(&new_inst->U.I.DstReg, fc_state);
|
||||
new_inst->U.I.SrcReg[1].Index = 0;
|
||||
new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
|
||||
new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
|
||||
}
|
||||
/* Reserve this loop's predicate register */
|
||||
if (reserve_predicate_reg(fc_state) == -1) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Copy the old predicate value to the new register */
|
||||
new_inst->U.I.Opcode = RC_OPCODE_ADD;
|
||||
build_pred_dst(&new_inst->U.I.DstReg, fc_state);
|
||||
new_inst->U.I.SrcReg[1].Index = 0;
|
||||
new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
|
||||
new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
|
||||
}
|
||||
}
|
||||
|
||||
static void lower_brk(
|
||||
struct rc_instruction * inst,
|
||||
struct vert_fc_state * fc_state)
|
||||
static void
|
||||
lower_brk(struct rc_instruction *inst, struct vert_fc_state *fc_state)
|
||||
{
|
||||
if (fc_state->LoopDepth == 1) {
|
||||
inst->U.I.Opcode = RC_OPCODE_RCP;
|
||||
inst->U.I.DstReg.Pred = RC_PRED_SET;
|
||||
inst->U.I.SrcReg[0].Index = 0;
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_NONE;
|
||||
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
|
||||
} else {
|
||||
inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
|
||||
inst->U.I.DstReg.Pred = RC_PRED_SET;
|
||||
}
|
||||
if (fc_state->LoopDepth == 1) {
|
||||
inst->U.I.Opcode = RC_OPCODE_RCP;
|
||||
inst->U.I.DstReg.Pred = RC_PRED_SET;
|
||||
inst->U.I.SrcReg[0].Index = 0;
|
||||
inst->U.I.SrcReg[0].File = RC_FILE_NONE;
|
||||
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
|
||||
} else {
|
||||
inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
|
||||
inst->U.I.DstReg.Pred = RC_PRED_SET;
|
||||
}
|
||||
|
||||
build_pred_dst(&inst->U.I.DstReg, fc_state);
|
||||
build_pred_dst(&inst->U.I.DstReg, fc_state);
|
||||
}
|
||||
|
||||
static void lower_endloop(
|
||||
struct rc_instruction * inst,
|
||||
struct vert_fc_state * fc_state)
|
||||
static void
|
||||
lower_endloop(struct rc_instruction *inst, struct vert_fc_state *fc_state)
|
||||
{
|
||||
struct rc_instruction * new_inst =
|
||||
rc_insert_new_instruction(fc_state->C, inst);
|
||||
struct rc_instruction *new_inst = rc_insert_new_instruction(fc_state->C, inst);
|
||||
|
||||
new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
|
||||
build_pred_dst(&new_inst->U.I.DstReg, fc_state);
|
||||
/* Restore the previous predicate register. */
|
||||
fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
|
||||
build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
|
||||
new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
|
||||
build_pred_dst(&new_inst->U.I.DstReg, fc_state);
|
||||
/* Restore the previous predicate register. */
|
||||
fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
|
||||
build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
|
||||
}
|
||||
|
||||
static void lower_if(
|
||||
struct rc_instruction * inst,
|
||||
struct vert_fc_state * fc_state)
|
||||
static void
|
||||
lower_if(struct rc_instruction *inst, struct vert_fc_state *fc_state)
|
||||
{
|
||||
/* Reserve a temporary to use as our predicate stack counter, if we
|
||||
* don't already have one. */
|
||||
if (fc_state->PredicateReg == -1) {
|
||||
/* If we are inside a loop, the Predicate Register should
|
||||
* have already been defined. */
|
||||
assert(fc_state->LoopDepth == 0);
|
||||
/* Reserve a temporary to use as our predicate stack counter, if we
|
||||
* don't already have one. */
|
||||
if (fc_state->PredicateReg == -1) {
|
||||
/* If we are inside a loop, the Predicate Register should
|
||||
* have already been defined. */
|
||||
assert(fc_state->LoopDepth == 0);
|
||||
|
||||
if (reserve_predicate_reg(fc_state) == -1) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (reserve_predicate_reg(fc_state) == -1) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) {
|
||||
inst->U.I.Opcode = RC_ME_PRED_SNEQ;
|
||||
} else {
|
||||
unsigned swz;
|
||||
inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
|
||||
memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0],
|
||||
sizeof(inst->U.I.SrcReg[1]));
|
||||
swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
|
||||
/* VE_PRED_SNEQ_PUSH needs the branch condition to be in the
|
||||
* w component */
|
||||
inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
|
||||
RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
|
||||
build_pred_src(&inst->U.I.SrcReg[0], fc_state);
|
||||
}
|
||||
build_pred_dst(&inst->U.I.DstReg, fc_state);
|
||||
if (fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) {
|
||||
inst->U.I.Opcode = RC_ME_PRED_SNEQ;
|
||||
} else {
|
||||
unsigned swz;
|
||||
inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
|
||||
memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0], sizeof(inst->U.I.SrcReg[1]));
|
||||
swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
|
||||
/* VE_PRED_SNEQ_PUSH needs the branch condition to be in the
|
||||
* w component */
|
||||
inst->U.I.SrcReg[1].Swizzle =
|
||||
RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
|
||||
build_pred_src(&inst->U.I.SrcReg[0], fc_state);
|
||||
}
|
||||
build_pred_dst(&inst->U.I.DstReg, fc_state);
|
||||
}
|
||||
|
||||
void rc_vert_fc(struct radeon_compiler *c, void *user)
|
||||
void
|
||||
rc_vert_fc(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct rc_instruction * inst;
|
||||
struct vert_fc_state fc_state;
|
||||
struct rc_instruction *inst;
|
||||
struct vert_fc_state fc_state;
|
||||
|
||||
memset(&fc_state, 0, sizeof(fc_state));
|
||||
fc_state.PredicateReg = -1;
|
||||
fc_state.C = c;
|
||||
memset(&fc_state, 0, sizeof(fc_state));
|
||||
fc_state.PredicateReg = -1;
|
||||
fc_state.C = c;
|
||||
|
||||
for(inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
|
||||
switch (inst->U.I.Opcode) {
|
||||
switch (inst->U.I.Opcode) {
|
||||
|
||||
case RC_OPCODE_BGNLOOP:
|
||||
lower_bgnloop(inst, &fc_state);
|
||||
fc_state.LoopDepth++;
|
||||
break;
|
||||
case RC_OPCODE_BGNLOOP:
|
||||
lower_bgnloop(inst, &fc_state);
|
||||
fc_state.LoopDepth++;
|
||||
break;
|
||||
|
||||
case RC_OPCODE_BRK:
|
||||
lower_brk(inst, &fc_state);
|
||||
break;
|
||||
case RC_OPCODE_BRK:
|
||||
lower_brk(inst, &fc_state);
|
||||
break;
|
||||
|
||||
case RC_OPCODE_ENDLOOP:
|
||||
if (fc_state.BranchDepth != 0
|
||||
|| fc_state.LoopDepth != 1) {
|
||||
lower_endloop(inst, &fc_state);
|
||||
/* Skip the new PRED_RESTORE */
|
||||
inst = inst->Next;
|
||||
}
|
||||
fc_state.LoopDepth--;
|
||||
break;
|
||||
case RC_OPCODE_IF:
|
||||
lower_if(inst, &fc_state);
|
||||
fc_state.BranchDepth++;
|
||||
break;
|
||||
case RC_OPCODE_ENDLOOP:
|
||||
if (fc_state.BranchDepth != 0 || fc_state.LoopDepth != 1) {
|
||||
lower_endloop(inst, &fc_state);
|
||||
/* Skip the new PRED_RESTORE */
|
||||
inst = inst->Next;
|
||||
}
|
||||
fc_state.LoopDepth--;
|
||||
break;
|
||||
case RC_OPCODE_IF:
|
||||
lower_if(inst, &fc_state);
|
||||
fc_state.BranchDepth++;
|
||||
break;
|
||||
|
||||
case RC_OPCODE_ELSE:
|
||||
inst->U.I.Opcode = RC_ME_PRED_SET_INV;
|
||||
build_pred_dst(&inst->U.I.DstReg, &fc_state);
|
||||
build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
|
||||
break;
|
||||
case RC_OPCODE_ELSE:
|
||||
inst->U.I.Opcode = RC_ME_PRED_SET_INV;
|
||||
build_pred_dst(&inst->U.I.DstReg, &fc_state);
|
||||
build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
|
||||
break;
|
||||
|
||||
case RC_OPCODE_ENDIF:
|
||||
/* TODO: If LoopDepth == 1 and there is only a single break
|
||||
* we can optimize out the endif just after the break. However
|
||||
* previous attempts were buggy, so keep it simple for now.
|
||||
*/
|
||||
inst->U.I.Opcode = RC_ME_PRED_SET_POP;
|
||||
build_pred_dst(&inst->U.I.DstReg, &fc_state);
|
||||
build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
|
||||
fc_state.BranchDepth--;
|
||||
break;
|
||||
case RC_OPCODE_ENDIF:
|
||||
/* TODO: If LoopDepth == 1 and there is only a single break
|
||||
* we can optimize out the endif just after the break. However
|
||||
* previous attempts were buggy, so keep it simple for now.
|
||||
*/
|
||||
inst->U.I.Opcode = RC_ME_PRED_SET_POP;
|
||||
build_pred_dst(&inst->U.I.DstReg, &fc_state);
|
||||
build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
|
||||
fc_state.BranchDepth--;
|
||||
break;
|
||||
|
||||
default:
|
||||
if (fc_state.BranchDepth || fc_state.LoopDepth) {
|
||||
inst->U.I.DstReg.Pred = RC_PRED_SET;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
if (fc_state.BranchDepth || fc_state.LoopDepth) {
|
||||
inst->U.I.DstReg.Pred = RC_PRED_SET;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (c->Error) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (c->Error) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue