mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 08:58:02 +02:00
r300/compiler: Implement the texture semaphore
The texture semaphore allows for prefetching of texture data. On my RV515, this increases the FPS of Lightsmark by 33% (This is with the reg_rename pass enabled, which is enabled in the next commit). There is a new env variable now called RADEON_TEX_GROUP, which allows you to specify the maximum number of texture lookups to do at once. The default is 8, but different values could produce better results for various application / card combinations.
This commit is contained in:
parent
51fe9994bd
commit
163629fd05
6 changed files with 308 additions and 49 deletions
|
|
@ -262,7 +262,7 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
|
|||
} else {
|
||||
code->inst[ip].inst0 = R500_INST_TYPE_ALU;
|
||||
}
|
||||
code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
|
||||
code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
|
||||
|
||||
code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
|
||||
code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
|
||||
|
|
@ -380,9 +380,9 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
|
|||
|
||||
code->inst[ip].inst0 = R500_INST_TYPE_TEX
|
||||
| (inst->DstReg.WriteMask << 11)
|
||||
| R500_INST_TEX_SEM_WAIT;
|
||||
| (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
|
||||
code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
|
||||
| R500_TEX_SEM_ACQUIRE;
|
||||
| (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
|
||||
|
||||
if (inst->TexSrcTarget == RC_TEXTURE_RECT)
|
||||
code->inst[ip].inst1 |= R500_TEX_UNSCALED;
|
||||
|
|
@ -650,6 +650,9 @@ void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
|
|||
}
|
||||
}
|
||||
|
||||
/* Make sure TEX_SEM_WAIT is set on the last instruction */
|
||||
code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
|
||||
|
||||
if (code->max_temp_idx >= compiler->Base.max_temp_regs)
|
||||
rc_error(&compiler->Base, "Too many hardware temporaries used");
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,10 @@
|
|||
#include "radeon_compiler.h"
|
||||
#include "radeon_compiler_util.h"
|
||||
#include "radeon_dataflow.h"
|
||||
#include "radeon_list.h"
|
||||
#include "radeon_variable.h"
|
||||
|
||||
#include "util/u_debug.h"
|
||||
|
||||
#define VERBOSE 0
|
||||
|
||||
|
|
@ -65,6 +68,17 @@ struct schedule_instruction {
|
|||
* PairedInst references the alpha instruction's dependency information.
|
||||
*/
|
||||
struct schedule_instruction * PairedInst;
|
||||
|
||||
/** This scheduler uses the value of Score to determine which
|
||||
* instruction to schedule. Instructions with a higher value of Score
|
||||
* will be scheduled first. */
|
||||
int Score;
|
||||
|
||||
/** The number of components that read from a TEX instruction. */
|
||||
unsigned TexReadCount;
|
||||
|
||||
/** For TEX instructions a list of readers */
|
||||
struct rc_list * TexReaders;
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -131,6 +145,12 @@ struct schedule_state {
|
|||
struct schedule_instruction *ReadyAlpha;
|
||||
struct schedule_instruction *ReadyTEX;
|
||||
/*@}*/
|
||||
struct rc_list *PendingTEX;
|
||||
|
||||
void (*CalcScore)(struct schedule_instruction *);
|
||||
long max_tex_group;
|
||||
unsigned PrevBlockHasTex:1;
|
||||
unsigned TEXCount;
|
||||
};
|
||||
|
||||
static struct reg_value ** get_reg_valuep(struct schedule_state * s,
|
||||
|
|
@ -147,6 +167,29 @@ static struct reg_value ** get_reg_valuep(struct schedule_state * s,
|
|||
return &s->Temporary[index].Values[chan];
|
||||
}
|
||||
|
||||
static unsigned get_tex_read_count(struct schedule_instruction * sinst)
|
||||
{
|
||||
unsigned tex_read_count = sinst->TexReadCount;
|
||||
if (sinst->PairedInst) {
|
||||
tex_read_count += sinst->PairedInst->TexReadCount;
|
||||
}
|
||||
return tex_read_count;
|
||||
}
|
||||
|
||||
#if VERBOSE
|
||||
static void print_list(struct schedule_instruction * sinst)
|
||||
{
|
||||
struct schedule_instruction * ptr;
|
||||
for (ptr = sinst; ptr; ptr=ptr->NextReady) {
|
||||
unsigned tex_read_count = get_tex_read_count(ptr);
|
||||
unsigned score = sinst->Score;
|
||||
fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
|
||||
tex_read_count);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
static void remove_inst_from_list(struct schedule_instruction ** list,
|
||||
struct schedule_instruction * inst)
|
||||
{
|
||||
|
|
@ -172,17 +215,28 @@ static void add_inst_to_list(struct schedule_instruction ** list, struct schedul
|
|||
*list = inst;
|
||||
}
|
||||
|
||||
static void add_inst_to_list_end(struct schedule_instruction ** list,
|
||||
static void add_inst_to_list_score(struct schedule_instruction ** list,
|
||||
struct schedule_instruction * inst)
|
||||
{
|
||||
if(!*list){
|
||||
struct schedule_instruction * temp;
|
||||
struct schedule_instruction * prev;
|
||||
if (!*list) {
|
||||
*list = inst;
|
||||
}else{
|
||||
struct schedule_instruction * temp = *list;
|
||||
while(temp->NextReady){
|
||||
temp = temp->NextReady;
|
||||
}
|
||||
temp->NextReady = inst;
|
||||
return;
|
||||
}
|
||||
temp = *list;
|
||||
prev = NULL;
|
||||
while(temp && inst->Score <= temp->Score) {
|
||||
prev = temp;
|
||||
temp = temp->NextReady;
|
||||
}
|
||||
|
||||
if (!prev) {
|
||||
inst->NextReady = temp;
|
||||
*list = inst;
|
||||
} else {
|
||||
prev->NextReady = inst;
|
||||
inst->NextReady = temp;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -193,13 +247,13 @@ static void instruction_ready(struct schedule_state * s, struct schedule_instruc
|
|||
/* Adding Ready TEX instructions to the end of the "Ready List" helps
|
||||
* us emit TEX instructions in blocks without losing our place. */
|
||||
if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
|
||||
add_inst_to_list_end(&s->ReadyTEX, sinst);
|
||||
add_inst_to_list_score(&s->ReadyTEX, sinst);
|
||||
else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
|
||||
add_inst_to_list(&s->ReadyRGB, sinst);
|
||||
add_inst_to_list_score(&s->ReadyRGB, sinst);
|
||||
else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
|
||||
add_inst_to_list(&s->ReadyAlpha, sinst);
|
||||
add_inst_to_list_score(&s->ReadyAlpha, sinst);
|
||||
else
|
||||
add_inst_to_list(&s->ReadyFullALU, sinst);
|
||||
add_inst_to_list_score(&s->ReadyFullALU, sinst);
|
||||
}
|
||||
|
||||
static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
|
||||
|
|
@ -210,6 +264,68 @@ static void decrease_dependencies(struct schedule_state * s, struct schedule_ins
|
|||
instruction_ready(s, sinst);
|
||||
}
|
||||
|
||||
/* These functions provide different heuristics for scheduling instructions.
|
||||
* The default is calc_score_readers. */
|
||||
|
||||
#if 0
|
||||
|
||||
static void calc_score_zero(struct schedule_instruction * sinst)
|
||||
{
|
||||
sinst->Score = 0;
|
||||
}
|
||||
|
||||
static void calc_score_deps(struct schedule_instruction * sinst)
|
||||
{
|
||||
int i;
|
||||
sinst->Score = 0;
|
||||
for (i = 0; i < sinst->NumWriteValues; i++) {
|
||||
struct reg_value * v = sinst->WriteValues[i];
|
||||
if (v->NumReaders) {
|
||||
struct reg_value_reader * r;
|
||||
for (r = v->Readers; r; r = r->Next) {
|
||||
if (r->Reader->NumDependencies == 1) {
|
||||
sinst->Score += 100;
|
||||
}
|
||||
sinst->Score += r->Reader->NumDependencies;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#define NO_READ_TEX_SCORE (1 << 16)
|
||||
#define NO_OUTPUT_SCORE (1 << 24)
|
||||
|
||||
static void calc_score_readers(struct schedule_instruction * sinst)
|
||||
{
|
||||
if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
|
||||
sinst->Score = 0;
|
||||
} else {
|
||||
sinst->Score = sinst->NumReadValues;
|
||||
if (sinst->PairedInst) {
|
||||
sinst->Score += sinst->PairedInst->NumReadValues;
|
||||
}
|
||||
if (get_tex_read_count(sinst) == 0) {
|
||||
sinst->Score |= NO_READ_TEX_SCORE;
|
||||
}
|
||||
if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
|
||||
!sinst->Instruction->U.P.Alpha.OutputWriteMask) {
|
||||
if (sinst->PairedInst) {
|
||||
if (!sinst->PairedInst->Instruction->U.P.
|
||||
RGB.OutputWriteMask
|
||||
&& !sinst->PairedInst->Instruction->U.P.
|
||||
Alpha.OutputWriteMask) {
|
||||
sinst->Score |= NO_OUTPUT_SCORE;
|
||||
}
|
||||
|
||||
} else {
|
||||
sinst->Score |= NO_OUTPUT_SCORE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This function decreases the dependencies of the next instruction that
|
||||
* wants to write to each of sinst's read values.
|
||||
|
|
@ -222,8 +338,9 @@ static void commit_update_reads(struct schedule_state * s,
|
|||
assert(v->NumReaders > 0);
|
||||
v->NumReaders--;
|
||||
if (!v->NumReaders) {
|
||||
if (v->Next)
|
||||
if (v->Next) {
|
||||
decrease_dependencies(s, v->Next->Writer);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (sinst->PairedInst) {
|
||||
|
|
@ -256,13 +373,33 @@ static void commit_update_writes(struct schedule_state * s,
|
|||
}
|
||||
}
|
||||
|
||||
static void notify_sem_wait(struct schedule_state *s)
|
||||
{
|
||||
struct rc_list * pend_ptr;
|
||||
for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
|
||||
struct rc_list * read_ptr;
|
||||
struct schedule_instruction * pending = pend_ptr->Item;
|
||||
for (read_ptr = pending->TexReaders; read_ptr;
|
||||
read_ptr = read_ptr->Next) {
|
||||
struct schedule_instruction * reader = read_ptr->Item;
|
||||
reader->TexReadCount--;
|
||||
}
|
||||
}
|
||||
s->PendingTEX = NULL;
|
||||
}
|
||||
|
||||
static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
|
||||
{
|
||||
DBG("%i: commit\n", sinst->Instruction->IP);
|
||||
DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
|
||||
|
||||
commit_update_reads(s, sinst);
|
||||
|
||||
commit_update_writes(s, sinst);
|
||||
|
||||
if (get_tex_read_count(sinst) > 0) {
|
||||
sinst->Instruction->U.P.SemWait = 1;
|
||||
notify_sem_wait(s);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -277,6 +414,7 @@ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * befo
|
|||
struct rc_instruction * inst_begin;
|
||||
|
||||
assert(s->ReadyTEX);
|
||||
notify_sem_wait(s);
|
||||
|
||||
/* Node marker for R300 */
|
||||
inst_begin = rc_insert_new_instruction(s->C, before->Prev);
|
||||
|
|
@ -308,6 +446,12 @@ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * befo
|
|||
while(readytex){
|
||||
DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
|
||||
commit_update_writes(s, readytex);
|
||||
/* Set semaphore bits for last TEX instruction in the block */
|
||||
if (!readytex->NextReady) {
|
||||
readytex->Instruction->U.I.TexSemAcquire = 1;
|
||||
readytex->Instruction->U.I.TexSemWait = 1;
|
||||
}
|
||||
rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
|
||||
readytex = readytex->NextReady;
|
||||
}
|
||||
}
|
||||
|
|
@ -491,6 +635,9 @@ static int destructive_merge_instructions(
|
|||
rgb->ALUResultCompare = alpha->ALUResultCompare;
|
||||
}
|
||||
|
||||
/* Copy SemWait */
|
||||
rgb->SemWait |= alpha->SemWait;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -824,14 +971,14 @@ static void pair_instructions(struct schedule_state * s)
|
|||
&& convert_rgb_to_alpha(s, rgb_ptr)) {
|
||||
|
||||
struct schedule_instruction * pair_ptr;
|
||||
remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
|
||||
add_inst_to_list_score(&s->ReadyAlpha, rgb_ptr);
|
||||
|
||||
for (pair_ptr = s->ReadyRGB; pair_ptr;
|
||||
pair_ptr = pair_ptr->NextReady) {
|
||||
if (pair_ptr == rgb_ptr) {
|
||||
continue;
|
||||
}
|
||||
if (merge_instructions(&pair_ptr->Instruction->U.P,
|
||||
&rgb_ptr->Instruction->U.P)) {
|
||||
remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
|
||||
remove_inst_from_list(&s->ReadyAlpha, rgb_ptr);
|
||||
remove_inst_from_list(&s->ReadyRGB, pair_ptr);
|
||||
pair_ptr->PairedInst = rgb_ptr;
|
||||
|
||||
|
|
@ -849,6 +996,68 @@ static void pair_instructions(struct schedule_state * s)
|
|||
}
|
||||
}
|
||||
|
||||
static void update_max_score(
|
||||
struct schedule_state * s,
|
||||
struct schedule_instruction ** list,
|
||||
int * max_score,
|
||||
struct schedule_instruction ** max_inst_out,
|
||||
struct schedule_instruction *** list_out)
|
||||
{
|
||||
struct schedule_instruction * list_ptr;
|
||||
for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
|
||||
int score;
|
||||
s->CalcScore(list_ptr);
|
||||
score = list_ptr->Score;
|
||||
if (!*max_inst_out || score > *max_score) {
|
||||
*max_score = score;
|
||||
*max_inst_out = list_ptr;
|
||||
*list_out = list;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void emit_instruction(
|
||||
struct schedule_state * s,
|
||||
struct rc_instruction * before)
|
||||
{
|
||||
int max_score = -1;
|
||||
struct schedule_instruction * max_inst = NULL;
|
||||
struct schedule_instruction ** max_list = NULL;
|
||||
unsigned tex_count = 0;
|
||||
struct schedule_instruction * tex_ptr;
|
||||
|
||||
pair_instructions(s);
|
||||
#if VERBOSE
|
||||
fprintf(stderr, "Full:\n");
|
||||
print_list(s->ReadyFullALU);
|
||||
fprintf(stderr, "RGB:\n");
|
||||
print_list(s->ReadyRGB);
|
||||
fprintf(stderr, "Alpha:\n");
|
||||
print_list(s->ReadyAlpha);
|
||||
fprintf(stderr, "TEX:\n");
|
||||
print_list(s->ReadyTEX);
|
||||
#endif
|
||||
|
||||
for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
|
||||
tex_count++;
|
||||
}
|
||||
update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
|
||||
update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
|
||||
update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
|
||||
|
||||
if (tex_count >= s->max_tex_group || max_score == -1
|
||||
|| (s->TEXCount > 0 && tex_count == s->TEXCount)) {
|
||||
emit_all_tex(s, before);
|
||||
} else {
|
||||
|
||||
|
||||
remove_inst_from_list(max_list, max_inst);
|
||||
rc_insert_instruction(before->Prev, max_inst->Instruction);
|
||||
commit_alu_instruction(s, max_inst);
|
||||
|
||||
presub_nop(before->Prev);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find a good ALU instruction or pair of ALU instruction and emit it.
|
||||
|
|
@ -860,6 +1069,7 @@ static void pair_instructions(struct schedule_state * s)
|
|||
static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
|
||||
{
|
||||
struct schedule_instruction * sinst;
|
||||
int rgb_score = -1, alpha_score = -1;
|
||||
|
||||
/* Try to merge RGB and Alpha instructions together. */
|
||||
pair_instructions(s);
|
||||
|
|
@ -871,6 +1081,12 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
|
|||
commit_alu_instruction(s, sinst);
|
||||
} else {
|
||||
if (s->ReadyRGB) {
|
||||
rgb_score = s->ReadyRGB->Score;
|
||||
}
|
||||
if (s->ReadyAlpha) {
|
||||
alpha_score = s->ReadyAlpha->Score;
|
||||
}
|
||||
if (rgb_score > alpha_score) {
|
||||
sinst = s->ReadyRGB;
|
||||
s->ReadyRGB = s->ReadyRGB->NextReady;
|
||||
} else if (s->ReadyAlpha) {
|
||||
|
|
@ -924,6 +1140,12 @@ static void scan_read(void * data, struct rc_instruction * inst,
|
|||
/* Only update the current instruction's dependencies if the
|
||||
* register it reads from has been written to in this block. */
|
||||
if ((*v)->Writer) {
|
||||
if ((*v)->Writer->Instruction->Type ==
|
||||
RC_INSTRUCTION_NORMAL) {
|
||||
s->Current->TexReadCount++;
|
||||
rc_list_add(&((*v)->Writer->TexReaders),
|
||||
rc_list(&s->C->Pool, s->Current));
|
||||
}
|
||||
s->Current->NumDependencies++;
|
||||
}
|
||||
}
|
||||
|
|
@ -977,22 +1199,33 @@ static void is_rgb_to_alpha_possible_normal(
|
|||
|
||||
}
|
||||
|
||||
static void schedule_block(struct r300_fragment_program_compiler * c,
|
||||
static void schedule_block(struct schedule_state * s,
|
||||
struct rc_instruction * begin, struct rc_instruction * end)
|
||||
{
|
||||
struct schedule_state s;
|
||||
unsigned int ip;
|
||||
|
||||
memset(&s, 0, sizeof(s));
|
||||
s.C = &c->Base;
|
||||
|
||||
/* Scan instructions for data dependencies */
|
||||
ip = 0;
|
||||
for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
|
||||
s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
|
||||
memset(s.Current, 0, sizeof(struct schedule_instruction));
|
||||
s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
|
||||
memset(s->Current, 0, sizeof(struct schedule_instruction));
|
||||
|
||||
s.Current->Instruction = inst;
|
||||
if (inst->Type == RC_INSTRUCTION_NORMAL) {
|
||||
const struct rc_opcode_info * info =
|
||||
rc_get_opcode_info(inst->U.I.Opcode);
|
||||
if (info->HasTexture) {
|
||||
s->TEXCount++;
|
||||
}
|
||||
}
|
||||
|
||||
/* XXX: This causes SemWait to be set for all instructions in
|
||||
* a block if the previous block contained a TEX instruction.
|
||||
* We can do better here, but it will take a lot of work. */
|
||||
if (s->PrevBlockHasTex) {
|
||||
s->Current->TexReadCount = 1;
|
||||
}
|
||||
|
||||
s->Current->Instruction = inst;
|
||||
inst->IP = ip++;
|
||||
|
||||
DBG("%i: Scanning\n", inst->IP);
|
||||
|
|
@ -1001,17 +1234,18 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
|
|||
* counter-intuitive, to account for the case where an
|
||||
* instruction writes to the same register as it reads
|
||||
* from. */
|
||||
rc_for_all_writes_chan(inst, &scan_write, &s);
|
||||
rc_for_all_reads_chan(inst, &scan_read, &s);
|
||||
rc_for_all_writes_chan(inst, &scan_write, s);
|
||||
rc_for_all_reads_chan(inst, &scan_read, s);
|
||||
|
||||
DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
|
||||
DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
|
||||
|
||||
if (!s.Current->NumDependencies)
|
||||
instruction_ready(&s, s.Current);
|
||||
if (!s->Current->NumDependencies) {
|
||||
instruction_ready(s, s->Current);
|
||||
}
|
||||
|
||||
/* Get global readers for possible RGB->Alpha conversion. */
|
||||
s.Current->GlobalReaders.ExitOnAbort = 1;
|
||||
rc_get_readers(s.C, inst, &s.Current->GlobalReaders,
|
||||
s->Current->GlobalReaders.ExitOnAbort = 1;
|
||||
rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
|
||||
is_rgb_to_alpha_possible_normal,
|
||||
is_rgb_to_alpha_possible, NULL);
|
||||
}
|
||||
|
|
@ -1021,13 +1255,17 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
|
|||
end->Prev = begin->Prev;
|
||||
|
||||
/* Schedule instructions back */
|
||||
while(!s.C->Error &&
|
||||
(s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
|
||||
if (s.ReadyTEX)
|
||||
emit_all_tex(&s, end);
|
||||
while(!s->C->Error &&
|
||||
(s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
|
||||
if (s->C->is_r500) {
|
||||
emit_instruction(s, end);
|
||||
} else {
|
||||
if (s->ReadyTEX)
|
||||
emit_all_tex(s, end);
|
||||
|
||||
while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
|
||||
emit_one_alu(&s, end);
|
||||
while(!s->C->Error && (s->ReadyFullALU || s->ReadyRGB || s->ReadyAlpha))
|
||||
emit_one_alu(s, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1042,13 +1280,14 @@ static int is_controlflow(struct rc_instruction * inst)
|
|||
|
||||
void rc_pair_schedule(struct radeon_compiler *cc, void *user)
|
||||
{
|
||||
struct schedule_state s;
|
||||
|
||||
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
|
||||
struct schedule_state s;
|
||||
struct rc_instruction * inst = c->Base.Program.Instructions.Next;
|
||||
|
||||
memset(&s, 0, sizeof(s));
|
||||
s.C = &c->Base;
|
||||
s.CalcScore = calc_score_readers;
|
||||
s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
|
||||
while(inst != &c->Base.Program.Instructions) {
|
||||
struct rc_instruction * first;
|
||||
|
||||
|
|
@ -1063,6 +1302,11 @@ void rc_pair_schedule(struct radeon_compiler *cc, void *user)
|
|||
inst = inst->Next;
|
||||
|
||||
DBG("Schedule one block\n");
|
||||
schedule_block(c, first, inst);
|
||||
memset(s.Temporary, 0, sizeof(s.Temporary));
|
||||
s.TEXCount = 0;
|
||||
schedule_block(&s, first, inst);
|
||||
if (s.PendingTEX) {
|
||||
s.PrevBlockHasTex = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -109,6 +109,10 @@ struct rc_sub_instruction {
|
|||
/** True if tex instruction should do shadow comparison */
|
||||
unsigned int TexShadow:1;
|
||||
|
||||
/**/
|
||||
unsigned int TexSemWait:1;
|
||||
unsigned int TexSemAcquire:1;
|
||||
|
||||
/**R500 Only. How to swizzle the result of a TEX lookup*/
|
||||
unsigned int TexSwizzle:12;
|
||||
/*@}*/
|
||||
|
|
|
|||
|
|
@ -89,6 +89,7 @@ struct rc_pair_instruction {
|
|||
unsigned int WriteALUResult:2;
|
||||
unsigned int ALUResultCompare:3;
|
||||
unsigned int Nop:1;
|
||||
unsigned int SemWait:1;
|
||||
};
|
||||
|
||||
typedef void (*rc_pair_foreach_src_fn)
|
||||
|
|
|
|||
|
|
@ -293,10 +293,12 @@ static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst,
|
|||
}
|
||||
|
||||
if (opcode->HasTexture) {
|
||||
fprintf(f, ", %s%s[%u]",
|
||||
fprintf(f, ", %s%s[%u]%s%s",
|
||||
textarget_to_string(inst->U.I.TexSrcTarget),
|
||||
inst->U.I.TexShadow ? "SHADOW" : "",
|
||||
inst->U.I.TexSrcUnit);
|
||||
inst->U.I.TexSrcUnit,
|
||||
inst->U.I.TexSemWait ? " SEM_WAIT" : "",
|
||||
inst->U.I.TexSemAcquire ? " SEM_ACQUIRE" : "");
|
||||
}
|
||||
|
||||
fprintf(f, ";");
|
||||
|
|
@ -348,6 +350,9 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
|
|||
presubtract_op_to_string(
|
||||
inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
|
||||
}
|
||||
if (inst->SemWait) {
|
||||
fprintf(f, " SEM_WAIT");
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
|
||||
if (inst->RGB.Opcode != RC_OPCODE_NOP) {
|
||||
|
|
|
|||
|
|
@ -3244,7 +3244,8 @@ enum {
|
|||
# define R500_INST_TYPE_OUT (1 << 0)
|
||||
# define R500_INST_TYPE_FC (2 << 0)
|
||||
# define R500_INST_TYPE_TEX (3 << 0)
|
||||
# define R500_INST_TEX_SEM_WAIT (1 << 2)
|
||||
# define R500_INST_TEX_SEM_WAIT_SHIFT 2
|
||||
# define R500_INST_TEX_SEM_WAIT (1 << R500_INST_TEX_SEM_WAIT_SHIFT)
|
||||
# define R500_INST_RGB_PRED_SEL_NONE (0 << 3)
|
||||
# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3)
|
||||
# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3)
|
||||
|
|
@ -3426,7 +3427,8 @@ enum {
|
|||
# define R500_TEX_INST_LODBIAS (4 << 22)
|
||||
# define R500_TEX_INST_LOD (5 << 22)
|
||||
# define R500_TEX_INST_DXDY (6 << 22)
|
||||
# define R500_TEX_SEM_ACQUIRE (1 << 25)
|
||||
# define R500_TEX_SEM_ACQUIRE_SHIFT 25
|
||||
# define R500_TEX_SEM_ACQUIRE (1 << R500_TEX_SEM_ACQUIRE_SHIFT)
|
||||
# define R500_TEX_IGNORE_UNCOVERED (1 << 26)
|
||||
# define R500_TEX_UNSCALED (1 << 27)
|
||||
#define R300_US_W_FMT 0x46b4
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue