r300g: Increase fragment shader limits for r400 cards

r400 fragment shaders now support up to 64 temporary registers,
512 ALU instructions, and 512 TEX instructions.
This commit is contained in:
Tom Stellard 2010-12-13 09:11:25 -08:00
parent 1bf3c75825
commit c40ec20c27
8 changed files with 291 additions and 70 deletions

View file

@ -298,44 +298,98 @@ static void r300_emit_fs_code_to_buffer(
}
} else { /* r300 */
struct r300_fragment_program_code *code = &generic_code->code.r300;
unsigned int alu_length = code->alu.length;
unsigned int alu_iterations = ((alu_length - 1) / 64) + 1;
unsigned int tex_length = code->tex.length;
unsigned int tex_iterations =
tex_length > 0 ? ((tex_length - 1) / 32) + 1 : 0;
unsigned int iterations =
alu_iterations > tex_iterations ? alu_iterations : tex_iterations;
unsigned int bank = 0;
shader->cb_code_size = 19 +
(r300->screen->caps.is_r400 ? 2 : 0) +
code->alu.length * 4 +
(code->tex.length ? (1 + code->tex.length) : 0) +
imm_count * 5;
shader->cb_code_size = 15 +
/* R400_US_CODE_BANK */
(r300->screen->caps.is_r400 ? 2 * (iterations + 1): 0) +
/* R400_US_CODE_EXT */
(r300->screen->caps.is_r400 ? 2 : 0) +
/* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0, R400_US_ALU_EXT_ADDR_0 */
(code->r390_mode ? (5 * alu_iterations) : 4) +
/* R400_US_ALU_EXT_ADDR_[0-63] */
(code->r390_mode ? (code->alu.length) : 0) +
/* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0 */
code->alu.length * 4 +
/* R300_US_TEX_INST_0, R300_US_TEX_INST_[0-31] */
(code->tex.length > 0 ? code->tex.length + tex_iterations : 0) +
imm_count * 5;
NEW_CB(shader->cb_code, shader->cb_code_size);
if (r300->screen->caps.is_r400)
OUT_CB_REG(R400_US_CODE_BANK, 0);
OUT_CB_REG(R300_US_CONFIG, code->config);
OUT_CB_REG(R300_US_PIXSIZE, code->pixsize);
OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset);
if (code->r390_mode) {
OUT_CB_REG(R400_US_CODE_EXT, code->r400_code_offset_ext);
} else if (r300->screen->caps.is_r400) {
/* This register appears to affect shaders even if r390_mode is
* disabled, so it needs to be set to 0 for shaders that
* don't use r390_mode. */
OUT_CB_REG(R400_US_CODE_EXT, 0);
}
OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4);
OUT_CB_TABLE(code->code_addr, 4);
OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
for (i = 0; i < code->alu.length; i++)
OUT_CB(code->alu.inst[i].rgb_inst);
do {
unsigned int bank_alu_length = (alu_length < 64 ? alu_length : 64);
unsigned int bank_alu_offset = bank * 64;
unsigned int bank_tex_length = (tex_length < 32 ? tex_length : 32);
unsigned int bank_tex_offset = bank * 32;
OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
for (i = 0; i < code->alu.length; i++)
OUT_CB(code->alu.inst[i].rgb_addr);
if (r300->screen->caps.is_r400) {
OUT_CB_REG(R400_US_CODE_BANK, code->r390_mode ?
(bank << R400_BANK_SHIFT) | R400_R390_MODE_ENABLE : 0);//2
}
OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
for (i = 0; i < code->alu.length; i++)
OUT_CB(code->alu.inst[i].alpha_inst);
if (bank_alu_length > 0) {
OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, bank_alu_length);
for (i = 0; i < bank_alu_length; i++)
OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_inst);
OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
for (i = 0; i < code->alu.length; i++)
OUT_CB(code->alu.inst[i].alpha_addr);
OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, bank_alu_length);
for (i = 0; i < bank_alu_length; i++)
OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_addr);
if (code->tex.length) {
OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length);
OUT_CB_TABLE(code->tex.inst, code->tex.length);
OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, bank_alu_length);
for (i = 0; i < bank_alu_length; i++)
OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_inst);
OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, bank_alu_length);
for (i = 0; i < bank_alu_length; i++)
OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_addr);
if (code->r390_mode) {
OUT_CB_REG_SEQ(R400_US_ALU_EXT_ADDR_0, bank_alu_length);
for (i = 0; i < bank_alu_length; i++)
OUT_CB(code->alu.inst[i + bank_alu_offset].r400_ext_addr);
}
}
if (bank_tex_length > 0) {
OUT_CB_REG_SEQ(R300_US_TEX_INST_0, bank_tex_length);
OUT_CB_TABLE(code->tex.inst + bank_tex_offset, bank_tex_length);
}
alu_length -= bank_alu_length;
tex_length -= bank_tex_length;
bank++;
} while(code->r390_mode && (alu_length > 0 || tex_length > 0));
/* R400_US_CODE_BANK needs to be reset to 0, otherwise some shaders
* will be rendered incorrectly. */
if (r300->screen->caps.is_r400) {
OUT_CB_REG(R400_US_CODE_BANK,
code->r390_mode ? R400_R390_MODE_ENABLE : 0);
}
/* Emit immediates. */
@ -384,12 +438,17 @@ static void r300_translate_fragment_shader(
compiler.code = &shader->code;
compiler.state = shader->compare_state;
compiler.Base.is_r500 = r300->screen->caps.is_r500;
compiler.Base.is_r400 = r300->screen->caps.is_r400;
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
compiler.Base.has_half_swizzles = TRUE;
compiler.Base.has_presub = TRUE;
compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
compiler.Base.max_temp_regs =
compiler.Base.is_r500 ? 128 : (compiler.Base.is_r400 ? 64 : 32);
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
compiler.Base.max_alu_insts =
(compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 64;
compiler.Base.max_tex_insts =
(compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 32;
compiler.AllocateHwInputs = &allocate_hardware_inputs;
compiler.UserData = &shader->inputs;

View file

@ -2162,14 +2162,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* R4xx extended fragment shader registers. */
#define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */
# define R400_ADDR0_EXT_RGB_MSB_BIT 0x01
# define R400_ADDR1_EXT_RGB_MSB_BIT 0x02
# define R400_ADDR2_EXT_RGB_MSB_BIT 0x04
# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x))
# define R400_ADDRD_EXT_RGB_MSB_BIT 0x08
# define R400_ADDR0_EXT_A_MSB_BIT 0x10
# define R400_ADDR1_EXT_A_MSB_BIT 0x20
# define R400_ADDR2_EXT_A_MSB_BIT 0x40
# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4))
# define R400_ADDRD_EXT_A_MSB_BIT 0x80
#define R400_US_CODE_BANK 0x46b8
# define R400_BANK_SHIFT 0
# define R400_BANK_MASK 0xf

View file

@ -87,12 +87,8 @@ static const char* r300_get_name(struct pipe_screen* pscreen)
static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
{
struct r300_screen* r300screen = r300_screen(pscreen);
boolean is_r400 = r300screen->caps.is_r400;
boolean is_r500 = r300screen->caps.is_r500;
/* XXX extended shader capabilities of r400 unimplemented */
is_r400 = FALSE;
switch (param) {
/* Supported features (boolean caps). */
case PIPE_CAP_NPOT_TEXTURES:
@ -175,9 +171,6 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
boolean is_r400 = r300screen->caps.is_r400;
boolean is_r500 = r300screen->caps.is_r500;
/* XXX extended shader capabilities of r400 unimplemented */
is_r400 = FALSE;
switch (shader)
{
case PIPE_SHADER_FRAGMENT:

View file

@ -49,6 +49,11 @@ static void presub_string(char out[10], unsigned int inst)
}
}
/**
 * Translate one flag of an R400 extended-address word into the value of the
 * sixth register-index bit.
 *
 * @param bit            Mask selecting the flag(s) to test.
 * @param r400_ext_addr  Extended-address word to test against.
 * @return 32 (1 << 5) when any selected flag is set, otherwise 0.
 */
static int get_msb(unsigned int bit, unsigned int r400_ext_addr)
{
	const unsigned int selected = r400_ext_addr & bit;

	if (selected == 0)
		return 0;

	/* The flag stands for bit 5 of the register index. */
	return 1 << 5;
}
/* just some random things... */
void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
{
@ -61,16 +66,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
fprintf(stderr, "Hardware program\n");
fprintf(stderr, "----------------\n");
if (c->is_r400) {
fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext);
}
for (n = 0; n <= (code->config & 3); n++) {
uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT;
int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT;
unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) +
(((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6);
unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) +
(((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6);
int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
"alu_end: %d, tex_end: %d (code_addr: %08x)\n", n,
fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, "
"alu_end: %u, tex_end: %d (code_addr: %08x)\n", n,
alu_offset, tex_offset, alu_end, tex_end, code_addr);
if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
@ -125,11 +135,15 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
for (j = 0; j < 3; ++j) {
int regc = code->alu.inst[i].rgb_addr >> (j * 6);
int rega = code->alu.inst[i].alpha_addr >> (j * 6);
int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j),
code->alu.inst[i].r400_ext_addr);
int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j),
code->alu.inst[i].r400_ext_addr);
sprintf(srcc[j], "%c%i",
(regc & 32) ? 'c' : 't', regc & 31);
(regc & 32) ? 'c' : 't', (regc & 31) | msbc);
sprintf(srca[j], "%c%i",
(rega & 32) ? 'c' : 't', rega & 31);
(rega & 32) ? 'c' : 't', (rega & 31) | msba);
}
dstc[0] = 0;
@ -141,9 +155,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
if (flags[0] != 0) {
unsigned int msb = get_msb(
R400_ADDRD_EXT_RGB_MSB_BIT,
code->alu.inst[i].r400_ext_addr);
sprintf(dstc, "t%i.%s ",
(code->alu.inst[i].
rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
((code->alu.inst[i].
rgb_addr >> R300_ALU_DSTC_SHIFT)
& 31) | msb,
flags);
}
sprintf(flags, "%s%s%s",
@ -166,9 +185,13 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
dsta[0] = 0;
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
unsigned int msb = get_msb(
R400_ADDRD_EXT_A_MSB_BIT,
code->alu.inst[i].r400_ext_addr);
sprintf(dsta, "t%i.w ",
(code->alu.inst[i].
alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
((code->alu.inst[i].
alpha_addr >> R300_ALU_DSTA_SHIFT) & 31)
| msb);
}
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
sprintf(tmp, "o%i.w ",

View file

@ -64,6 +64,20 @@ struct r300_emit_state {
__FILE__, __FUNCTION__, ##args); \
} while(0)
/**
 * Extract the three bits that sit directly above the low six bits of an
 * ALU offset or size.
 *
 * @param bits  Full ALU offset/size value.
 * @return Bits [8:6] of @p bits, shifted down to bits [2:0].
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
	/* Drop the six low bits, then keep only three bits of what is left. */
	const unsigned int upper = bits / 64u;

	return upper % 8u;
}
/**
 * Extract the most significant bits of a TEX offset or size, i.e. the bits
 * that do not fit in the legacy field.
 *
 * The extension fields that receive this value are four bits wide
 * (0xf << 24 and 0xf << 28), so the result is limited to four bits.  The
 * previous mask of 0x15 (binary 10101) dropped bits 1 and 3 of the result
 * and let bit 4 through into the neighbouring field.
 *
 * @param bits The full TEX offset/size value.
 * @param lsbs The number of least significant bits
 * @return Bits [lsbs+3:lsbs] of @p bits, shifted down to bits [3:0].
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
	return (bits >> lsbs) & 0xf;
}
/* Shift x right by lsbs bits and keep the bits selected by mask.  Every
 * argument is parenthesized so that compound expressions expand correctly. */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
/**
* Mark a temporary register as used.
@ -83,7 +97,7 @@ static unsigned int use_source(struct r300_fragment_program_code* code, struct r
return src.Index | (1 << 5);
} else if (src.File == RC_FILE_TEMPORARY) {
use_temporary(code, src.Index);
return src.Index;
return src.Index & 0x1f;
}
return 0;
@ -151,11 +165,19 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
for(j = 0; j < 3; ++j) {
/* Set the RGB address */
unsigned int src = use_source(code, inst->RGB.Src[j]);
unsigned int arg;
if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
code->alu.inst[ip].rgb_addr |= src << (6*j);
/* Set the Alpha address */
src = use_source(code, inst->Alpha.Src[j]);
if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
code->alu.inst[ip].alpha_addr |= src << (6*j);
arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
@ -223,8 +245,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
if (inst->RGB.WriteMask) {
use_temporary(code, inst->RGB.DestIndex);
if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
code->alu.inst[ip].rgb_addr |=
(inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
}
if (inst->RGB.OutputWriteMask) {
@ -236,8 +260,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
if (inst->Alpha.WriteMask) {
use_temporary(code, inst->Alpha.DestIndex);
if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
code->alu.inst[ip].alpha_addr |=
(inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
R300_ALU_DSTA_REG;
}
if (inst->Alpha.OutputWriteMask) {
@ -269,6 +295,8 @@ static int finish_node(struct r300_emit_state * emit)
unsigned tex_offset;
unsigned tex_end;
unsigned int alu_offset_msbs, alu_end_msbs;
if (code->alu.length == emit->node_first_alu) {
/* Generate a single NOP for this node */
struct rc_pair_instruction inst;
@ -301,13 +329,48 @@ static int finish_node(struct r300_emit_state * emit)
*
* Also note that the register specification from AMD is slightly
* incorrect in its description of this register. */
code->code_addr[emit->current_node] =
(alu_offset << R300_ALU_START_SHIFT) |
(alu_end << R300_ALU_SIZE_SHIFT) |
(tex_offset << R300_TEX_START_SHIFT) |
(tex_end << R300_TEX_SIZE_SHIFT) |
emit->node_flags;
code->code_addr[emit->current_node] =
((alu_offset << R300_ALU_START_SHIFT)
& R300_ALU_START_MASK)
| ((alu_end << R300_ALU_SIZE_SHIFT)
& R300_ALU_SIZE_MASK)
| ((tex_offset << R300_TEX_START_SHIFT)
& R300_TEX_START_MASK)
| ((tex_end << R300_TEX_SIZE_SHIFT)
& R300_TEX_SIZE_MASK)
| emit->node_flags
| (get_msbs_tex(tex_offset, 5)
<< R400_TEX_START_MSB_SHIFT)
| (get_msbs_tex(tex_end, 5)
<< R400_TEX_SIZE_MSB_SHIFT)
;
/* Write r400 extended instruction fields. These will be ignored on
* r300 cards. */
alu_offset_msbs = get_msbs_alu(alu_offset);
alu_end_msbs = get_msbs_alu(alu_end);
switch(emit->current_node) {
case 0:
code->r400_code_offset_ext |=
alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
break;
case 1:
code->r400_code_offset_ext |=
alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
break;
case 2:
code->r400_code_offset_ext |=
alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
break;
case 3:
code->r400_code_offset_ext |=
alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
break;
}
return 1;
}
@ -348,7 +411,7 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
unsigned int opcode;
PROG_CODE;
if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
error("Too many TEX instructions");
return 0;
}
@ -376,10 +439,17 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
use_temporary(code, inst->U.I.SrcReg[0].Index);
code->tex.inst[code->tex.length++] =
(inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
(dest << R300_DST_ADDR_SHIFT) |
(unit << R300_TEX_ID_SHIFT) |
(opcode << R300_TEX_INST_SHIFT);
((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
& R300_SRC_ADDR_MASK)
| ((dest << R300_DST_ADDR_SHIFT)
& R300_DST_ADDR_MASK)
| (unit << R300_TEX_ID_SHIFT)
| (opcode << R300_TEX_INST_SHIFT)
| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
R400_SRC_ADDR_EXT_BIT : 0)
| (dest >= R300_PFS_NUM_TEMP_REGS ?
R400_DST_ADDR_EXT_BIT : 0)
;
return 1;
}
@ -393,6 +463,7 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
struct r300_emit_state emit;
struct r300_fragment_program_code *code = &compiler->code->code.r300;
unsigned int tex_end;
memset(&emit, 0, sizeof(emit));
emit.compiler = compiler;
@ -424,11 +495,28 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
finish_node(&emit);
code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
/* Set r400 extended instruction fields. These values will be ignored
* on r300 cards. */
code->r400_code_offset_ext |=
(get_msbs_alu(0)
<< R400_ALU_OFFSET_MSB_SHIFT)
| (get_msbs_alu(code->alu.length - 1)
<< R400_ALU_SIZE_MSB_SHIFT);
tex_end = code->tex.length ? code->tex.length - 1 : 0;
code->code_offset =
(0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
(0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
& R300_PFS_CNTL_ALU_OFFSET_MASK)
| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
& R300_PFS_CNTL_ALU_END_MASK)
| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
& R300_PFS_CNTL_TEX_OFFSET_MASK)
| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
& R300_PFS_CNTL_TEX_END_MASK)
| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
;
if (emit.current_node < 3) {
int shift = 3 - emit.current_node;
@ -438,4 +526,11 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
for(i = 0; i < shift; ++i)
code->code_addr[i] = 0;
}
if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
|| code->alu.length > R300_PFS_MAX_ALU_INST
|| code->tex.length > R300_PFS_MAX_TEX_INST) {
code->r390_mode = 1;
}
}

View file

@ -31,6 +31,9 @@
#define R300_PFS_NUM_TEMP_REGS 32
#define R300_PFS_NUM_CONST_REGS 32
#define R400_PFS_MAX_ALU_INST 512
#define R400_PFS_MAX_TEX_INST 512
#define R500_PFS_MAX_INST 512
#define R500_PFS_NUM_TEMP_REGS 128
#define R500_PFS_NUM_CONST_REGS 256
@ -187,24 +190,29 @@ struct r300_fragment_program_node {
*/
struct r300_fragment_program_code {
struct {
int length; /**< total # of texture instructions used */
uint32_t inst[R300_PFS_MAX_TEX_INST];
unsigned int length; /**< total # of texture instructions used */
uint32_t inst[R400_PFS_MAX_TEX_INST];
} tex;
struct {
int length; /**< total # of ALU instructions used */
unsigned int length; /**< total # of ALU instructions used */
struct {
uint32_t rgb_inst;
uint32_t rgb_addr;
uint32_t alpha_inst;
uint32_t alpha_addr;
} inst[R300_PFS_MAX_ALU_INST];
uint32_t r400_ext_addr;
} inst[R400_PFS_MAX_ALU_INST];
} alu;
uint32_t config; /* US_CONFIG */
uint32_t pixsize; /* US_PIXSIZE */
uint32_t code_offset; /* US_CODE_OFFSET */
uint32_t r400_code_offset_ext; /* US_CODE_EXT */
uint32_t code_addr[4]; /* US_CODE_ADDR */
/*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
* for r400 cards */
unsigned int r390_mode:1;
};

View file

@ -50,6 +50,7 @@ struct radeon_compiler {
char * ErrorMsg;
/* Hardware specification. */
unsigned is_r400:1;
unsigned is_r500:1;
unsigned has_half_swizzles:1;
unsigned has_presub:1;
@ -57,6 +58,7 @@ struct radeon_compiler {
unsigned max_temp_regs;
unsigned max_constants;
int max_alu_insts;
unsigned max_tex_insts;
/* Whether to remove unused constants and empty holes in constant space. */
unsigned remove_unused_constants:1;

View file

@ -1658,6 +1658,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13)
# define R300_PFS_CNTL_TEX_END_SHIFT 18
# define R300_PFS_CNTL_TEX_END_MASK (31 << 18)
# define R400_PFS_CNTL_TEX_OFFSET_MSB_SHIFT 24
# define R400_PFS_CNTL_TEX_OFFSET_MSB_MASK (0xf << 24)
# define R400_PFS_CNTL_TEX_END_MSB_SHIFT 28
# define R400_PFS_CNTL_TEX_END_MSB_MASK (0xf << 28)
/* gap */
@ -1682,6 +1686,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TEX_SIZE_MASK (31 << 17)
# define R300_RGBA_OUT (1 << 22)
# define R300_W_OUT (1 << 23)
# define R400_TEX_START_MSB_SHIFT 24
# define R400_TEX_START_MSG_MASK (0xf << 24)
# define R400_TEX_SIZE_MSB_SHIFT 28
# define R400_TEX_SIZE_MSG_MASK (0xf << 28)
/* TEX
* As far as I can tell, texture instructions cannot write into output
@ -1702,6 +1710,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TEX_OP_TXP 3
# define R300_TEX_OP_TXB 4
# define R300_TEX_INST_MASK (7 << 15)
# define R400_SRC_ADDR_EXT_BIT (1 << 19)
# define R400_DST_ADDR_EXT_BIT (1 << 20)
/* Output format from the unified shader */
#define R300_US_OUT_FMT 0x46A4
@ -1979,6 +1989,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_ALU_OUTA_CLAMP (1 << 30)
/* END: Fragment program instruction set */
/* R4xx extended fragment shader registers. */
/* Per-ALU-instruction address extension word.  Each flag is set by the
 * fragment program emitter when the corresponding register index is 32 or
 * higher: ADDR_EXT_*(x) covers RGB/alpha source x, ADDRD_EXT_* covers the
 * RGB/alpha destination. */
#define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */
# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x))
# define R400_ADDRD_EXT_RGB_MSB_BIT 0x08
# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4))
# define R400_ADDRD_EXT_A_MSB_BIT 0x80
/* Code bank select (low four bits) plus the R390 mode enable flag, which
 * enables 512 instructions and 64 temporaries on r400 cards. */
#define R400_US_CODE_BANK 0x46b8
# define R400_BANK_SHIFT 0
# define R400_BANK_MASK 0xf
# define R400_R390_MODE_ENABLE (1 << 4)
/* Three-bit fields holding the bits above the low six bits of the ALU
 * offset/size values: one pair for the whole program (OFFSET/SIZE) and one
 * START/SIZE pair per US_CODE_ADDR slot (0-3). */
#define R400_US_CODE_EXT 0x46bc
# define R400_ALU_OFFSET_MSB_SHIFT 0
# define R400_ALU_OFFSET_MSB_MASK (0x7 << 0)
# define R400_ALU_SIZE_MSB_SHIFT 3
# define R400_ALU_SIZE_MSB_MASK (0x7 << 3)
# define R400_ALU_START0_MSB_SHIFT 6
# define R400_ALU_START0_MSB_MASK (0x7 << 6)
# define R400_ALU_SIZE0_MSB_SHIFT 9
# define R400_ALU_SIZE0_MSB_MASK (0x7 << 9)
# define R400_ALU_START1_MSB_SHIFT 12
# define R400_ALU_START1_MSB_MASK (0x7 << 12)
# define R400_ALU_SIZE1_MSB_SHIFT 15
# define R400_ALU_SIZE1_MSB_MASK (0x7 << 15)
# define R400_ALU_START2_MSB_SHIFT 18
# define R400_ALU_START2_MSB_MASK (0x7 << 18)
# define R400_ALU_SIZE2_MSB_SHIFT 21
# define R400_ALU_SIZE2_MSB_MASK (0x7 << 21)
# define R400_ALU_START3_MSB_SHIFT 24
# define R400_ALU_START3_MSB_MASK (0x7 << 24)
# define R400_ALU_SIZE3_MSB_SHIFT 27
# define R400_ALU_SIZE3_MSB_MASK (0x7 << 27)
/* END: R4xx extended fragment shader registers. */
/* Fog: Fog Blending Enable */
#define R300_FG_FOG_BLEND 0x4bc0
# define R300_FG_FOG_BLEND_DISABLE (0 << 0)