Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2026-02-02 15:20:26 +01:00)
nv40: remove use of temps for KILP, implement KIL
This commit is contained in:
parent 060127af38
commit b4c813313a
3 changed files with 100 additions and 75 deletions
|
|
@ -58,13 +58,13 @@ struct nv40_fpc {
|
|||
};
|
||||
|
||||
static INLINE struct nv40_sreg
|
||||
nv40_sr_temp(struct nv40_fpc *fpc)
|
||||
temp(struct nv40_fpc *fpc)
|
||||
{
|
||||
int idx;
|
||||
|
||||
idx = fpc->temp_temp_count++;
|
||||
idx += fpc->high_temp + 1;
|
||||
return nv40_sr(0, NV40_FP_REG_TYPE_TEMP, idx);
|
||||
return nv40_sr(NV40SR_TEMP, idx);
|
||||
}
|
||||
|
||||
#define arith(cc,s,o,d,m,s0,s1,s2) \
|
||||
|
|
@ -73,22 +73,30 @@ nv40_sr_temp(struct nv40_fpc *fpc)
|
|||
#define tex(cc,s,o,u,d,m,s0,s1,s2) \
|
||||
nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \
|
||||
(d), (m), (s0), none, none)
|
||||
#define temp(fpc) nv40_sr_temp((fpc))
|
||||
|
||||
static void
|
||||
emit_src(struct nv40_fpc *fpc, uint32_t *hw, int pos, struct nv40_sreg src)
|
||||
{
|
||||
uint32_t sr = 0;
|
||||
|
||||
sr |= (src.type << NV40_FP_REG_TYPE_SHIFT);
|
||||
if (src.type == NV40_FP_REG_TYPE_INPUT) {
|
||||
switch (src.type) {
|
||||
case NV40SR_INPUT:
|
||||
sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
|
||||
hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT);
|
||||
} else
|
||||
if (src.type == NV40_FP_REG_TYPE_CONST) {
|
||||
fpc->inst_has_const = TRUE;
|
||||
} else
|
||||
if (src.type == NV40_FP_REG_TYPE_TEMP) {
|
||||
break;
|
||||
case NV40SR_TEMP:
|
||||
sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT);
|
||||
sr |= (src.index << NV40_FP_REG_SRC_SHIFT);
|
||||
break;
|
||||
case NV40SR_CONST:
|
||||
sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
|
||||
fpc->inst_has_const = TRUE;
|
||||
break;
|
||||
case NV40SR_NONE:
|
||||
sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
if (src.negate)
|
||||
|
|
@ -110,16 +118,25 @@ emit_dst(struct nv40_fpc *fpc, uint32_t *hw, struct nv40_sreg dst)
|
|||
{
|
||||
struct nv40_fragment_program *fp = fpc->fp;
|
||||
|
||||
if (dst.output) {
|
||||
switch (dst.type) {
|
||||
case NV40SR_TEMP:
|
||||
if (fp->num_regs < (dst.index + 1))
|
||||
fp->num_regs = dst.index + 1;
|
||||
break;
|
||||
case NV40SR_OUTPUT:
|
||||
if (dst.index == 1) {
|
||||
fp->writes_depth = 1;
|
||||
} else {
|
||||
hw[0] |= NV40_FP_OP_UNK0_7;
|
||||
}
|
||||
} else {
|
||||
if (fp->num_regs < (dst.index + 1))
|
||||
fp->num_regs = dst.index + 1;
|
||||
break;
|
||||
case NV40SR_NONE:
|
||||
hw[0] |= (1 << 30);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT);
|
||||
}
|
||||
|
||||
|
|
@ -180,29 +197,25 @@ static INLINE struct nv40_sreg
|
|||
tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
|
||||
{
|
||||
struct nv40_sreg src;
|
||||
uint type, index;
|
||||
|
||||
switch (fsrc->SrcRegister.File) {
|
||||
case TGSI_FILE_INPUT:
|
||||
type = NV40_FP_REG_TYPE_INPUT;
|
||||
index = fpc->attrib_map[fsrc->SrcRegister.Index];
|
||||
src = nv40_sr(NV40SR_INPUT,
|
||||
fpc->attrib_map[fsrc->SrcRegister.Index]);
|
||||
break;
|
||||
case TGSI_FILE_CONSTANT:
|
||||
type = NV40_FP_REG_TYPE_CONST;
|
||||
index = fsrc->SrcRegister.Index;
|
||||
src = nv40_sr(NV40SR_CONST, fsrc->SrcRegister.Index);
|
||||
break;
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
type = NV40_FP_REG_TYPE_TEMP;
|
||||
index = fsrc->SrcRegister.Index + 1;
|
||||
if (fpc->high_temp < index)
|
||||
fpc->high_temp = index;
|
||||
src = nv40_sr(NV40SR_TEMP, fsrc->SrcRegister.Index + 1);
|
||||
if (fpc->high_temp < src.index)
|
||||
fpc->high_temp = src.index;
|
||||
break;
|
||||
default:
|
||||
NOUVEAU_ERR("bad src file\n");
|
||||
break;
|
||||
}
|
||||
|
||||
src = nv40_sr(0, type, index);
|
||||
src.abs = fsrc->SrcRegisterExtMod.Absolute;
|
||||
src.negate = fsrc->SrcRegister.Negate;
|
||||
src.swz[0] = fsrc->SrcRegister.SwizzleX;
|
||||
|
|
@ -214,30 +227,26 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
|
|||
|
||||
static INLINE struct nv40_sreg
|
||||
tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
|
||||
int out, idx;
|
||||
int idx;
|
||||
|
||||
switch (fdst->DstRegister.File) {
|
||||
case TGSI_FILE_OUTPUT:
|
||||
out = 1;
|
||||
if (fdst->DstRegister.Index == fpc->colour_id)
|
||||
idx = 0;
|
||||
return nv40_sr(NV40SR_OUTPUT, 0);
|
||||
else
|
||||
idx = 1;
|
||||
return nv40_sr(NV40SR_OUTPUT, 1);
|
||||
break;
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
out = 0;
|
||||
idx = fdst->DstRegister.Index + 1;
|
||||
if (fpc->high_temp < idx)
|
||||
fpc->high_temp = idx;
|
||||
break;
|
||||
return nv40_sr(NV40SR_TEMP, idx);
|
||||
case TGSI_FILE_NULL:
|
||||
break;
|
||||
return nv40_sr(NV40SR_NONE, 0);
|
||||
default:
|
||||
NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
|
||||
break;
|
||||
return nv40_sr(NV40SR_NONE, 0);
|
||||
}
|
||||
|
||||
return nv40_sr(out, NV40_FP_REG_TYPE_TEMP, idx);
|
||||
}
|
||||
|
||||
static INLINE int
|
||||
|
|
@ -256,8 +265,8 @@ static boolean
|
|||
nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
|
||||
const struct tgsi_full_instruction *finst)
|
||||
{
|
||||
const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
|
||||
struct nv40_sreg src[3], dst, tmp;
|
||||
struct nv40_sreg none = nv40_sr(0, NV40_FP_REG_TYPE_INPUT, 0);
|
||||
int mask, sat, unit;
|
||||
int ai = -1, ci = -1;
|
||||
int i;
|
||||
|
|
@ -361,14 +370,13 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
|
|||
arith(fpc, sat, FRC, dst, mask, src[0], none, none);
|
||||
break;
|
||||
case TGSI_OPCODE_KIL:
|
||||
arith(fpc, 0, KIL, none, 0, none, none, none);
|
||||
break;
|
||||
case TGSI_OPCODE_KILP:
|
||||
/*XXX: Which is NV, which is ARB kil? ARB implemented here.
|
||||
*XXX: Don't need temp, can update CC0 without writing dst
|
||||
*/
|
||||
tmp = temp(fpc);
|
||||
tmp.cc_update = 1;
|
||||
arith(fpc, 0, MOV, tmp, MASK_ALL, src[0], none, none);
|
||||
dst.cc_test = NV40_FP_OP_COND_LT;
|
||||
dst = nv40_sr(NV40SR_NONE, 0);
|
||||
dst.cc_update = 1;
|
||||
arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
|
||||
dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT;
|
||||
arith(fpc, 0, KIL, dst, 0, none, none, none);
|
||||
break;
|
||||
case TGSI_OPCODE_LG2:
|
||||
|
|
|
|||
|
|
@ -472,8 +472,13 @@
|
|||
# define NV40_FP_SWIZZLE_W 3
|
||||
#define NV40_FP_REG_NEGATE (1 << 17)
|
||||
|
||||
#define NV40SR_NONE 0
|
||||
#define NV40SR_OUTPUT 1
|
||||
#define NV40SR_INPUT 2
|
||||
#define NV40SR_TEMP 3
|
||||
#define NV40SR_CONST 4
|
||||
|
||||
struct nv40_sreg {
|
||||
int output;
|
||||
int type;
|
||||
int index;
|
||||
|
||||
|
|
@ -491,10 +496,9 @@ struct nv40_sreg {
|
|||
};
|
||||
|
||||
static INLINE struct nv40_sreg
|
||||
nv40_sr(int out, int type, int index)
|
||||
nv40_sr(int type, int index)
|
||||
{
|
||||
struct nv40_sreg temp = {
|
||||
.output = out,
|
||||
.type = type,
|
||||
.index = index,
|
||||
.dst_scale = DEF_SCALE,
|
||||
|
|
|
|||
|
|
@ -54,18 +54,17 @@ struct nv40_vpc {
|
|||
};
|
||||
|
||||
static INLINE struct nv40_sreg
|
||||
nv40_sr_temp(struct nv40_vpc *vpc)
|
||||
temp(struct nv40_vpc *vpc)
|
||||
{
|
||||
int idx;
|
||||
|
||||
idx = vpc->temp_temp_count++;
|
||||
idx += vpc->high_temp;
|
||||
return nv40_sr(0, NV40_VP_SRC_REG_TYPE_TEMP, idx);
|
||||
return nv40_sr(NV40SR_TEMP, idx);
|
||||
}
|
||||
|
||||
static INLINE struct nv40_sreg
|
||||
nv40_sr_const(struct nv40_vpc *vpc, int pipe,
|
||||
float x, float y, float z, float w)
|
||||
constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)
|
||||
{
|
||||
struct nv40_vertex_program *vp = vpc->vp;
|
||||
int idx = vp->num_consts;
|
||||
|
|
@ -78,13 +77,11 @@ nv40_sr_const(struct nv40_vpc *vpc, int pipe,
|
|||
vp->consts[idx].value[3] = w;
|
||||
vp->num_consts++;
|
||||
|
||||
return nv40_sr(0, NV40_VP_SRC_REG_TYPE_CONST, idx);
|
||||
return nv40_sr(NV40SR_CONST, idx);
|
||||
}
|
||||
|
||||
#define arith(cc,s,o,d,m,s0,s1,s2) \
|
||||
nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2))
|
||||
#define temp(vpc) nv40_sr_temp((vpc))
|
||||
#define constant(v,p,x,y,z,w) nv40_sr_const((v), (p), (x), (y), (z), (w))
|
||||
|
||||
static void
|
||||
emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src)
|
||||
|
|
@ -92,15 +89,28 @@ emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src)
|
|||
struct nv40_vertex_program *vp = vpc->vp;
|
||||
uint32_t sr = 0;
|
||||
|
||||
sr |= (src.type << NV40_VP_SRC_REG_TYPE_SHIFT);
|
||||
if (src.type == NV40_VP_SRC_REG_TYPE_INPUT) {
|
||||
switch (src.type) {
|
||||
case NV40SR_TEMP:
|
||||
sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT);
|
||||
sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT);
|
||||
break;
|
||||
case NV40SR_INPUT:
|
||||
sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<
|
||||
NV40_VP_SRC_REG_TYPE_SHIFT);
|
||||
vp->ir |= (1 << src.index);
|
||||
hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT);
|
||||
} else
|
||||
if (src.type == NV40_VP_SRC_REG_TYPE_CONST) {
|
||||
break;
|
||||
case NV40SR_CONST:
|
||||
sr |= (NV40_VP_SRC_REG_TYPE_CONST <<
|
||||
NV40_VP_SRC_REG_TYPE_SHIFT);
|
||||
hw[1] |= (src.index << NV40_VP_INST_CONST_SRC_SHIFT);
|
||||
} else {
|
||||
sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT);
|
||||
break;
|
||||
case NV40SR_NONE:
|
||||
sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<
|
||||
NV40_VP_SRC_REG_TYPE_SHIFT);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
if (src.negate)
|
||||
|
|
@ -140,16 +150,18 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
|
|||
{
|
||||
struct nv40_vertex_program *vp = vpc->vp;
|
||||
|
||||
if (dst.output == 0) {
|
||||
switch (dst.type) {
|
||||
case NV40SR_TEMP:
|
||||
hw[3] |= NV40_VP_INST_DEST_MASK;
|
||||
if (slot == 0) {
|
||||
hw[0] |= (dst.index <<
|
||||
NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
|
||||
} else {
|
||||
hw[3] |= (dst.index <<
|
||||
hw[3] |= (dst.index <<
|
||||
NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
case NV40SR_OUTPUT:
|
||||
switch (dst.index) {
|
||||
case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;
|
||||
case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break;
|
||||
|
|
@ -166,7 +178,7 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
|
|||
case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
|
||||
case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
|
||||
default:
|
||||
break;
|
||||
break;
|
||||
}
|
||||
|
||||
hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT);
|
||||
|
|
@ -177,6 +189,9 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
|
|||
hw[3] |= NV40_VP_INST_SCA_RESULT;
|
||||
hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -220,8 +235,7 @@ tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
|
|||
|
||||
switch (fsrc->SrcRegister.File) {
|
||||
case TGSI_FILE_INPUT:
|
||||
src = nv40_sr(0, NV40_VP_SRC_REG_TYPE_INPUT,
|
||||
fsrc->SrcRegister.Index);
|
||||
src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index);
|
||||
break;
|
||||
case TGSI_FILE_CONSTANT:
|
||||
src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
|
||||
|
|
@ -229,8 +243,7 @@ tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
|
|||
case TGSI_FILE_TEMPORARY:
|
||||
if (vpc->high_temp < fsrc->SrcRegister.Index)
|
||||
vpc->high_temp = fsrc->SrcRegister.Index;
|
||||
src = nv40_sr(0, NV40_VP_SRC_REG_TYPE_TEMP,
|
||||
fsrc->SrcRegister.Index);
|
||||
src = nv40_sr(NV40SR_TEMP, fsrc->SrcRegister.Index);
|
||||
break;
|
||||
default:
|
||||
NOUVEAU_ERR("bad src file\n");
|
||||
|
|
@ -248,25 +261,25 @@ tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
|
|||
|
||||
static INLINE struct nv40_sreg
|
||||
tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
|
||||
uint out, idx;
|
||||
struct nv40_sreg dst;
|
||||
|
||||
switch (fdst->DstRegister.File) {
|
||||
case TGSI_FILE_OUTPUT:
|
||||
out = 1;
|
||||
idx = vpc->output_map[fdst->DstRegister.Index];
|
||||
dst = nv40_sr(NV40SR_OUTPUT,
|
||||
vpc->output_map[fdst->DstRegister.Index]);
|
||||
|
||||
break;
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
out = 0;
|
||||
idx = fdst->DstRegister.Index;
|
||||
if (vpc->high_temp < idx)
|
||||
vpc->high_temp = idx;
|
||||
dst = nv40_sr(NV40SR_TEMP, fdst->DstRegister.Index);
|
||||
if (vpc->high_temp < dst.index)
|
||||
vpc->high_temp = dst.index;
|
||||
break;
|
||||
default:
|
||||
NOUVEAU_ERR("bad dst file\n");
|
||||
break;
|
||||
}
|
||||
|
||||
return nv40_sr(out, NV40_VP_SRC_REG_TYPE_TEMP, idx);
|
||||
return dst;
|
||||
}
|
||||
|
||||
static INLINE int
|
||||
|
|
@ -286,7 +299,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
|
|||
const struct tgsi_full_instruction *finst)
|
||||
{
|
||||
struct nv40_sreg src[3], dst, tmp;
|
||||
struct nv40_sreg none = nv40_sr(0, NV40_VP_SRC_REG_TYPE_INPUT, 0);
|
||||
struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
|
||||
int mask;
|
||||
int ai = -1, ci = -1;
|
||||
int i;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue