i965/vs: Respect the gen6 limitation that math opcodes can't be align16.

Fixes vs-acos-vec3 and friends.
This commit is contained in:
Eric Anholt 2011-08-09 11:00:28 -07:00
parent 6408b0295f
commit 250770b74d
2 changed files with 33 additions and 2 deletions

View file

@ -250,6 +250,14 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src)
{
/* Can't do writemask because math can't be align16. */
assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
/* Source swizzles are ignored, so insist on an unmodified source:
 * no abs, no negate, and the XYZW swizzle.
 */
assert(!src.abs);
assert(!src.negate);
/* This must be a comparison; an assignment here would overwrite the
 * swizzle instead of checking it.
 */
assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
/* Switch to align1 before the math instruction is emitted. */
brw_set_access_mode(p, BRW_ALIGN_1);
brw_math(p,
dst,
brw_math_function(inst->opcode),
@ -258,6 +266,7 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
src,
BRW_MATH_DATA_SCALAR,
BRW_MATH_PRECISION_FULL);
brw_set_access_mode(p, BRW_ALIGN_16);
}
void

View file

@ -129,7 +129,18 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
src_reg temp_src = src_reg(this, glsl_type::vec4_type);
emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
emit(opcode, dst, temp_src);
if (dst.writemask != WRITEMASK_XYZW) {
/* The gen6 math instruction must be align1, so we can't do
* writemasks.
*/
dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
emit(opcode, temp_dst, temp_src);
emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
} else {
emit(opcode, dst, temp_src);
}
}
void
@ -184,7 +195,18 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode,
emit(BRW_OPCODE_MOV, dst, src1);
src1 = expanded;
emit(opcode, dst, src0, src1);
if (dst.writemask != WRITEMASK_XYZW) {
/* The gen6 math instruction must be align1, so we can't do
* writemasks.
*/
dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
emit(opcode, temp_dst, src0, src1);
emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
} else {
emit(opcode, dst, src0, src1);
}
}
void