diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 7284bdc45ea..dbb42f4ea76 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1443,11 +1443,14 @@ void brw_math( struct brw_compile *p,
       assert(src.file == BRW_GENERAL_REGISTER_FILE);
 
       assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
-      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
+      if (intel->gen == 6)
+         assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
 
-      /* Source modifiers are ignored for extended math instructions. */
-      assert(!src.negate);
-      assert(!src.abs);
+      /* Source modifiers are ignored for extended math instructions on Gen6. */
+      if (intel->gen == 6) {
+         assert(!src.negate);
+         assert(!src.abs);
+      }
 
       if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
           function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
@@ -1507,8 +1510,10 @@ void brw_math2(struct brw_compile *p,
    assert(src1.file == BRW_GENERAL_REGISTER_FILE);
 
    assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
-   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
-   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
+   if (intel->gen == 6) {
+      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
+      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
+   }
 
    if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
        function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
@@ -1520,11 +1525,13 @@ void brw_math2(struct brw_compile *p,
       assert(src1.type == BRW_REGISTER_TYPE_F);
    }
 
-   /* Source modifiers are ignored for extended math instructions. */
-   assert(!src0.negate);
-   assert(!src0.abs);
-   assert(!src1.negate);
-   assert(!src1.abs);
+   /* Source modifiers are ignored for extended math instructions on Gen6. */
+   if (intel->gen == 6) {
+      assert(!src0.negate);
+      assert(!src0.abs);
+      assert(!src1.negate);
+      assert(!src1.abs);
+   }
 
    /* Math is the same ISA format as other opcodes, except that CondModifier
     * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b66febbde00..2a5f5105bd5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -559,10 +559,10 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
     * expanding that result out, but we would need to be careful with
     * masking.
     *
-    * The hardware ignores source modifiers (negate and abs) on math
+    * Gen 6 hardware ignores source modifiers (negate and abs) on math
     * instructions, so we also move to a temp to set those up.
     */
-   if (intel->gen >= 6 && (src.file == UNIFORM ||
+   if (intel->gen == 6 && (src.file == UNIFORM ||
                            src.abs ||
                            src.negate)) {
       fs_reg expanded = fs_reg(this, glsl_type::float_type);
@@ -596,7 +596,9 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
       return NULL;
    }
 
-   if (intel->gen >= 6) {
+   if (intel->gen >= 7) {
+      inst = emit(opcode, dst, src0, src1);
+   } else if (intel->gen == 6) {
       /* Can't do hstride == 0 args to gen6 math, so expand it out.
        *
        * The hardware ignores source modifiers (negate and abs) on math
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index e2ad6499e95..3e4503028b1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -490,6 +490,13 @@ public:
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
                          struct brw_reg *src);
    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
+   void generate_math1_gen7(fs_inst *inst,
+                            struct brw_reg dst,
+                            struct brw_reg src);
+   void generate_math2_gen7(fs_inst *inst,
+                            struct brw_reg dst,
+                            struct brw_reg src0,
+                            struct brw_reg src1);
    void generate_math1_gen6(fs_inst *inst,
                             struct brw_reg dst,
                             struct brw_reg src);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 0c14baf367d..b613fb081fc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -142,6 +142,31 @@ fs_visitor::generate_linterp(fs_inst *inst,
    }
 }
 
+void
+fs_visitor::generate_math1_gen7(fs_inst *inst,
+                                struct brw_reg dst,
+                                struct brw_reg src0)
+{
+   assert(inst->mlen == 0);
+   brw_math(p, dst,
+            brw_math_function(inst->opcode),
+            inst->saturate ? BRW_MATH_SATURATE_SATURATE
+                           : BRW_MATH_SATURATE_NONE,
+            0, src0,
+            BRW_MATH_DATA_VECTOR,
+            BRW_MATH_PRECISION_FULL);
+}
+
+void
+fs_visitor::generate_math2_gen7(fs_inst *inst,
+                                struct brw_reg dst,
+                                struct brw_reg src0,
+                                struct brw_reg src1)
+{
+   assert(inst->mlen == 0);
+   brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1);
+}
+
 void
 fs_visitor::generate_math1_gen6(fs_inst *inst,
                                 struct brw_reg dst,
@@ -797,7 +822,9 @@ fs_visitor::generate_code()
       case SHADER_OPCODE_LOG2:
       case SHADER_OPCODE_SIN:
       case SHADER_OPCODE_COS:
-         if (intel->gen >= 6) {
+         if (intel->gen >= 7) {
+            generate_math1_gen7(inst, dst, src[0]);
+         } else if (intel->gen == 6) {
             generate_math1_gen6(inst, dst, src[0]);
          } else {
             generate_math_gen4(inst, dst, src[0]);
@@ -807,6 +834,8 @@ fs_visitor::generate_code()
       case SHADER_OPCODE_INT_REMAINDER:
       case SHADER_OPCODE_POW:
-         if (intel->gen >= 6) {
+         if (intel->gen >= 7) {
+            generate_math2_gen7(inst, dst, src[0], src[1]);
+         } else if (intel->gen == 6) {
             generate_math2_gen6(inst, dst, src[0], src[1]);
          } else {
             generate_math_gen4(inst, dst, src[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index a83a6b24dbc..93ccda9548a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -533,6 +533,10 @@ public:
                             struct brw_reg dst,
                             struct brw_reg src0,
                             struct brw_reg src1);
+   void generate_math2_gen7(vec4_instruction *inst,
+                            struct brw_reg dst,
+                            struct brw_reg src0,
+                            struct brw_reg src1);
 
    void generate_urb_write(vec4_instruction *inst);
    void generate_oword_dual_block_offsets(struct brw_reg m1,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 66c3b7e13d9..dc39f17614a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -282,6 +282,18 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
    brw_set_access_mode(p, BRW_ALIGN_16);
 }
 
+void
+vec4_visitor::generate_math2_gen7(vec4_instruction *inst,
+                                  struct brw_reg dst,
+                                  struct brw_reg src0,
+                                  struct brw_reg src1)
+{
+   brw_math2(p,
+             dst,
+             brw_math_function(inst->opcode),
+             src0, src1);
+}
+
 void
 vec4_visitor::generate_math2_gen6(vec4_instruction *inst,
                                   struct brw_reg dst,
@@ -552,9 +564,10 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
    case SHADER_OPCODE_LOG2:
    case SHADER_OPCODE_SIN:
    case SHADER_OPCODE_COS:
-      if (intel->gen >= 6) {
+      if (intel->gen == 6) {
          generate_math1_gen6(inst, dst, src[0]);
       } else {
+         /* Also works for Gen7. */
          generate_math1_gen4(inst, dst, src[0]);
       }
       break;
@@ -562,7 +575,9 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
    case SHADER_OPCODE_POW:
    case SHADER_OPCODE_INT_QUOTIENT:
    case SHADER_OPCODE_INT_REMAINDER:
-      if (intel->gen >= 6) {
+      if (intel->gen >= 7) {
+         generate_math2_gen7(inst, dst, src[0], src[1]);
+      } else if (intel->gen == 6) {
          generate_math2_gen6(inst, dst, src[0], src[1]);
       } else {
          generate_math2_gen4(inst, dst, src[0], src[1]);