i965: Don't set interleave or complete on TCS EOT message.

Setting interleave on the TCS EOT message causes frequent GPU hangs on
Ivybridge hardware.  Individual tests would pass, but running even a
simple test like nop.shader_test in a loop would hang within 1-3 runs.
Adding sleep delays worked around the problem, somehow.

Interleave doesn't make much sense given that we only have one patch
URB handle, not two.  Complete doesn't seem useful either.

There's no reason to actually set those bits.  We were just being lazy.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
This commit is contained in:
Kenneth Graunke 2015-12-24 13:09:26 -08:00
parent b7793783b3
commit bd8ab8dedb
5 changed files with 41 additions and 5 deletions

View file

@ -1315,6 +1315,7 @@ enum opcode {
TCS_OPCODE_CREATE_BARRIER_HEADER,
TCS_OPCODE_SRC0_010_IS_ZERO,
TCS_OPCODE_RELEASE_INPUT,
TCS_OPCODE_THREAD_END,
TES_OPCODE_GET_PRIMITIVE_ID,
TES_OPCODE_CREATE_INPUT_READ_HEADER,

View file

@ -572,6 +572,8 @@ brw_instruction_name(enum opcode op)
return "tcs_src0<0,1,0>_is_zero";
case TCS_OPCODE_RELEASE_INPUT:
return "tcs_release_input";
case TCS_OPCODE_THREAD_END:
return "tcs_thread_end";
case TES_OPCODE_CREATE_INPUT_READ_HEADER:
return "tes_create_input_read_header";
case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:

View file

@ -276,6 +276,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
case SHADER_OPCODE_POW:
return 2;
case VS_OPCODE_URB_WRITE:
case TCS_OPCODE_THREAD_END:
return 1;
case VS_OPCODE_PULL_CONSTANT_LOAD:
return 2;

View file

@ -758,8 +758,12 @@ generate_tcs_urb_write(struct brw_codegen *p,
true /* header */, false /* eot */);
brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_WRITE_OWORD);
brw_inst_set_urb_global_offset(devinfo, send, inst->offset);
brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
if (inst->urb_write_flags & BRW_URB_WRITE_EOT) {
brw_inst_set_eot(devinfo, send, 1);
} else {
brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
}
/* what happens to swizzles? */
}
@ -968,6 +972,30 @@ generate_tcs_release_input(struct brw_codegen *p,
BRW_URB_SWIZZLE_INTERLEAVE);
}
/**
 * Emit the URB write that terminates a TCS thread.
 *
 * A header is built in the message register selected by inst->base_mrf and
 * sent through brw_urb_WRITE() with BRW_URB_WRITE_EOT OR'd into
 * inst->urb_write_flags.  No payload data is added here; the message length
 * comes straight from inst->mlen.
 *
 * p     code generator state that the instructions are emitted into
 * inst  vec4 instruction supplying base_mrf, mlen and urb_write_flags
 */
static void
generate_tcs_thread_end(struct brw_codegen *p, vec4_instruction *inst)
{
struct brw_reg header = brw_message_reg(inst->base_mrf);
/* The header setup runs with temporary defaults (Align1 access mode, mask
 * control disabled); the previous defaults are restored by the pop below.
 */
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
/* Clear the header register first, then overwrite only its element 0. */
brw_MOV(p, header, brw_imm_ud(0));
/* Element 0 is copied from brw_vec1_grf(0, 0) read as UD.
 * NOTE(review): presumably this is the URB handle delivered in r0.0 --
 * confirm against the TCS thread payload layout.
 */
brw_MOV(p, get_element_ud(header, 0),
retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));
brw_pop_insn_state(p);
/* Send the header.  EOT is always requested here, in addition to whatever
 * flags the instruction already carries.
 */
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
inst->base_mrf, /* starting mrf reg nr */
header,
BRW_URB_WRITE_EOT | inst->urb_write_flags,
inst->mlen,
0, /* response len */
0, /* urb destination offset */
0); /* NOTE(review): presumably the swizzle control, i.e. no interleave -- confirm against the brw_urb_WRITE prototype */
}
static void
generate_tes_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
{
@ -1892,6 +1920,10 @@ generate_code(struct brw_codegen *p,
generate_tcs_release_input(p, dst, src[0], src[1]);
break;
case TCS_OPCODE_THREAD_END:
generate_tcs_thread_end(p, inst);
break;
case SHADER_OPCODE_BARRIER:
brw_barrier(p, src[0]);
brw_WAIT(p);

View file

@ -203,9 +203,9 @@ vec4_tcs_visitor::emit_thread_end()
if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
emit_shader_time_end();
inst = emit(VS_OPCODE_URB_WRITE);
inst->mlen = 1; /* just the header, no data. */
inst->urb_write_flags = BRW_URB_WRITE_EOT_COMPLETE;
inst = emit(TCS_OPCODE_THREAD_END);
inst->base_mrf = 14;
inst->mlen = 1;
}