i965/gs: Add GS_OPCODE_SET_WRITE_OFFSET.

v2: Added a comment to vec4_generator::generate_gs_set_write_offset().

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
Paul Berry 2013-03-23 07:59:13 -07:00
parent 4416cb7992
commit ce722fd65d
4 changed files with 57 additions and 0 deletions

View file

@ -817,6 +817,22 @@ enum opcode {
* for Slot {0,1}" fields in the message header.
*/
GS_OPCODE_THREAD_END,
/**
* Set the "Slot {0,1} Offset" fields of a URB_WRITE message header.
*
* - dst is the MRF containing the message header.
*
* - src0.x indicates which portion of the URB should be written to (e.g. a
* vertex number)
*
* - src1 is an immediate multiplier which will be applied to src0
* (e.g. the size of a single vertex in the URB).
*
* Note: the hardware will apply this offset *in addition to* the offset in
* vec4_instruction::offset.
*/
GS_OPCODE_SET_WRITE_OFFSET,
};
#define BRW_PREDICATE_NONE 0

View file

@ -501,6 +501,8 @@ brw_instruction_name(enum opcode op)
return "gs_urb_write";
case GS_OPCODE_THREAD_END:
return "gs_thread_end";
case GS_OPCODE_SET_WRITE_OFFSET:
return "set_write_offset";
default:
/* Yes, this leaks. It's in debug code, it should never occur, and if

View file

@ -630,6 +630,9 @@ private:
void generate_vs_urb_write(vec4_instruction *inst);
void generate_gs_urb_write(vec4_instruction *inst);
void generate_gs_thread_end(vec4_instruction *inst);
void generate_gs_set_write_offset(struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1);
void generate_oword_dual_block_offsets(struct brw_reg m1,
struct brw_reg index);
void generate_scratch_write(vec4_instruction *inst,

View file

@ -442,6 +442,38 @@ vec4_generator::generate_gs_thread_end(vec4_instruction *inst)
BRW_URB_SWIZZLE_INTERLEAVE);
}
void
vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1)
{
/* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
* Header: M0.3):
*
* Slot 0 Offset. This field, after adding to the Global Offset field
* in the message descriptor, specifies the offset (in 256-bit units)
* from the start of the URB entry, as referenced by URB Handle 0, at
* which the data will be accessed.
*
* Similar text describes DWORD M0.4, which is slot 1 offset.
*
* Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
* of the register for geometry shader invocations 0 and 1) by the
* immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
*
* We can do this with the following EU instruction:
*
* mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
*/
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
src1);
brw_set_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
void
vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
struct brw_reg index)
@ -918,6 +950,10 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
generate_gs_thread_end(inst);
break;
case GS_OPCODE_SET_WRITE_OFFSET:
generate_gs_set_write_offset(dst, src[0], src[1]);
break;
case SHADER_OPCODE_SHADER_TIME_ADD:
brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
mark_surface_used(SURF_INDEX_VS_SHADER_TIME);