i965: Introduce the FIND_LIVE_CHANNEL pseudo-opcode.

This instruction calculates the index of an arbitrary channel enabled
in the current execution mask.  It's expected to be used as input for
the BROADCAST opcode, but it's implemented as a separate instruction
rather than being baked into BROADCAST because FIND_LIVE_CHANNEL has
no dependencies so it can always be CSE'ed with other instances of the
same instruction within a basic block.

v2: Whitespace fixes.

Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
Francisco Jerez 2015-04-23 14:42:53 +03:00
parent f2fad0dc80
commit 715bc6d8b1
6 changed files with 94 additions and 0 deletions

View file

@ -918,6 +918,14 @@ enum opcode {
SHADER_OPCODE_URB_WRITE_SIMD8,
/**
* Return the index of an arbitrary live channel (i.e. one of the channels
* enabled in the current execution mask) and assign it to the first
* component of the destination. Expected to be used as input for the
* BROADCAST pseudo-opcode.
*/
SHADER_OPCODE_FIND_LIVE_CHANNEL,
/**
* Pick the channel from its first source register given by the index
* specified as second source. Useful for variable indexing of surfaces.

View file

@ -461,6 +461,10 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
unsigned msg_length,
unsigned response_length);
/**
 * Emit code that writes the index of an arbitrary live channel (i.e. one of
 * the channels enabled in the current execution mask) to the first component
 * of \p dst.  The definition asserts Gen7 or later and saves/restores the
 * default instruction state of \p p around the emitted instructions.
 */
void
brw_find_live_channel(struct brw_codegen *p,
struct brw_reg dst);
void
brw_broadcast(struct brw_codegen *p,
struct brw_reg dst,

View file

@ -3212,6 +3212,78 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
brw_inst_set_pi_message_data(devinfo, insn, data);
}
/**
 * Emit instructions that store the index of an arbitrary channel enabled in
 * the current execution mask into the first component of \p dst.
 *
 * Only Gen7 and later are handled (asserted).  The default instruction state
 * of \p p is pushed on entry and popped before returning, so the mask-control
 * override used here does not leak to the caller.
 */
void
brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst)
{
   const struct brw_device_info *devinfo = p->devinfo;

   assert(devinfo->gen >= 7);

   brw_push_insn_state(p);

   /* Sample the access mode before the default state is modified below. */
   const int align1 =
      brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
   const int gen8_plus = devinfo->gen >= 8;

   /* Every path emits its instructions with execution masking disabled by
    * default; individual instructions re-enable it where required.
    */
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);

   if (align1 && gen8_plus) {
      /* On Gen8 the job is simple: the answer is the position of the first
       * bit set in the mask register.  HSW has the same register, but there
       * it reads back as all ones whenever the reading instruction has
       * execution masking disabled, which makes it of little use.
       */
      brw_inst *fbl = brw_FBL(p, vec1(dst),
                              retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD));

      /* The value read from this register is shifted according to the
       * quarter control of the instruction, so force it to zero.
       */
      brw_inst_set_qtr_control(devinfo, fbl, GEN6_COMPRESSION_1Q);

   } else if (align1) {
      const struct brw_reg flag = retype(brw_flag_reg(1, 0),
                                         BRW_REGISTER_TYPE_UD);
      brw_inst *probe;

      brw_MOV(p, flag, brw_imm_ud(0));

      /* Recover the current execution mask in f1.0 by running a 16-wide
       * instruction that yields zero, with execution masking and a
       * conditional modifier turned on.
       */
      probe = brw_MOV(p, brw_null_reg(), brw_imm_ud(0));
      brw_inst_set_exec_size(devinfo, probe, BRW_EXECUTE_16);
      brw_inst_set_mask_control(devinfo, probe, BRW_MASK_ENABLE);
      brw_inst_set_cond_modifier(devinfo, probe, BRW_CONDITIONAL_Z);
      brw_inst_set_flag_reg_nr(devinfo, probe, 1);

      brw_FBL(p, vec1(dst), flag);

   } else if (gen8_plus) {
      /* SIMD4x2: the index of the first active channel is simply the
       * negation of the lowest bit of the mask register.
       */
      brw_AND(p, brw_writemask(dst, WRITEMASK_X),
              negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)),
              brw_imm_ud(1));

   } else {
      brw_inst *masked;

      /* SIMD4x2 before Gen8: write the destination first with execution
       * masking off and then again with it on; the surviving value tells
       * which of the channels is active.
       */
      brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
              brw_imm_ud(1));
      masked = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
                       brw_imm_ud(0));
      brw_inst_set_mask_control(devinfo, masked, BRW_MASK_ENABLE);
   }

   brw_pop_insn_state(p);
}
void
brw_broadcast(struct brw_codegen *p,
struct brw_reg dst,

View file

@ -2061,6 +2061,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
generate_set_simd4x2_offset(inst, dst, src[0]);
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
brw_find_live_channel(p, dst);
break;
case SHADER_OPCODE_BROADCAST:
brw_broadcast(p, dst, src[0], src[1]);
break;

View file

@ -517,6 +517,8 @@ brw_instruction_name(enum opcode op)
case SHADER_OPCODE_URB_WRITE_SIMD8:
return "gen8_urb_write_simd8";
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
return "find_live_channel";
case SHADER_OPCODE_BROADCAST:
return "broadcast";

View file

@ -1512,6 +1512,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
brw_memory_fence(p, dst);
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
brw_find_live_channel(p, dst);
break;
case SHADER_OPCODE_BROADCAST:
brw_broadcast(p, dst, src[0], src[1]);
break;