intel/eu: Switch to a logical state stack

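Instead of the state stack that's based on copying a dummy instruction
around, we start using a logical stack of brw_insn_state structs.  The
default state is now stored directly as struct fields rather than being
encoded into, and decoded back out of, a dummy brw_inst.  This uses a
bit less memory and is way less conceptually bogus.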

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Jason Ekstrand 2018-05-29 14:37:35 -07:00
parent db9675f5a4
commit 6a9525bf67
3 changed files with 72 additions and 126 deletions

View file

@ -129,91 +129,76 @@ brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz)
unsigned unsigned
brw_get_default_exec_size(struct brw_codegen *p) brw_get_default_exec_size(struct brw_codegen *p)
{ {
return brw_inst_exec_size(p->devinfo, p->current); return p->current->exec_size;
} }
unsigned unsigned
brw_get_default_group(struct brw_codegen *p) brw_get_default_group(struct brw_codegen *p)
{ {
if (p->devinfo->gen >= 6) { return p->current->group;
unsigned group = brw_inst_qtr_control(p->devinfo, p->current) * 8;
if (p->devinfo->gen >= 7)
group += brw_inst_nib_control(p->devinfo, p->current) * 4;
return group;
} else {
unsigned qtr_control = brw_inst_qtr_control(p->devinfo, p->current);
if (qtr_control == BRW_COMPRESSION_COMPRESSED)
return 0;
else
return qtr_control * 8;
}
} }
unsigned unsigned
brw_get_default_access_mode(struct brw_codegen *p) brw_get_default_access_mode(struct brw_codegen *p)
{ {
return brw_inst_access_mode(p->devinfo, p->current); return p->current->access_mode;
} }
void void
brw_set_default_exec_size(struct brw_codegen *p, unsigned value) brw_set_default_exec_size(struct brw_codegen *p, unsigned value)
{ {
brw_inst_set_exec_size(p->devinfo, p->current, value); p->current->exec_size = value;
} }
void brw_set_default_predicate_control( struct brw_codegen *p, unsigned pc ) void brw_set_default_predicate_control( struct brw_codegen *p, unsigned pc )
{ {
brw_inst_set_pred_control(p->devinfo, p->current, pc); p->current->predicate = pc;
} }
void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse) void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse)
{ {
brw_inst_set_pred_inv(p->devinfo, p->current, predicate_inverse); p->current->pred_inv = predicate_inverse;
} }
void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg) void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg)
{ {
if (p->devinfo->gen >= 7) assert(subreg < 2);
brw_inst_set_flag_reg_nr(p->devinfo, p->current, reg); p->current->flag_subreg = reg * 2 + subreg;
brw_inst_set_flag_subreg_nr(p->devinfo, p->current, subreg);
} }
void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode ) void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode )
{ {
brw_inst_set_access_mode(p->devinfo, p->current, access_mode); p->current->access_mode = access_mode;
} }
void void
brw_set_default_compression_control(struct brw_codegen *p, brw_set_default_compression_control(struct brw_codegen *p,
enum brw_compression compression_control) enum brw_compression compression_control)
{ {
if (p->devinfo->gen >= 6) {
/* Since we don't use the SIMD32 support in gen6, we translate
* the pre-gen6 compression control here.
*/
switch (compression_control) { switch (compression_control) {
case BRW_COMPRESSION_NONE: case BRW_COMPRESSION_NONE:
/* This is the "use the first set of bits of dmask/vmask/arf /* This is the "use the first set of bits of dmask/vmask/arf
* according to execsize" option. * according to execsize" option.
*/ */
brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_1Q); p->current->group = 0;
break; break;
case BRW_COMPRESSION_2NDHALF: case BRW_COMPRESSION_2NDHALF:
/* For SIMD8, this is "use the second set of 8 bits." */ /* For SIMD8, this is "use the second set of 8 bits." */
brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_2Q); p->current->group = 8;
break; break;
case BRW_COMPRESSION_COMPRESSED: case BRW_COMPRESSION_COMPRESSED:
/* For SIMD16 instruction compression, use the first set of 16 bits /* For SIMD16 instruction compression, use the first set of 16 bits
* since we don't do SIMD32 dispatch. * since we don't do SIMD32 dispatch.
*/ */
brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_1H); p->current->group = 0;
break; break;
default: default:
unreachable("not reached"); unreachable("not reached");
} }
} else {
brw_inst_set_qtr_control(p->devinfo, p->current, compression_control); if (p->devinfo->gen <= 6) {
p->current->compressed =
(compression_control == BRW_COMPRESSION_COMPRESSED);
} }
} }
@ -246,7 +231,7 @@ brw_inst_set_compression(const struct gen_device_info *devinfo,
void void
brw_set_default_compression(struct brw_codegen *p, bool on) brw_set_default_compression(struct brw_codegen *p, bool on)
{ {
brw_inst_set_compression(p->devinfo, p->current, on); p->current->compressed = on;
} }
/** /**
@ -283,23 +268,22 @@ brw_inst_set_group(const struct gen_device_info *devinfo,
void void
brw_set_default_group(struct brw_codegen *p, unsigned group) brw_set_default_group(struct brw_codegen *p, unsigned group)
{ {
brw_inst_set_group(p->devinfo, p->current, group); p->current->group = group;
} }
void brw_set_default_mask_control( struct brw_codegen *p, unsigned value ) void brw_set_default_mask_control( struct brw_codegen *p, unsigned value )
{ {
brw_inst_set_mask_control(p->devinfo, p->current, value); p->current->mask_control = value;
} }
void brw_set_default_saturate( struct brw_codegen *p, bool enable ) void brw_set_default_saturate( struct brw_codegen *p, bool enable )
{ {
brw_inst_set_saturate(p->devinfo, p->current, enable); p->current->saturate = enable;
} }
void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value) void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value)
{ {
if (p->devinfo->gen >= 6) p->current->acc_wr_control = value;
brw_inst_set_acc_wr_control(p->devinfo, p->current, value);
} }
void brw_push_insn_state( struct brw_codegen *p ) void brw_push_insn_state( struct brw_codegen *p )

View file

@ -46,6 +46,36 @@ extern "C" {
#define BRW_EU_MAX_INSN_STACK 5 #define BRW_EU_MAX_INSN_STACK 5
struct brw_insn_state {
/* One of BRW_EXECUTE_* */
unsigned exec_size:3;
/* Group in units of channels */
unsigned group:5;
/* Compression control on gen4-5 */
bool compressed:1;
/* One of BRW_MASK_* */
unsigned mask_control:1;
bool saturate:1;
/* One of BRW_ALIGN_* */
unsigned access_mode:1;
/* One of BRW_PREDICATE_* */
enum brw_predicate predicate:4;
bool pred_inv:1;
/* Flag subreg. Bottom bit is subreg, top bit is reg */
unsigned flag_subreg:2;
bool acc_wr_control:1;
};
/* A helper for accessing the last instruction emitted. This makes it easy /* A helper for accessing the last instruction emitted. This makes it easy
* to set various bits on an instruction without having to create temporary * to set various bits on an instruction without having to create temporary
* variable and assign the emitted instruction to those. * variable and assign the emitted instruction to those.
@ -62,8 +92,8 @@ struct brw_codegen {
/* Allow clients to push/pop instruction state: /* Allow clients to push/pop instruction state:
*/ */
brw_inst stack[BRW_EU_MAX_INSN_STACK]; struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
brw_inst *current; struct brw_insn_state *current;
/** Whether or not the user wants automatic exec sizes /** Whether or not the user wants automatic exec sizes
* *

View file

@ -621,72 +621,6 @@ gen7_set_dp_scratch_message(struct brw_codegen *p,
brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset); brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
} }
struct brw_insn_state {
/* One of BRW_EXECUTE_* */
unsigned exec_size:3;
/* Group in units of channels */
unsigned group:5;
/* Compression control on gen4-5 */
bool compressed:1;
/* One of BRW_MASK_* */
unsigned mask_control:1;
bool saturate:1;
/* One of BRW_ALIGN_* */
unsigned access_mode:1;
/* One of BRW_PREDICATE_* */
enum brw_predicate predicate:4;
bool pred_inv:1;
/* Flag subreg. Bottom bit is subreg, top bit is reg */
unsigned flag_subreg:2;
bool acc_wr_control:1;
};
static struct brw_insn_state
brw_inst_get_state(const struct gen_device_info *devinfo,
const brw_inst *insn)
{
struct brw_insn_state state = { };
state.exec_size = brw_inst_exec_size(devinfo, insn);
if (devinfo->gen >= 6) {
state.group = brw_inst_qtr_control(devinfo, insn) * 8;
if (devinfo->gen >= 7)
state.group += brw_inst_nib_control(devinfo, insn) * 4;
} else {
unsigned qtr_control = brw_inst_qtr_control(devinfo, insn);
if (qtr_control == BRW_COMPRESSION_COMPRESSED) {
state.group = 0;
state.compressed = true;
} else {
state.group = qtr_control * 8;
state.compressed = false;
}
}
state.access_mode = brw_inst_access_mode(devinfo, insn);
state.mask_control = brw_inst_mask_control(devinfo, insn);
state.saturate = brw_inst_saturate(devinfo, insn);
state.predicate = brw_inst_pred_control(devinfo, insn);
state.pred_inv = brw_inst_pred_inv(devinfo, insn);
state.flag_subreg = brw_inst_flag_subreg_nr(devinfo, insn);
if (devinfo->gen >= 7)
state.flag_subreg += brw_inst_flag_reg_nr(devinfo, insn) * 2;
if (devinfo->gen >= 6)
state.acc_wr_control = brw_inst_acc_wr_control(devinfo, insn);
return state;
}
static void static void
brw_inst_set_state(const struct gen_device_info *devinfo, brw_inst_set_state(const struct gen_device_info *devinfo,
brw_inst *insn, brw_inst *insn,
@ -735,8 +669,7 @@ brw_next_insn(struct brw_codegen *p, unsigned opcode)
brw_inst_set_opcode(devinfo, insn, opcode); brw_inst_set_opcode(devinfo, insn, opcode);
/* Apply the default instruction state */ /* Apply the default instruction state */
struct brw_insn_state current = brw_inst_get_state(devinfo, p->current); brw_inst_set_state(devinfo, insn, p->current);
brw_inst_set_state(devinfo, insn, &current);
return insn; return insn;
} }
@ -3504,9 +3437,8 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
*/ */
inst = brw_FBL(p, vec1(dst), exec_mask); inst = brw_FBL(p, vec1(dst), exec_mask);
} else { } else {
const struct brw_reg flag = brw_flag_reg( const struct brw_reg flag = brw_flag_reg(p->current->flag_subreg / 2,
brw_inst_flag_reg_nr(devinfo, p->current), p->current->flag_subreg % 2);
brw_inst_flag_subreg_nr(devinfo, p->current));
brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));