mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
i965/fs: Rewrite discards to use a flag subreg to track discarded pixels.
This makes much more sense on gen6+, and will also prove useful for early exit of shaders on discard.

v2: fix up a stale comment from before converting gen4-5.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
b278f65e1c
commit
d5016495cc
8 changed files with 45 additions and 72 deletions
|
|
@ -673,7 +673,6 @@ enum opcode {
|
|||
FS_OPCODE_PIXEL_Y,
|
||||
FS_OPCODE_CINTERP,
|
||||
FS_OPCODE_LINTERP,
|
||||
FS_OPCODE_DISCARD,
|
||||
FS_OPCODE_SPILL,
|
||||
FS_OPCODE_UNSPILL,
|
||||
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
|
||||
|
|
|
|||
|
|
@ -2434,6 +2434,14 @@ fs_visitor::run()
|
|||
else
|
||||
emit_interpolation_setup_gen6();
|
||||
|
||||
/* We handle discards by keeping track of the still-live pixels in f0.1.
|
||||
* Initialize it with the dispatched pixels.
|
||||
*/
|
||||
if (fp->UsesKill) {
|
||||
fs_inst *discard_init = emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
|
||||
discard_init->flag_subreg = 1;
|
||||
}
|
||||
|
||||
/* Generate FS IR for main(). (the visitor only descends into
|
||||
* functions called "main").
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -501,7 +501,6 @@ private:
|
|||
void generate_math_gen4(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src);
|
||||
void generate_discard(fs_inst *inst);
|
||||
void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
|
||||
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
|
||||
bool negate_value);
|
||||
|
|
|
|||
|
|
@ -74,6 +74,17 @@ fs_generator::generate_fb_write(fs_inst *inst)
|
|||
brw_set_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
|
||||
if (fp->UsesKill) {
|
||||
struct brw_reg pixel_mask;
|
||||
|
||||
if (intel->gen >= 6)
|
||||
pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
|
||||
else
|
||||
pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
|
||||
|
||||
brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
|
||||
}
|
||||
|
||||
if (inst->header_present) {
|
||||
if (intel->gen >= 6) {
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
||||
|
|
@ -513,58 +524,6 @@ fs_generator::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src
|
|||
brw_ADD(p, dst, src0, negate(src1));
|
||||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_discard(fs_inst *inst)
|
||||
{
|
||||
struct brw_reg f0 = brw_flag_reg(0, 0);
|
||||
|
||||
if (intel->gen >= 6) {
|
||||
struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
|
||||
struct brw_reg some_register;
|
||||
|
||||
/* As of gen6, we no longer have the mask register to look at,
|
||||
* so life gets a bit more complicated.
|
||||
*/
|
||||
|
||||
/* Load the flag register with all ones. */
|
||||
brw_push_insn_state(p);
|
||||
brw_set_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_MOV(p, f0, brw_imm_uw(0xffff));
|
||||
brw_pop_insn_state(p);
|
||||
|
||||
/* Do a comparison that should always fail, to produce 0s in the flag
|
||||
* reg where we have active channels.
|
||||
*/
|
||||
some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
|
||||
brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
|
||||
BRW_CONDITIONAL_NZ, some_register, some_register);
|
||||
|
||||
/* Undo CMP's whacking of predication*/
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
brw_set_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_AND(p, g1, f0, g1);
|
||||
brw_pop_insn_state(p);
|
||||
} else {
|
||||
struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
brw_set_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
|
||||
/* Unlike the 965, we have the mask reg, so we just need
|
||||
* somewhere to invert that (containing channels to be disabled)
|
||||
* so it can be ANDed with the mask of pixels still to be
|
||||
* written. Use the flag reg for consistency with gen6+.
|
||||
*/
|
||||
brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
|
||||
brw_AND(p, g0, f0, g0);
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_spill(fs_inst *inst, struct brw_reg src)
|
||||
{
|
||||
|
|
@ -745,12 +704,16 @@ void
|
|||
fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst)
|
||||
{
|
||||
struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg);
|
||||
struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
|
||||
struct brw_reg dispatch_mask;
|
||||
|
||||
if (intel->gen >= 6)
|
||||
dispatch_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
|
||||
else
|
||||
dispatch_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
|
||||
|
||||
assert (intel->gen >= 6);
|
||||
brw_push_insn_state(p);
|
||||
brw_set_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_MOV(p, flags, g1);
|
||||
brw_MOV(p, flags, dispatch_mask);
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
||||
|
|
@ -1083,9 +1046,6 @@ fs_generator::generate_code(exec_list *instructions)
|
|||
case SHADER_OPCODE_TXS:
|
||||
generate_tex(inst, dst, src[0]);
|
||||
break;
|
||||
case FS_OPCODE_DISCARD:
|
||||
generate_discard(inst);
|
||||
break;
|
||||
case FS_OPCODE_DDX:
|
||||
generate_ddx(inst, dst, src[0]);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -252,14 +252,15 @@ fs_visitor::emit_fragment_program_code()
|
|||
continue;
|
||||
}
|
||||
|
||||
emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
|
||||
BRW_CONDITIONAL_L));
|
||||
|
||||
if (intel->gen < 6 && dispatch_width == 16)
|
||||
fail("Can't support (non-uniform) control flow on 16-wide");
|
||||
emit(IF(BRW_PREDICATE_NORMAL));
|
||||
emit(FS_OPCODE_DISCARD);
|
||||
emit(BRW_OPCODE_ENDIF);
|
||||
/* Emit an instruction that's predicated on the current
|
||||
* undiscarded pixels, and updates just those pixels to be
|
||||
* turned off.
|
||||
*/
|
||||
fs_inst *cmp = emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
|
||||
BRW_CONDITIONAL_GE));
|
||||
cmp->predicate = BRW_PREDICATE_NORMAL;
|
||||
cmp->flag_subreg = 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -277,9 +277,6 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
|
|||
payload_last_use_ip[0 / reg_width] = use_ip;
|
||||
payload_last_use_ip[1 / reg_width] = use_ip;
|
||||
break;
|
||||
case FS_OPCODE_DISCARD:
|
||||
payload_last_use_ip[1 / reg_width] = use_ip;
|
||||
break;
|
||||
|
||||
case FS_OPCODE_LINTERP:
|
||||
/* On gen6+ in 16-wide, there are 4 adjacent registers (so 2 nodes)
|
||||
|
|
|
|||
|
|
@ -299,7 +299,6 @@ instruction_scheduler::calculate_deps()
|
|||
}
|
||||
|
||||
if (inst->predicate) {
|
||||
assert(last_conditional_mod[inst->flag_subreg]);
|
||||
add_dep(last_conditional_mod[inst->flag_subreg], n);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1435,7 +1435,17 @@ fs_visitor::visit(ir_discard *ir)
|
|||
{
|
||||
assert(ir->condition == NULL); /* FINISHME */
|
||||
|
||||
emit(FS_OPCODE_DISCARD);
|
||||
/* We track our discarded pixels in f0.1. By predicating on it, we can
|
||||
* update just the flag bits that aren't yet discarded. By emitting a
|
||||
* CMP of g0 != g0, all our currently executing channels will get turned
|
||||
* off.
|
||||
*/
|
||||
fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
|
||||
BRW_REGISTER_TYPE_UW));
|
||||
fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
|
||||
BRW_CONDITIONAL_NZ));
|
||||
cmp->predicate = BRW_PREDICATE_NORMAL;
|
||||
cmp->flag_subreg = 1;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue