mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
i965/fs: Calculate delta_x and delta_y together.
This lets SIMD16 programs on G45 and Gen5 use the PLN instruction.

On Ironlake:
total instructions in shared programs: 5634757 -> 5518055 (-2.07%)
instructions in affected programs: 1745837 -> 1629135 (-6.68%)
helped: 11439
HURT: 4

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
This commit is contained in:
parent
fde3100fe6
commit
5af0604d52
7 changed files with 79 additions and 74 deletions
|
|
@ -1265,8 +1265,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
|
|||
emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
|
||||
} else {
|
||||
emit(FS_OPCODE_LINTERP, wpos,
|
||||
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
|
||||
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
|
||||
this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
|
||||
interp_reg(VARYING_SLOT_POS, 2));
|
||||
}
|
||||
wpos = offset(wpos, 1);
|
||||
|
|
@ -1308,8 +1307,7 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
|
|||
barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
|
||||
}
|
||||
return emit(FS_OPCODE_LINTERP, attr,
|
||||
this->delta_x[barycoord_mode],
|
||||
this->delta_y[barycoord_mode], interp);
|
||||
this->delta_xy[barycoord_mode], interp);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1859,8 +1857,8 @@ fs_visitor::assign_urb_setup()
|
|||
*/
|
||||
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
||||
if (inst->opcode == FS_OPCODE_LINTERP) {
|
||||
assert(inst->src[2].file == HW_REG);
|
||||
inst->src[2].fixed_hw_reg.nr += urb_start;
|
||||
assert(inst->src[1].file == HW_REG);
|
||||
inst->src[1].fixed_hw_reg.nr += urb_start;
|
||||
}
|
||||
|
||||
if (inst->opcode == FS_OPCODE_CINTERP) {
|
||||
|
|
@ -2114,25 +2112,16 @@ fs_visitor::compact_virtual_grfs()
|
|||
}
|
||||
}
|
||||
|
||||
/* Patch all the references to delta_x/delta_y, since they're used in
|
||||
* register allocation. If they're unused, switch them to BAD_FILE so
|
||||
* we don't think some random VGRF is delta_x/delta_y.
|
||||
/* Patch all the references to delta_xy, since they're used in register
|
||||
* allocation. If they're unused, switch them to BAD_FILE so we don't
|
||||
* think some random VGRF is delta_xy.
|
||||
*/
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
|
||||
if (delta_x[i].file == GRF) {
|
||||
if (remap_table[delta_x[i].reg] != -1) {
|
||||
delta_x[i].reg = remap_table[delta_x[i].reg];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
|
||||
if (delta_xy[i].file == GRF) {
|
||||
if (remap_table[delta_xy[i].reg] != -1) {
|
||||
delta_xy[i].reg = remap_table[delta_xy[i].reg];
|
||||
} else {
|
||||
delta_x[i].file = BAD_FILE;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
|
||||
if (delta_y[i].file == GRF) {
|
||||
if (remap_table[delta_y[i].reg] != -1) {
|
||||
delta_y[i].reg = remap_table[delta_y[i].reg];
|
||||
} else {
|
||||
delta_y[i].file = BAD_FILE;
|
||||
delta_xy[i].file = BAD_FILE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2685,14 +2674,9 @@ fs_visitor::opt_register_renaming()
|
|||
if (progress) {
|
||||
invalidate_live_intervals();
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
|
||||
if (delta_x[i].file == GRF && remap[delta_x[i].reg] != -1) {
|
||||
delta_x[i].reg = remap[delta_x[i].reg];
|
||||
}
|
||||
}
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
|
||||
if (delta_y[i].file == GRF && remap[delta_y[i].reg] != -1) {
|
||||
delta_y[i].reg = remap[delta_y[i].reg];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
|
||||
if (delta_xy[i].file == GRF && remap[delta_xy[i].reg] != -1) {
|
||||
delta_xy[i].reg = remap[delta_xy[i].reg];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -514,8 +514,7 @@ public:
|
|||
fs_reg pixel_y;
|
||||
fs_reg wpos_w;
|
||||
fs_reg pixel_w;
|
||||
fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
|
||||
fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
|
||||
fs_reg delta_xy[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
|
||||
fs_reg shader_start_time;
|
||||
fs_reg userplane[MAX_CLIP_PLANES];
|
||||
|
||||
|
|
|
|||
|
|
@ -391,12 +391,31 @@ void
|
|||
fs_generator::generate_linterp(fs_inst *inst,
|
||||
struct brw_reg dst, struct brw_reg *src)
|
||||
{
|
||||
/* PLN reads:
|
||||
* / in SIMD16 \
|
||||
* -----------------------------------
|
||||
* | src1+0 | src1+1 | src1+2 | src1+3 |
|
||||
* |-----------------------------------|
|
||||
* |(x0, x1)|(y0, y1)|(x2, x3)|(y2, y3)|
|
||||
* -----------------------------------
|
||||
*
|
||||
* but for the LINE/MAC pair, the LINE reads Xs and the MAC reads Ys:
|
||||
*
|
||||
* -----------------------------------
|
||||
* | src1+0 | src1+1 | src1+2 | src1+3 |
|
||||
* |-----------------------------------|
|
||||
* |(x0, x1)|(y0, y1)| | | in SIMD8
|
||||
* |-----------------------------------|
|
||||
* |(x0, x1)|(x2, x3)|(y0, y1)|(y2, y3)| in SIMD16
|
||||
* -----------------------------------
|
||||
*
|
||||
* See also: emit_interpolation_setup_gen4().
|
||||
*/
|
||||
struct brw_reg delta_x = src[0];
|
||||
struct brw_reg delta_y = src[1];
|
||||
struct brw_reg interp = src[2];
|
||||
struct brw_reg delta_y = offset(src[0], dispatch_width / 8);
|
||||
struct brw_reg interp = src[1];
|
||||
|
||||
if (brw->has_pln &&
|
||||
delta_y.nr == delta_x.nr + 1 &&
|
||||
(brw->gen >= 7 || (delta_x.nr & 1) == 0)) {
|
||||
brw_PLN(p, dst, interp, delta_x);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -1482,8 +1482,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
*/
|
||||
no16("interpolate_at_* not yet supported in SIMD16 mode.");
|
||||
|
||||
fs_reg dst_x = vgrf(2);
|
||||
fs_reg dst_y = offset(dst_x, 1);
|
||||
fs_reg dst_xy = vgrf(2);
|
||||
|
||||
/* For most messages, we need one reg of ignored data; the hardware
|
||||
* requires mlen==1 even when there is no payload. in the per-slot
|
||||
|
|
@ -1495,7 +1494,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_interp_var_at_centroid:
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u));
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_interp_var_at_sample: {
|
||||
|
|
@ -1503,7 +1502,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
|
||||
assert(const_sample);
|
||||
unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src,
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
|
||||
fs_reg(msg_data));
|
||||
break;
|
||||
}
|
||||
|
|
@ -1515,7 +1514,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
|
||||
unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
|
||||
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src,
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
|
||||
fs_reg(off_x | (off_y << 4)));
|
||||
} else {
|
||||
src = vgrf(glsl_type::ivec2_type);
|
||||
|
|
@ -1548,7 +1547,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
}
|
||||
|
||||
mlen = 2;
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src,
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
|
||||
fs_reg(0u));
|
||||
}
|
||||
break;
|
||||
|
|
@ -1567,7 +1566,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
|
||||
src.type = dest.type;
|
||||
|
||||
emit(FS_OPCODE_LINTERP, dest, dst_x, dst_y, src);
|
||||
emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
|
||||
dest = offset(dest, 1);
|
||||
}
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -244,7 +244,7 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width)
|
|||
}
|
||||
assert(reg == ra_reg_count);
|
||||
|
||||
/* Add a special class for aligned pairs, which we'll put delta_x/y
|
||||
/* Add a special class for aligned pairs, which we'll put delta_xy
|
||||
* in on Gen <= 6 so that we can do PLN.
|
||||
*/
|
||||
if (devinfo->has_pln && reg_width == 1 && devinfo->gen <= 6) {
|
||||
|
|
@ -558,14 +558,14 @@ fs_visitor::assign_regs(bool allow_spilling)
|
|||
* second operand of a PLN instruction needs to be an
|
||||
* even-numbered register, so we have a special register class
|
||||
* wm_aligned_pairs_class to handle this case. pre-GEN6 always
|
||||
* uses this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] as the
|
||||
* uses this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] as the
|
||||
* second operand of a PLN instruction (since it doesn't support
|
||||
* any other interpolation modes). So all we need to do is find
|
||||
* that register and set it to the appropriate class.
|
||||
*/
|
||||
if (screen->wm_reg_sets[rsi].aligned_pairs_class >= 0 &&
|
||||
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF &&
|
||||
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) {
|
||||
this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF &&
|
||||
this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) {
|
||||
c = screen->wm_reg_sets[rsi].aligned_pairs_class;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -593,8 +593,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir)
|
|||
|
||||
/* 1. collect interpolation factors */
|
||||
|
||||
fs_reg dst_x = vgrf(glsl_type::get_instance(ir->type->base_type, 2, 1));
|
||||
fs_reg dst_y = offset(dst_x, 1);
|
||||
fs_reg dst_xy = vgrf(glsl_type::get_instance(ir->type->base_type, 2, 1));
|
||||
|
||||
/* for most messages, we need one reg of ignored data; the hardware requires mlen==1
|
||||
* even when there is no payload. in the per-slot offset case, we'll replace this with
|
||||
|
|
@ -606,7 +605,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir)
|
|||
|
||||
switch (ir->operation) {
|
||||
case ir_unop_interpolate_at_centroid:
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u));
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u));
|
||||
break;
|
||||
|
||||
case ir_binop_interpolate_at_sample: {
|
||||
|
|
@ -614,7 +613,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir)
|
|||
assert(sample_num || !"nonconstant sample number should have been lowered.");
|
||||
|
||||
unsigned msg_data = sample_num->value.i[0] << 4;
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, fs_reg(msg_data));
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, fs_reg(msg_data));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -623,7 +622,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir)
|
|||
if (const_offset) {
|
||||
unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) |
|
||||
(pack_pixel_offset(const_offset->value.f[1]) << 4);
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src,
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
|
||||
fs_reg(msg_data));
|
||||
} else {
|
||||
/* pack the operands: hw wants offsets as 4 bit signed ints */
|
||||
|
|
@ -656,7 +655,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir)
|
|||
}
|
||||
|
||||
mlen = 2 * reg_width;
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src,
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
|
||||
fs_reg(0u));
|
||||
}
|
||||
break;
|
||||
|
|
@ -678,8 +677,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir)
|
|||
|
||||
for (int i = 0; i < ir->type->vector_elements; i++) {
|
||||
int ch = swiz ? ((*(int *)&swiz->mask) >> 2*i) & 3 : i;
|
||||
emit(FS_OPCODE_LINTERP, res,
|
||||
dst_x, dst_y,
|
||||
emit(FS_OPCODE_LINTERP, res, dst_xy,
|
||||
fs_reg(interp_reg(var->data.location, ch)));
|
||||
res = offset(res, 1);
|
||||
}
|
||||
|
|
@ -3443,31 +3441,31 @@ fs_visitor::emit_interpolation_setup_gen4()
|
|||
fs_reg(brw_imm_v(0x11001100))));
|
||||
|
||||
this->current_annotation = "compute pixel deltas from v0";
|
||||
if (brw->has_pln) {
|
||||
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
|
||||
vgrf(glsl_type::vec2_type);
|
||||
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
|
||||
offset(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1);
|
||||
|
||||
this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
|
||||
vgrf(glsl_type::vec2_type);
|
||||
const fs_reg &delta_xy = this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
|
||||
const fs_reg xstart(negate(brw_vec1_grf(1, 0)));
|
||||
const fs_reg ystart(negate(brw_vec1_grf(1, 1)));
|
||||
|
||||
if (brw->has_pln && dispatch_width == 16) {
|
||||
emit(ADD(half(offset(delta_xy, 0), 0), half(this->pixel_x, 0), xstart));
|
||||
emit(ADD(half(offset(delta_xy, 0), 1), half(this->pixel_y, 0), ystart));
|
||||
emit(ADD(half(offset(delta_xy, 1), 0), half(this->pixel_x, 1), xstart))
|
||||
->force_sechalf = true;
|
||||
emit(ADD(half(offset(delta_xy, 1), 1), half(this->pixel_y, 1), ystart))
|
||||
->force_sechalf = true;
|
||||
} else {
|
||||
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
|
||||
vgrf(glsl_type::float_type);
|
||||
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
|
||||
vgrf(glsl_type::float_type);
|
||||
emit(ADD(offset(delta_xy, 0), this->pixel_x, xstart));
|
||||
emit(ADD(offset(delta_xy, 1), this->pixel_y, ystart));
|
||||
}
|
||||
emit(ADD(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
|
||||
this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))));
|
||||
emit(ADD(this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
|
||||
this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))));
|
||||
|
||||
this->current_annotation = "compute pos.w and 1/pos.w";
|
||||
/* Compute wpos.w. It's always in our setup, since it's needed to
|
||||
* interpolate the other attributes.
|
||||
*/
|
||||
this->wpos_w = vgrf(glsl_type::float_type);
|
||||
emit(FS_OPCODE_LINTERP, wpos_w,
|
||||
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
|
||||
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
|
||||
interp_reg(VARYING_SLOT_POS, 3));
|
||||
emit(FS_OPCODE_LINTERP, wpos_w, delta_xy, interp_reg(VARYING_SLOT_POS, 3));
|
||||
/* Compute the pixel 1/W value from wpos.w. */
|
||||
this->pixel_w = vgrf(glsl_type::float_type);
|
||||
emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
|
||||
|
|
@ -3509,8 +3507,7 @@ fs_visitor::emit_interpolation_setup_gen6()
|
|||
|
||||
for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
|
||||
uint8_t reg = payload.barycentric_coord_reg[i];
|
||||
this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0));
|
||||
this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0));
|
||||
this->delta_xy[i] = fs_reg(brw_vec16_grf(reg, 0));
|
||||
}
|
||||
|
||||
this->current_annotation = NULL;
|
||||
|
|
|
|||
|
|
@ -704,6 +704,13 @@ brw_vec8_grf(unsigned nr, unsigned subnr)
|
|||
return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
|
||||
}
|
||||
|
||||
/**
 * Construct float[16] general-purpose register.
 *
 * Thin convenience wrapper: forwards \p nr and \p subnr unchanged to
 * brw_vec16_reg() with BRW_GENERAL_REGISTER_FILE as the register file.
 *
 * NOTE(review): nr/subnr are presumably the GRF register number and the
 * sub-register offset within it -- confirm against brw_vec16_reg().
 */
|
||||
static inline struct brw_reg
|
||||
brw_vec16_grf(unsigned nr, unsigned subnr)
|
||||
{
|
||||
return brw_vec16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
|
||||
}
|
||||
|
||||
|
||||
static inline struct brw_reg
|
||||
brw_uw8_grf(unsigned nr, unsigned subnr)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue