/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/

/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include "brw_eu_defines.h"
#include "brw_eu.h"

#include "util/ralloc.h"

/**
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
 * registers, implicitly moving the operand to a message register.
 *
 * On Sandybridge, this is no longer the case.  This function performs the
 * explicit move; it should be called before emitting a SEND instruction.
 */
void
gen6_resolve_implied_move(struct brw_codegen *p,
                          struct brw_reg *src,
                          unsigned msg_reg_nr)
{
   const struct gen_device_info *devinfo = p->devinfo;
   if (devinfo->gen < 6)
      return;

   if (src->file == BRW_MESSAGE_REGISTER_FILE)
      return;

   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
      assert(devinfo->gen < 12);
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
              retype(*src, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   *src = brw_message_reg(msg_reg_nr);
}

static void
gen7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg)
{
   /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
    * "The send with EOT should use register space R112-R127 for <src>. This is
    *  to enable loading of a new thread into the same slot while the message
    *  with EOT for current thread is pending dispatch."
    *
    * Since we're pretending to have 16 MRFs anyway, we may as well use the
    * registers required for messages with EOT.
    */
   const struct gen_device_info *devinfo = p->devinfo;
   if (devinfo->gen >= 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
      reg->file = BRW_GENERAL_REGISTER_FILE;
      reg->nr += GEN7_MRF_HACK_START;
   }
}
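
/* Encode the destination register fields of an instruction, handling the
 * Gen12+ SEND/SENDC and split-send (SENDS/SENDSC) layouts separately from
 * the ordinary direct/indirect, align1/align16 encodings.
 */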
void
brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
{
   const struct gen_device_info *devinfo = p->devinfo;

   if (dest.file == BRW_MESSAGE_REGISTER_FILE)
      assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
   else if (dest.file == BRW_GENERAL_REGISTER_FILE)
      assert(dest.nr < 128);

   /* The hardware has a restriction where a destination of size Byte with
    * a stride of 1 is only allowed for a packed byte MOV. For any other
    * instruction, the stride must be at least 2, even when the destination
    * is the NULL register.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == BRW_ARF_NULL &&
       type_sz(dest.type) == 1 &&
       dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
      dest.hstride = BRW_HORIZONTAL_STRIDE_2;
   }

   gen7_convert_mrf_to_grf(p, &dest);

   if (devinfo->gen >= 12 &&
       (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr == 0);
      assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
             (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              dest.vstride == dest.width + 1));
      assert(!dest.negate && !dest.abs);
      brw_inst_set_dst_reg_file(devinfo, inst, dest.file);
      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);

   } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
              brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
      assert(devinfo->gen < 12);
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr % 16 == 0);
      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
             dest.vstride == dest.width + 1);
      assert(!dest.negate && !dest.abs);
      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
      brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
      brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
   } else {
      brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
      brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);

      if (dest.address_mode == BRW_ADDRESS_DIRECT) {
         brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr);
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
            brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
            if (dest.file == BRW_GENERAL_REGISTER_FILE ||
                dest.file == BRW_MESSAGE_REGISTER_FILE) {
               assert(dest.writemask != 0);
            }
            /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
             *    Although Dst.HorzStride is a don't care for Align16, HW needs
             *    this to be programmed as "01".
             */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      } else {
         brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr);

         /* These are different sizes in align1 vs align16:
          */
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
                                          dest.indirect_offset);
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
                                           dest.indirect_offset);
            /* even ignored in da16, still need to set as '01' */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      }
   }

   /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
    * or 16 (SIMD16), as that's normally correct.  However, when dealing with
    * small registers, it can be useful for us to automatically reduce it to
    * match the register size.
    */
   if (p->automatic_exec_sizes) {
      /*
       * In platforms that support fp64 we can emit instructions with a width
       * of 4 that need two SIMD8 registers and an exec_size of 8 or 16. In
       * these cases we need to make sure that these instructions have their
       * exec sizes set properly when they are emitted and we can't rely on
       * this code to fix it.
       */
      bool fix_exec_size;
      if (devinfo->gen >= 6)
         fix_exec_size = dest.width < BRW_EXECUTE_4;
      else
         fix_exec_size = dest.width < BRW_EXECUTE_8;

      if (fix_exec_size)
         brw_inst_set_exec_size(devinfo, inst, dest.width);
   }
}
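
/* Encode the first source operand of an instruction, including immediate
 * values and the restricted encodings required by the various SEND opcodes.
 */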
void
brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct gen_device_info *devinfo = p->devinfo;

   if (reg.file == BRW_MESSAGE_REGISTER_FILE)
      assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
   else if (reg.file == BRW_GENERAL_REGISTER_FILE)
      assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   if (devinfo->gen >= 6 &&
       (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC ||
        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the MRF/GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   if (devinfo->gen >= 12 &&
       (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
      assert(reg.file != BRW_IMMEDIATE_VALUE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_send_src0_reg_file(devinfo, inst, reg.file);
      brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);

   } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
              brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
      assert(reg.file == BRW_GENERAL_REGISTER_FILE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr % 16 == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
      brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
   } else {
      brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src0_abs(devinfo, inst, reg.abs);
      brw_inst_set_src0_negate(devinfo, inst, reg.negate);
      brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);

      if (reg.file == BRW_IMMEDIATE_VALUE) {
         if (reg.type == BRW_REGISTER_TYPE_DF ||
             brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM)
            brw_inst_set_imm_df(devinfo, inst, reg.df);
         else if (reg.type == BRW_REGISTER_TYPE_UQ ||
                  reg.type == BRW_REGISTER_TYPE_Q)
            brw_inst_set_imm_uq(devinfo, inst, reg.u64);
         else
            brw_inst_set_imm_ud(devinfo, inst, reg.ud);

         if (devinfo->gen < 12 && type_sz(reg.type) < 8) {
            brw_inst_set_src1_reg_file(devinfo, inst,
                                       BRW_ARCHITECTURE_REGISTER_FILE);
            brw_inst_set_src1_reg_hw_type(devinfo, inst,
                                          brw_inst_src0_reg_hw_type(devinfo, inst));
         }
      } else {
         if (reg.address_mode == BRW_ADDRESS_DIRECT) {
            brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr);
            } else {
               brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
            }
         } else {
            brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);

            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
            } else {
               brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
            }
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src0_width(devinfo, inst, reg.width);
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src0_da16_swiz_x(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src0_da16_swiz_y(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src0_da16_swiz_z(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src0_da16_swiz_w(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
                       reg.type == BRW_REGISTER_TYPE_DF &&
                       reg.vstride == BRW_VERTICAL_STRIDE_2) {
               /* From SNB PRM:
                *
                * "For Align16 access mode, only encodings of 0000 and 0011
                *  are allowed. Other codes are reserved."
                *
                * Presumably the DevSNB behavior applies to IVB as well.
                */
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}
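
/* Encode the second source operand of an instruction.  For split-send
 * messages (and SEND/SENDC on Gen12+) this is the second payload register;
 * otherwise it follows the ordinary operand encoding.
 */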
void
brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct gen_device_info *devinfo = p->devinfo;

   if (reg.file == BRW_GENERAL_REGISTER_FILE)
      assert(reg.nr < 128);

   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC ||
       (devinfo->gen >= 12 &&
        (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
         brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC))) {
      assert(reg.file == BRW_GENERAL_REGISTER_FILE ||
             reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr);
      brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
   } else {
      /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
       *
       *    "Accumulator registers may be accessed explicitly as src0
       *     operands only."
       */
      assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
             reg.nr != BRW_ARF_ACCUMULATOR);

      gen7_convert_mrf_to_grf(p, &reg);
      assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

      brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src1_abs(devinfo, inst, reg.abs);
      brw_inst_set_src1_negate(devinfo, inst, reg.negate);

      /* Only src1 can be immediate in two-argument instructions.
       */
      assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);

      if (reg.file == BRW_IMMEDIATE_VALUE) {
         /* two-argument instructions can only use 32-bit immediates */
         assert(type_sz(reg.type) < 8);
         brw_inst_set_imm_ud(devinfo, inst, reg.ud);
      } else {
         /* This is a hardware restriction, which may or may not be lifted
          * in the future:
          */
         assert (reg.address_mode == BRW_ADDRESS_DIRECT);
         /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

         brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr);
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr);
         } else {
            brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src1_width(devinfo, inst, reg.width);
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src1_da16_swiz_x(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src1_da16_swiz_y(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src1_da16_swiz_z(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src1_da16_swiz_w(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
                       reg.type == BRW_REGISTER_TYPE_DF &&
                       reg.vstride == BRW_VERTICAL_STRIDE_2) {
               /* From SNB PRM:
                *
                * "For Align16 access mode, only encodings of 0000 and 0011
                *  are allowed. Other codes are reserved."
                *
                * Presumably the DevSNB behavior applies to IVB as well.
                */
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}

/**
 * Specify the descriptor and extended descriptor immediate for a SEND(C)
 * message instruction.
 */
void
brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst,
                unsigned desc, unsigned ex_desc)
{
   const struct gen_device_info *devinfo = p->devinfo;
   assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
          brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC);
   if (devinfo->gen < 12)
      brw_inst_set_src1_file_type(devinfo, inst,
                                  BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD);
   brw_inst_set_send_desc(devinfo, inst, desc);
   if (devinfo->gen >= 9)
      brw_inst_set_send_ex_desc(devinfo, inst, ex_desc);
}
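
/* Set up the descriptor for a message to the extended math function unit,
 * inferring message and response lengths from the math function itself.
 */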
static void brw_set_math_message( struct brw_codegen *p,
                                  brw_inst *inst,
                                  unsigned function,
                                  unsigned integer_type,
                                  bool low_precision,
                                  unsigned dataType )
{
   const struct gen_device_info *devinfo = p->devinfo;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }

   brw_set_desc(p, inst, brw_message_desc(
                   devinfo, msg_length, response_length, false));

   brw_inst_set_sfid(devinfo, inst, BRW_SFID_MATH);
   brw_inst_set_math_msg_function(devinfo, inst, function);
   brw_inst_set_math_msg_signed_int(devinfo, inst, integer_type);
   brw_inst_set_math_msg_precision(devinfo, inst, low_precision);
   brw_inst_set_math_msg_saturate(devinfo, inst, brw_inst_saturate(devinfo, inst));
   brw_inst_set_math_msg_data_type(devinfo, inst, dataType);
   brw_inst_set_saturate(devinfo, inst, 0);
}
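
/* Set up the descriptor for an FF_SYNC URB message: a single payload
 * register, with the descriptor fields FF_SYNC does not use cleared to zero.
 */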
static void brw_set_ff_sync_message(struct brw_codegen *p,
                                    brw_inst *insn,
                                    bool allocate,
                                    unsigned response_length,
                                    bool end_of_thread)
{
   const struct gen_device_info *devinfo = p->devinfo;

   brw_set_desc(p, insn, brw_message_desc(
                   devinfo, 1, response_length, true));

   brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB);
   brw_inst_set_eot(devinfo, insn, end_of_thread);
   brw_inst_set_urb_opcode(devinfo, insn, 1); /* FF_SYNC */
   brw_inst_set_urb_allocate(devinfo, insn, allocate);
   /* The following fields are not used by FF_SYNC: */
   brw_inst_set_urb_global_offset(devinfo, insn, 0);
   brw_inst_set_urb_swizzle_control(devinfo, insn, 0);
   brw_inst_set_urb_used(devinfo, insn, 0);
   brw_inst_set_urb_complete(devinfo, insn, 0);
}
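
/* Set up the descriptor for a URB write message, translating the
 * generation-independent BRW_URB_WRITE_* flags into per-generation
 * descriptor fields.
 */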
static void brw_set_urb_message( struct brw_codegen *p,
                                 brw_inst *insn,
                                 enum brw_urb_write_flags flags,
                                 unsigned msg_length,
                                 unsigned response_length,
                                 unsigned offset,
                                 unsigned swizzle_control )
{
   const struct gen_device_info *devinfo = p->devinfo;

   assert(devinfo->gen < 7 || swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
   assert(devinfo->gen < 7 || !(flags & BRW_URB_WRITE_ALLOCATE));
   assert(devinfo->gen >= 7 || !(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));

   brw_set_desc(p, insn, brw_message_desc(
                   devinfo, msg_length, response_length, true));

   brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB);
   brw_inst_set_eot(devinfo, insn, !!(flags & BRW_URB_WRITE_EOT));

   if (flags & BRW_URB_WRITE_OWORD) {
      assert(msg_length == 2); /* header + one OWORD of data */
      brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_OWORD);
   } else {
      brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_HWORD);
   }

   brw_inst_set_urb_global_offset(devinfo, insn, offset);
   brw_inst_set_urb_swizzle_control(devinfo, insn, swizzle_control);

   if (devinfo->gen < 8) {
      brw_inst_set_urb_complete(devinfo, insn, !!(flags & BRW_URB_WRITE_COMPLETE));
   }

   if (devinfo->gen < 7) {
      brw_inst_set_urb_allocate(devinfo, insn, !!(flags & BRW_URB_WRITE_ALLOCATE));
      brw_inst_set_urb_used(devinfo, insn, !(flags & BRW_URB_WRITE_UNUSED));
   } else {
      brw_inst_set_urb_per_slot_offset(devinfo, insn,
                                       !!(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));
   }
}
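
/* Set up the descriptor for a Gen7+ data-port scratch block read/write
 * message.  num_regs must be 1, 2, or 4 (or 8 on Gen8+).
 */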
static void
gen7_set_dp_scratch_message(struct brw_codegen *p,
                            brw_inst *inst,
                            bool write,
                            bool dword,
                            bool invalidate_after_read,
                            unsigned num_regs,
                            unsigned addr_offset,
                            unsigned mlen,
                            unsigned rlen,
                            bool header_present)
{
   const struct gen_device_info *devinfo = p->devinfo;
   assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
          (devinfo->gen >= 8 && num_regs == 8));
   const unsigned block_size = (devinfo->gen >= 8 ? util_logbase2(num_regs) :
                                num_regs - 1);

   brw_set_desc(p, inst, brw_message_desc(
                   devinfo, mlen, rlen, header_present));

   brw_inst_set_sfid(devinfo, inst, GEN7_SFID_DATAPORT_DATA_CACHE);
   brw_inst_set_dp_category(devinfo, inst, 1); /* Scratch Block Read/Write msgs */
   brw_inst_set_scratch_read_write(devinfo, inst, write);
   brw_inst_set_scratch_type(devinfo, inst, dword);
   brw_inst_set_scratch_invalidate_after_read(devinfo, inst, invalidate_after_read);
   brw_inst_set_scratch_block_size(devinfo, inst, block_size);
   brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
}
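
/* Copy the default instruction state (execution controls, predication,
 * flag register selection, etc.) into an instruction.
 */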
static void
brw_inst_set_state(const struct gen_device_info *devinfo,
                   brw_inst *insn,
                   const struct brw_insn_state *state)
{
   brw_inst_set_exec_size(devinfo, insn, state->exec_size);
   brw_inst_set_group(devinfo, insn, state->group);
   brw_inst_set_compression(devinfo, insn, state->compressed);
   brw_inst_set_access_mode(devinfo, insn, state->access_mode);
   brw_inst_set_mask_control(devinfo, insn, state->mask_control);
   if (devinfo->gen >= 12)
      brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(state->swsb));
   brw_inst_set_saturate(devinfo, insn, state->saturate);
   brw_inst_set_pred_control(devinfo, insn, state->predicate);
   brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);

   if (is_3src(devinfo, brw_inst_opcode(devinfo, insn)) &&
       state->access_mode == BRW_ALIGN_16) {
      brw_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      if (devinfo->gen >= 7)
         brw_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   } else {
      brw_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      if (devinfo->gen >= 7)
         brw_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   }

   if (devinfo->gen >= 6)
      brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}
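
/* Reserve space for nr_insn instructions at the given byte alignment,
 * growing the instruction store as needed, and return a pointer to the
 * first reserved slot.
 */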
static brw_inst *
brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned align)
{
   assert(util_is_power_of_two_or_zero(sizeof(brw_inst)));
   assert(util_is_power_of_two_or_zero(align));
   const unsigned align_insn = MAX2(align / sizeof(brw_inst), 1);
   const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
   const unsigned new_nr_insn = start_insn + nr_insn;

   if (p->store_size < new_nr_insn) {
      p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_inst));
      p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
   }

   /* Memset any padding due to alignment to 0. We don't want to be hashing
    * or caching a bunch of random bits we got from a memory allocation.
    */
   if (p->nr_insn < start_insn) {
      memset(&p->store[p->nr_insn], 0,
             (start_insn - p->nr_insn) * sizeof(brw_inst));
   }

   assert(p->next_insn_offset == p->nr_insn * sizeof(brw_inst));
   p->nr_insn = new_nr_insn;
   p->next_insn_offset = new_nr_insn * sizeof(brw_inst);

   return &p->store[start_insn];
}

void
brw_realign(struct brw_codegen *p, unsigned align)
{
   brw_append_insns(p, 0, align);
}

int
brw_append_data(struct brw_codegen *p, void *data,
                unsigned size, unsigned align)
{
   unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_inst));
   void *dst = brw_append_insns(p, nr_insn, align);
   memcpy(dst, data, size);

   /* If it's not a whole number of instructions, memset the end */
   if (size < nr_insn * sizeof(brw_inst))
      memset(dst + size, 0, nr_insn * sizeof(brw_inst) - size);

   return dst - (void *)p->store;
}
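
/* Allocate a new instruction at the end of the stream, zero it, set its
 * opcode, and apply the current default instruction state.
 */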
#define next_insn brw_next_insn
brw_inst *
brw_next_insn(struct brw_codegen *p, unsigned opcode)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn = brw_append_insns(p, 1, sizeof(brw_inst));

   memset(insn, 0, sizeof(*insn));
   brw_inst_set_opcode(devinfo, insn, opcode);

   /* Apply the default instruction state */
   brw_inst_set_state(devinfo, insn, p->current);

   return insn;
}
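
/* Helpers for emitting one- and two-source ALU instructions. */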
static brw_inst *
brw_alu1(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src)
{
   brw_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   return insn;
}

static brw_inst *
brw_alu2(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
{
   /* 64-bit immediates are only supported on 1-src instructions */
   assert(src0.file != BRW_IMMEDIATE_VALUE || type_sz(src0.type) <= 4);
   assert(src1.file != BRW_IMMEDIATE_VALUE || type_sz(src1.type) <= 4);

   brw_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
   return insn;
}
static int
get_3src_subreg_nr(struct brw_reg reg)
{
   /* Normally, SubRegNum is in bytes (0..31).  However, 3-src instructions
    * use 32-bit units (components 0..7).  Since they only support F/D/UD
    * types, this doesn't lose any flexibility, but uses fewer bits.
    */
   return reg.subnr / 4;
}
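
/* Translate ordinary region strides into the compressed encodings used by
 * align1 three-source instructions.
 */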
static enum gen10_align1_3src_vertical_stride
to_3src_align1_vstride(const struct gen_device_info *devinfo,
                       enum brw_vertical_stride vstride)
{
   switch (vstride) {
   case BRW_VERTICAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
   case BRW_VERTICAL_STRIDE_1:
      assert(devinfo->gen >= 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
   case BRW_VERTICAL_STRIDE_2:
      assert(devinfo->gen < 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
   case BRW_VERTICAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
   case BRW_VERTICAL_STRIDE_8:
   case BRW_VERTICAL_STRIDE_16:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8;
   default:
      unreachable("invalid vstride");
   }
}


static enum gen10_align1_3src_src_horizontal_stride
to_3src_align1_hstride(enum brw_horizontal_stride hstride)
{
   switch (hstride) {
   case BRW_HORIZONTAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0;
   case BRW_HORIZONTAL_STRIDE_1:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1;
   case BRW_HORIZONTAL_STRIDE_2:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2;
   case BRW_HORIZONTAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4;
   default:
      unreachable("invalid hstride");
   }
}
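
/* Emit a three-source ALU instruction, using the align1 encoding when the
 * current access mode is align1 and the align16 encoding otherwise.
 */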
2014-06-13 14:29:25 -07:00
|
|
|
|
static brw_inst *
|
2015-04-16 11:06:57 -07:00
|
|
|
|
brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
|
2014-06-13 14:29:25 -07:00
|
|
|
|
struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
|
2010-03-22 10:05:42 -07:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2014-06-13 14:29:25 -07:00
|
|
|
|
brw_inst *inst = next_insn(p, opcode);
|
2010-03-22 10:05:42 -07:00
|
|
|
|
|
|
|
|
|
|
gen7_convert_mrf_to_grf(p, &dest);
|
|
|
|
|
|
|
|
|
|
|
|
assert(dest.nr < 128);
|
2019-07-25 18:28:06 -07:00
|
|
|
|
|
|
|
|
|
|
if (devinfo->gen >= 10)
|
|
|
|
|
|
assert(!(src0.file == BRW_IMMEDIATE_VALUE &&
|
|
|
|
|
|
src2.file == BRW_IMMEDIATE_VALUE));
|
|
|
|
|
|
|
2019-04-15 23:26:47 -07:00
|
|
|
|
assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128);
|
|
|
|
|
|
assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128);
|
|
|
|
|
|
assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128);
|
2010-03-22 10:05:42 -07:00
|
|
|
|
assert(dest.address_mode == BRW_ADDRESS_DIRECT);
|
|
|
|
|
|
assert(src0.address_mode == BRW_ADDRESS_DIRECT);
|
|
|
|
|
|
assert(src1.address_mode == BRW_ADDRESS_DIRECT);
|
|
|
|
|
|
assert(src2.address_mode == BRW_ADDRESS_DIRECT);
|
2017-06-14 14:49:52 -07:00
|
|
|
|
|
|
|
|
|
|
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
|
|
|
|
|
|
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
|
|
|
|
|
|
dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
|
|
|
|
|
|
|
2018-11-09 14:13:35 -08:00
|
|
|
|
if (devinfo->gen >= 12) {
|
|
|
|
|
|
brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dest.file);
|
2017-06-14 14:49:52 -07:00
|
|
|
|
brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
|
2018-11-09 14:13:35 -08:00
|
|
|
|
} else {
|
|
|
|
|
|
if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) {
|
|
|
|
|
|
brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
|
|
|
|
|
|
BRW_ALIGN1_3SRC_ACCUMULATOR);
|
|
|
|
|
|
brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
|
|
|
|
|
|
BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE);
|
|
|
|
|
|
brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
|
|
|
|
|
|
}
|
2017-06-14 14:49:52 -07:00
|
|
|
|
}
|
|
|
|
|
|
brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, dest.subnr / 8);
|
|
|
|
|
|
|
|
|
|
|
|
brw_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1);
|
|
|
|
|
|
|
|
|
|
|
|
if (brw_reg_type_is_floating_point(dest.type)) {
|
|
|
|
|
|
brw_inst_set_3src_a1_exec_type(devinfo, inst,
|
|
|
|
|
|
BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
brw_inst_set_3src_a1_exec_type(devinfo, inst,
|
|
|
|
|
|
BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
brw_inst_set_3src_a1_dst_type(devinfo, inst, dest.type);
|
|
|
|
|
|
brw_inst_set_3src_a1_src0_type(devinfo, inst, src0.type);
|
|
|
|
|
|
brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
|
|
|
|
|
|
brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);
|
|
|
|
|
|
|
2019-07-25 18:28:06 -07:00
|
|
|
|
if (src0.file == BRW_IMMEDIATE_VALUE) {
|
|
|
|
|
|
brw_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
brw_inst_set_3src_a1_src0_vstride(
|
|
|
|
|
|
devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
|
|
|
|
|
|
brw_inst_set_3src_a1_src0_hstride(devinfo, inst,
|
|
|
|
|
|
to_3src_align1_hstride(src0.hstride));
|
|
|
|
|
|
brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr);
|
|
|
|
|
|
if (src0.type == BRW_REGISTER_TYPE_NF) {
|
|
|
|
|
|
brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
|
|
|
|
|
|
}
|
|
|
|
|
|
brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
|
|
|
|
|
|
brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
|
|
|
|
|
|
}
|
2018-11-09 14:13:35 -08:00
|
|
|
|
brw_inst_set_3src_a1_src1_vstride(
|
|
|
|
|
|
devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
|
2017-06-14 14:49:52 -07:00
|
|
|
|
brw_inst_set_3src_a1_src1_hstride(devinfo, inst,
|
2017-12-11 11:44:48 -08:00
|
|
|
|
to_3src_align1_hstride(src1.hstride));
|
2017-06-14 14:49:52 -07:00
|
|
|
|
|
|
|
|
|
|
brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, src1.subnr);
|
|
|
|
|
|
if (src1.file == BRW_ARCHITECTURE_REGISTER_FILE) {
|
|
|
|
|
|
brw_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
|
|
|
|
|
|
}
|
|
|
|
|
|
brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
|
|
|
|
|
|
brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
|
|
|
|
|
|
|
2019-07-25 18:28:06 -07:00
|
|
|
|
if (src2.file == BRW_IMMEDIATE_VALUE) {
|
|
|
|
|
|
brw_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
brw_inst_set_3src_a1_src2_hstride(devinfo, inst,
|
|
|
|
|
|
to_3src_align1_hstride(src2.hstride));
|
|
|
|
|
|
/* no vstride on src2 */
|
|
|
|
|
|
brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr);
|
|
|
|
|
|
brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
|
|
|
|
|
|
brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
|
|
|
|
|
|
brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
|
|
|
|
|
|
}
|
2017-06-14 14:49:52 -07:00
|
|
|
|
|
|
|
|
|
|
assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
|
2017-06-14 11:03:19 -07:00
|
|
|
|
src0.file == BRW_IMMEDIATE_VALUE ||
|
|
|
|
|
|
(src0.file == BRW_ARCHITECTURE_REGISTER_FILE &&
|
|
|
|
|
|
src0.type == BRW_REGISTER_TYPE_NF));
|
2017-06-14 14:49:52 -07:00
|
|
|
|
assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
|
|
|
|
|
|
src1.file == BRW_ARCHITECTURE_REGISTER_FILE);
|
|
|
|
|
|
assert(src2.file == BRW_GENERAL_REGISTER_FILE ||
|
|
|
|
|
|
src2.file == BRW_IMMEDIATE_VALUE);
|
|
|
|
|
|
|
2018-11-09 14:13:35 -08:00
|
|
|
|
if (devinfo->gen >= 12) {
|
2019-04-19 13:37:17 -07:00
|
|
|
|
if (src0.file == BRW_IMMEDIATE_VALUE) {
|
|
|
|
|
|
brw_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2018-11-09 14:13:35 -08:00
|
|
|
|
brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file);
|
2019-04-19 13:37:17 -07:00
|
|
|
|
|
|
|
|
|
|
if (src2.file == BRW_IMMEDIATE_VALUE) {
|
|
|
|
|
|
brw_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
|
|
|
|
|
|
}
|
2018-11-09 14:13:35 -08:00
|
|
|
|
} else {
|
|
|
|
|
|
brw_inst_set_3src_a1_src0_reg_file(devinfo, inst,
|
|
|
|
|
|
                                            src0.file == BRW_GENERAL_REGISTER_FILE ?
                                            BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
                                            BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
         brw_inst_set_3src_a1_src1_reg_file(devinfo, inst,
                                            src1.file == BRW_GENERAL_REGISTER_FILE ?
                                            BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
                                            BRW_ALIGN1_3SRC_ACCUMULATOR);
         brw_inst_set_3src_a1_src2_reg_file(devinfo, inst,
                                            src2.file == BRW_GENERAL_REGISTER_FILE ?
                                            BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
                                            BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
      }
   } else {
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_MESSAGE_REGISTER_FILE);
      assert(dest.type == BRW_REGISTER_TYPE_F ||
             dest.type == BRW_REGISTER_TYPE_DF ||
             dest.type == BRW_REGISTER_TYPE_D ||
             dest.type == BRW_REGISTER_TYPE_UD ||
             (dest.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 8));
      if (devinfo->gen == 6) {
         brw_inst_set_3src_a16_dst_reg_file(devinfo, inst,
                                            dest.file == BRW_MESSAGE_REGISTER_FILE);
      }
      brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
      brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
      brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);

      assert(src0.file == BRW_GENERAL_REGISTER_FILE);
      brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
      brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
      brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
      brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
      brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
                                          src0.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src1.file == BRW_GENERAL_REGISTER_FILE);
      brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
      brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
      brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
      brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
      brw_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
                                          src1.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src2.file == BRW_GENERAL_REGISTER_FILE);
      brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
      brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
      brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
      brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
      brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
                                          src2.vstride == BRW_VERTICAL_STRIDE_0);

      if (devinfo->gen >= 7) {
         /* Set both the source and destination types based on dest.type,
          * ignoring the source register types.  The MAD and LRP emitters
          * ensure that all four types are float.  The BFE and BFI2 emitters,
          * however, may send us mixed D and UD types and want us to ignore
          * that and use the destination type.
          */
         brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
         brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);

         /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
          *
          *    "Three source instructions can use operands with mixed-mode
          *     precision. When SrcType field is set to :f or :hf it defines
          *     precision for source 0 only, and fields Src1Type and Src2Type
          *     define precision for other source operands:
          *
          *     0b = :f. Single precision Float (32-bit).
          *     1b = :hf. Half precision Float (16-bit)."
          */
         if (src1.type == BRW_REGISTER_TYPE_HF)
            brw_inst_set_3src_a16_src1_type(devinfo, inst, 1);

         if (src2.type == BRW_REGISTER_TYPE_HF)
            brw_inst_set_3src_a16_src2_type(devinfo, inst, 1);
      }
   }

   return inst;
}
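
/* Illustrative only, not part of the original file: a minimal sketch of how
 * this three-source emitter is reached through one of the ALU3 wrappers
 * defined below, assuming SIMD8 float GRFs with hypothetical register
 * numbers:
 *
 *    brw_MAD(p, brw_vec8_grf(20, 0),     // dst  = src1 * src2 + src0
 *            brw_vec8_grf(2, 0),         // src0 (addend)
 *            brw_vec8_grf(4, 0),         // src1
 *            brw_vec8_grf(6, 0));        // src2
 *
 * brw_MAD() forwards to brw_alu3() with BRW_OPCODE_MAD, which fills in the
 * destination and source fields exactly as above.
 */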

/***********************************************************************
 * Convenience routines.
 */
#define ALU1(OP)                                                  \
brw_inst *brw_##OP(struct brw_codegen *p,                         \
                   struct brw_reg dest,                           \
                   struct brw_reg src0)                           \
{                                                                 \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);               \
}

#define ALU2(OP)                                                  \
brw_inst *brw_##OP(struct brw_codegen *p,                         \
                   struct brw_reg dest,                           \
                   struct brw_reg src0,                           \
                   struct brw_reg src1)                           \
{                                                                 \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);         \
}

#define ALU3(OP)                                                  \
brw_inst *brw_##OP(struct brw_codegen *p,                         \
                   struct brw_reg dest,                           \
                   struct brw_reg src0,                           \
                   struct brw_reg src1,                           \
                   struct brw_reg src2)                           \
{                                                                 \
   if (p->current->access_mode == BRW_ALIGN_16) {                 \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                  \
         src0.swizzle = BRW_SWIZZLE_XXXX;                         \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                  \
         src1.swizzle = BRW_SWIZZLE_XXXX;                         \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                  \
         src2.swizzle = BRW_SWIZZLE_XXXX;                         \
   }                                                              \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);   \
}

#define ALU3F(OP)                                                 \
brw_inst *brw_##OP(struct brw_codegen *p,                         \
                   struct brw_reg dest,                           \
                   struct brw_reg src0,                           \
                   struct brw_reg src1,                           \
                   struct brw_reg src2)                           \
{                                                                 \
   assert(dest.type == BRW_REGISTER_TYPE_F ||                     \
          dest.type == BRW_REGISTER_TYPE_DF);                     \
   if (dest.type == BRW_REGISTER_TYPE_F) {                        \
      assert(src0.type == BRW_REGISTER_TYPE_F);                   \
      assert(src1.type == BRW_REGISTER_TYPE_F);                   \
      assert(src2.type == BRW_REGISTER_TYPE_F);                   \
   } else if (dest.type == BRW_REGISTER_TYPE_DF) {                \
      assert(src0.type == BRW_REGISTER_TYPE_DF);                  \
      assert(src1.type == BRW_REGISTER_TYPE_DF);                  \
      assert(src2.type == BRW_REGISTER_TYPE_DF);                  \
   }                                                              \
                                                                  \
   if (p->current->access_mode == BRW_ALIGN_16) {                 \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                  \
         src0.swizzle = BRW_SWIZZLE_XXXX;                         \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                  \
         src1.swizzle = BRW_SWIZZLE_XXXX;                         \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                  \
         src2.swizzle = BRW_SWIZZLE_XXXX;                         \
   }                                                              \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);   \
}
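
/* For reference (added; mechanically derivable from the macros above):
 * ALU2(AND), for example, expands to
 *
 *    brw_inst *brw_AND(struct brw_codegen *p,
 *                      struct brw_reg dest,
 *                      struct brw_reg src0,
 *                      struct brw_reg src1)
 *    {
 *       return brw_alu2(p, BRW_OPCODE_AND, dest, src0, src1);
 *    }
 */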
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

brw_inst *
brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
{
   const struct gen_device_info *devinfo = p->devinfo;

   /* When converting F->DF on IVB/BYT, every odd source channel is ignored.
    * To avoid the problems that causes, we use an <X,2,0> source region to
    * read each element twice.
    */
   if (devinfo->gen == 7 && !devinfo->is_haswell &&
       brw_get_default_access_mode(p) == BRW_ALIGN_1 &&
       dest.type == BRW_REGISTER_TYPE_DF &&
       (src0.type == BRW_REGISTER_TYPE_F ||
        src0.type == BRW_REGISTER_TYPE_D ||
        src0.type == BRW_REGISTER_TYPE_UD) &&
       !has_scalar_region(src0)) {
      assert(src0.vstride == src0.width + src0.hstride);
      src0.vstride = src0.hstride;
      src0.width = BRW_WIDTH_2;
      src0.hstride = BRW_HORIZONTAL_STRIDE_0;
   }

   return brw_alu1(p, BRW_OPCODE_MOV, dest, src0);
}
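
/* Worked example (added, illustrative only): with the fixup above, a
 * normalized <8;8,1>:F source region becomes <1;2,0>:F -- rows of width 2
 * advance one element at a time while the zero hstride repeats the element
 * within the row, so the channels read elements 0,0,1,1,2,2,... and the odd
 * channels that IVB/BYT ignore during F->DF conversion carry duplicates
 * rather than live data.
 */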

brw_inst *
brw_ADD(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
        src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
        src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}

brw_inst *
brw_AVG(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   assert(dest.type == src0.type);
   assert(src0.type == src1.type);
   switch (src0.type) {
   case BRW_REGISTER_TYPE_B:
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      break;
   default:
      unreachable("Bad type for brw_AVG");
   }

   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}

brw_inst *
brw_MUL(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_REGISTER_TYPE_D ||
       src0.type == BRW_REGISTER_TYPE_UD ||
       src1.type == BRW_REGISTER_TYPE_D ||
       src1.type == BRW_REGISTER_TYPE_UD) {
      assert(dest.type != BRW_REGISTER_TYPE_F);
   }

   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
        src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
        src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
          src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
          src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}

brw_inst *
brw_LINE(struct brw_codegen *p, struct brw_reg dest,
         struct brw_reg src0, struct brw_reg src1)
{
   src0.vstride = BRW_VERTICAL_STRIDE_0;
   src0.width = BRW_WIDTH_1;
   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
   return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
}

brw_inst *
brw_PLN(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   src0.vstride = BRW_VERTICAL_STRIDE_0;
   src0.width = BRW_WIDTH_1;
   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
   src1.vstride = BRW_VERTICAL_STRIDE_8;
   src1.width = BRW_WIDTH_8;
   src1.hstride = BRW_HORIZONTAL_STRIDE_1;
   return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
}

brw_inst *
brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
{
   const struct gen_device_info *devinfo = p->devinfo;
   const bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16;
   /* The F32TO16 instruction doesn't support 32-bit destination types in
    * Align1 mode, and neither does the Gen8 implementation in terms of a
    * converting MOV.  Gen7 does zero out the high 16 bits in Align16 mode as
    * an undocumented feature.
    */
   const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
                                 (!align16 || devinfo->gen >= 8));
   brw_inst *inst;

   if (align16) {
      assert(dst.type == BRW_REGISTER_TYPE_UD);
   } else {
      assert(dst.type == BRW_REGISTER_TYPE_UD ||
             dst.type == BRW_REGISTER_TYPE_W ||
             dst.type == BRW_REGISTER_TYPE_UW ||
             dst.type == BRW_REGISTER_TYPE_HF);
   }

   brw_push_insn_state(p);

   if (needs_zero_fill) {
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
   }

   if (devinfo->gen >= 8) {
      inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
   } else {
      assert(devinfo->gen == 7);
      inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
   }

   if (needs_zero_fill) {
      if (devinfo->gen < 12)
         brw_inst_set_no_dd_clear(devinfo, inst, true);
      brw_set_default_swsb(p, tgl_swsb_null());
      inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0));
      if (devinfo->gen < 12)
         brw_inst_set_no_dd_check(devinfo, inst, true);
   }

   brw_pop_insn_state(p);
   return inst;
}
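
/* Illustrative only (assumed register choices): a round trip through the
 * two conversion helpers, packing a SIMD8 float GRF into half floats with
 * the high 16 bits of each dword zeroed, then expanding back:
 *
 *    struct brw_reg tmp = retype(brw_vec8_grf(10, 0), BRW_REGISTER_TYPE_UD);
 *    brw_F32TO16(p, tmp, brw_vec8_grf(12, 0));   // F -> HF, zero-filled
 *    brw_F16TO32(p, brw_vec8_grf(14, 0), tmp);   // HF -> F
 */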

brw_inst *
brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
{
   const struct gen_device_info *devinfo = p->devinfo;
   bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16;

   if (align16) {
      assert(src.type == BRW_REGISTER_TYPE_UD);
   } else {
      /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
       *
       *   Because this instruction does not have a 16-bit floating-point
       *   type, the source data type must be Word (W). The destination type
       *   must be F (Float).
       */
      if (src.type == BRW_REGISTER_TYPE_UD)
         src = spread(retype(src, BRW_REGISTER_TYPE_W), 2);

      assert(src.type == BRW_REGISTER_TYPE_W ||
             src.type == BRW_REGISTER_TYPE_UW ||
             src.type == BRW_REGISTER_TYPE_HF);
   }

   if (devinfo->gen >= 8) {
      return brw_MOV(p, dst, retype(src, BRW_REGISTER_TYPE_HF));
   } else {
      assert(devinfo->gen == 7);
      return brw_alu1(p, BRW_OPCODE_F16TO32, dst, src);
   }
}

void brw_NOP(struct brw_codegen *p)
{
   brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
   memset(insn, 0, sizeof(*insn));
   brw_inst_set_opcode(p->devinfo, insn, BRW_OPCODE_NOP);
}

void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func)
{
   brw_inst *insn = next_insn(p, BRW_OPCODE_SYNC);
   brw_inst_set_cond_modifier(p->devinfo, insn, func);
}
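
/* Illustrative only: on Gen12+, where software scoreboarding replaces
 * hardware dependency tracking, a synchronizing nop is emitted as (assuming
 * the TGL_SYNC_NOP enum value)
 *
 *    brw_SYNC(p, TGL_SYNC_NOP);
 */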

/***********************************************************************
 * Comparisons, if/else/endif
 */
brw_inst *
brw_JMPI(struct brw_codegen *p, struct brw_reg index,
         unsigned predicate_control)
{
   const struct gen_device_info *devinfo = p->devinfo;
   struct brw_reg ip = brw_ip_reg();
   brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);

   brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
   brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
   brw_inst_set_pred_control(devinfo, inst, predicate_control);

   return inst;
}
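
/* Illustrative only: callers usually emit a forward JMPI with a zero
 * immediate and back-patch the distance once the target is known:
 *
 *    brw_inst *jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL);
 *    ...emit the instructions to be skipped...
 *    // then patch jmp's jump count with the number of instructions skipped
 */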

static void
push_if_stack(struct brw_codegen *p, brw_inst *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
                             p->if_stack_array_size);
   }
}

static brw_inst *
pop_if_stack(struct brw_codegen *p)
{
   p->if_stack_depth--;
   return &p->store[p->if_stack[p->if_stack_depth]];
}

static void
push_loop_stack(struct brw_codegen *p, brw_inst *inst)
{
   /* if_depth_in_loop[p->loop_stack_depth + 1] is written at the end of this
    * function, so the arrays must be grown whenever they cannot hold
    * loop_stack_depth + 2 entries; checking against depth + 1 (rather than
    * depth) avoids writing one element past the end.
    */
   if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
      p->loop_stack_array_size *= 2;
      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
                               p->loop_stack_array_size);
      p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
                                     p->loop_stack_array_size);
   }

   p->loop_stack[p->loop_stack_depth] = inst - p->store;
   p->loop_stack_depth++;
   p->if_depth_in_loop[p->loop_stack_depth] = 0;
}

static brw_inst *
get_inner_do_insn(struct brw_codegen *p)
{
   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
}

/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
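
/* Illustrative only, not part of the original file: a generator brackets
 * divergent code with these helpers, e.g.
 *
 *    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, x, brw_imm_f(0.0f));
 *    brw_IF(p, BRW_EXECUTE_8);
 *    ...instructions for channels where x != 0...
 *    brw_ELSE(p);
 *    ...instructions for the remaining channels...
 *    brw_ENDIF(p);   // fills in the IF/ELSE jump targets (see patch_IF_ELSE)
 *
 * where x stands for some float register.
 */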

brw_inst *
brw_IF(struct brw_codegen *p, unsigned execute_size)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (devinfo->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (devinfo->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_inst_set_gen6_jump_count(devinfo, insn, 0);
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else if (devinfo->gen == 7) {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_w(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   } else {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      if (devinfo->gen < 12)
         brw_set_src0(p, insn, brw_imm_d(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   }

   brw_inst_set_exec_size(devinfo, insn, execute_size);
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
   if (!p->single_program_flow && devinfo->gen < 6)
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}

/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 */
brw_inst *
gen6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
        struct brw_reg src0, struct brw_reg src1)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
   brw_inst_set_gen6_jump_count(devinfo, insn, 0);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(brw_inst_qtr_control(devinfo, insn) == BRW_COMPRESSION_NONE);
   assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
   brw_inst_set_cond_modifier(devinfo, insn, conditional);

   push_if_stack(p, insn);
   return insn;
}

/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_codegen *p,
                       brw_inst *if_inst, brw_inst *else_inst)
{
   const struct gen_device_info *devinfo = p->devinfo;

   /* The next instruction (where the ENDIF would be, if it existed) */
   brw_inst *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && brw_inst_opcode(devinfo, if_inst) == BRW_OPCODE_IF);
   assert(else_inst == NULL || brw_inst_opcode(devinfo, else_inst) == BRW_OPCODE_ELSE);
   assert(brw_inst_exec_size(devinfo, if_inst) == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   brw_inst_set_opcode(devinfo, if_inst, BRW_OPCODE_ADD);
   brw_inst_set_pred_inv(devinfo, if_inst, true);

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      brw_inst_set_opcode(devinfo, else_inst, BRW_OPCODE_ADD);

      brw_inst_set_imm_ud(devinfo, if_inst, (else_inst - if_inst + 1) * 16);
      brw_inst_set_imm_ud(devinfo, else_inst, (next_inst - else_inst) * 16);
   } else {
      brw_inst_set_imm_ud(devinfo, if_inst, (next_inst - if_inst) * 16);
   }
}
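
/* Worked example (added, illustrative only): brw_inst pointer arithmetic
 * counts 16-byte native instructions, so with the IF in slot 3 and the ELSE
 * in slot 7 of the store, (else_inst - if_inst + 1) * 16 = 80 -- the ADD's
 * immediate advances IP, in bytes, to the instruction just past the ELSE.
 */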

/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_codegen *p,
              brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst)
{
   const struct gen_device_info *devinfo = p->devinfo;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (devinfo->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && brw_inst_opcode(devinfo, if_inst) == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || brw_inst_opcode(devinfo, else_inst) == BRW_OPCODE_ELSE);

   unsigned br = brw_jump_scale(devinfo);

   assert(brw_inst_opcode(devinfo, endif_inst) == BRW_OPCODE_ENDIF);
   brw_inst_set_exec_size(devinfo, endif_inst, brw_inst_exec_size(devinfo, if_inst));

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (devinfo->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations for
          * all-false and jumping past the ENDIF.
          */
         brw_inst_set_opcode(devinfo, if_inst, BRW_OPCODE_IFF);
         brw_inst_set_gen4_jump_count(devinfo, if_inst,
                                      br * (endif_inst - if_inst + 1));
         brw_inst_set_gen4_pop_count(devinfo, if_inst, 0);
      } else if (devinfo->gen == 6) {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
         brw_inst_set_gen6_jump_count(devinfo, if_inst, br*(endif_inst - if_inst));
      } else {
         brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
         brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
      }
   } else {
      brw_inst_set_exec_size(devinfo, else_inst, brw_inst_exec_size(devinfo, if_inst));

      /* Patch IF -> ELSE */
      if (devinfo->gen < 6) {
         brw_inst_set_gen4_jump_count(devinfo, if_inst,
                                      br * (else_inst - if_inst));
         brw_inst_set_gen4_pop_count(devinfo, if_inst, 0);
      } else if (devinfo->gen == 6) {
         brw_inst_set_gen6_jump_count(devinfo, if_inst,
                                      br * (else_inst - if_inst + 1));
      }

      /* Patch ELSE -> ENDIF */
      if (devinfo->gen < 6) {
         /* BRW_OPCODE_ELSE pre-gen6 should point just past the
          * matching ENDIF.
          */
         brw_inst_set_gen4_jump_count(devinfo, else_inst,
                                      br * (endif_inst - else_inst + 1));
         brw_inst_set_gen4_pop_count(devinfo, else_inst, 1);
      } else if (devinfo->gen == 6) {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
         brw_inst_set_gen6_jump_count(devinfo, else_inst,
                                      br * (endif_inst - else_inst));
      } else {
         /* The IF instruction's JIP should point just past the ELSE */
         brw_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
         /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
         brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
         brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
         if (devinfo->gen >= 8) {
            /* Since we don't set branch_ctrl, the ELSE's JIP and UIP both
             * should point to ENDIF.
             */
            brw_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
         }
|
i965: Rework IF/ELSE jump target back-patching.
The primary motivation for this is to better support Ivybridge control
flow. Ivybridge IF instructions need to point to the first instruction
of the ELSE block -and- the ENDIF instruction; the existing code only
supported back-patching one instruction ago.
A second goal is to simplify and centralize the back-patching, hopefully
clarifying the code somewhat.
Previously, brw_ELSE back-patched the IF instruction, and brw_ENDIF
back-patched the previous instruction (IF or ELSE). With this patch,
brw_ENDIF is responsible for patching both the IF and (optional) ELSE.
To support this, the control flow stack (if_stack) maintains pointers to
both the IF and ELSE instructions. Unfortunately, in single program
flow (SPF) mode, both were emitted as ADD instructions, and thus
indistinguishable.
To remedy this, this patch simply emits IF and ELSE, rather than ADDs;
brw_ENDIF will convert them to ADDs (the SPF version of back-patching).
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
2011-05-16 13:40:00 -07:00
|
|
|
|
}
|
2006-10-18 00:24:01 -07:00
|
|
|
|
}
|
i965: Rework IF/ELSE jump target back-patching.
The primary motivation for this is to better support Ivybridge control
flow. Ivybridge IF instructions need to point to the first instruction
of the ELSE block -and- the ENDIF instruction; the existing code only
supported back-patching one instruction ago.
A second goal is to simplify and centralize the back-patching, hopefully
clarifying the code somewhat.
Previously, brw_ELSE back-patched the IF instruction, and brw_ENDIF
back-patched the previous instruction (IF or ELSE). With this patch,
brw_ENDIF is responsible for patching both the IF and (optional) ELSE.
To support this, the control flow stack (if_stack) maintains pointers to
both the IF and ELSE instructions. Unfortunately, in single program
flow (SPF) mode, both were emitted as ADD instructions, and thus
indistinguishable.
To remedy this, this patch simply emits IF and ELSE, rather than ADDs;
brw_ENDIF will convert them to ADDs (the SPF version of back-patching).
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
2011-05-16 13:40:00 -07:00
|
|
|
|
}
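
/* For illustration of the arithmetic above: brw_jump_scale() returns the
 * number of jump-count units per instruction (e.g. 2 on Gen5-7, where jumps
 * are counted in 64-bit chunks and each instruction is 128 bits).  Assuming
 * hypothetical instruction indices -- IF at 10, ELSE at 14, ENDIF at 20 --
 * the Gen6 patching above yields:
 *
 *    gen6_jump_count(IF)   = 2 * (14 - 10 + 1) = 10   (just past the ELSE)
 *    gen6_jump_count(ELSE) = 2 * (20 - 14)     = 12   (at the ENDIF)
 */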

void
brw_ELSE(struct brw_codegen *p)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (devinfo->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (devinfo->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_inst_set_gen6_jump_count(devinfo, insn, 0);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else if (devinfo->gen == 7) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_w(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      if (devinfo->gen < 12)
         brw_set_src0(p, insn, brw_imm_d(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   }

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
   if (!p->single_program_flow && devinfo->gen < 6)
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);

   push_if_stack(p, insn);
}

void
brw_ENDIF(struct brw_codegen *p)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn = NULL;
   brw_inst *else_inst = NULL;
   brw_inst *if_inst = NULL;
   brw_inst *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
    * Gen5.
    */
   if (devinfo->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory (p->store), so call it first before referencing the instruction
    * store pointer from an index.
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (brw_inst_opcode(devinfo, tmp) == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   if (devinfo->gen < 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (devinfo->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else if (devinfo->gen == 7) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_w(0));
   } else {
      brw_set_src0(p, insn, brw_imm_d(0));
   }

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
   if (devinfo->gen < 6)
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);

   /* Also pop item off the stack in the endif instruction: */
   if (devinfo->gen < 6) {
      brw_inst_set_gen4_jump_count(devinfo, insn, 0);
      brw_inst_set_gen4_pop_count(devinfo, insn, 1);
   } else if (devinfo->gen == 6) {
      brw_inst_set_gen6_jump_count(devinfo, insn, 2);
   } else {
      brw_inst_set_jip(devinfo, insn, 2);
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
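
/* A typical emission sequence using the helpers above, as a sketch (the
 * flag setup before brw_IF() is omitted, and the execution size is just an
 * example):
 *
 *    brw_IF(p, BRW_EXECUTE_8);   // pushes the IF onto if_stack
 *    ... emit "then" block ...
 *    brw_ELSE(p);                // optional; also pushed onto if_stack
 *    ... emit "else" block ...
 *    brw_ENDIF(p);               // pops both and back-patches jump targets
 */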

brw_inst *
brw_BREAK(struct brw_codegen *p)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (devinfo->gen >= 8) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, brw_imm_d(0x0));
   } else if (devinfo->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      brw_inst_set_gen4_pop_count(devinfo, insn,
                                  p->if_depth_in_loop[p->loop_stack_depth]);
   }
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));

   return insn;
}
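
/* Note on the pre-gen6 path above: a BREAK jumps out of every IF block that
 * is still open inside the innermost loop, so its pop count is the current
 * if_depth_in_loop value -- one mask-stack pop per enclosing IF.
 */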

brw_inst *
brw_CONT(struct brw_codegen *p)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   if (devinfo->gen >= 8) {
      brw_set_src0(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   }

   if (devinfo->gen < 6) {
      brw_inst_set_gen4_pop_count(devinfo, insn,
                                  p->if_depth_in_loop[p->loop_stack_depth]);
   }
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
   return insn;
}

brw_inst *
brw_HALT(struct brw_codegen *p)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   if (devinfo->gen < 6) {
      /* From the Gen4 PRM:
       *
       *    "IP register must be put (for example, by the assembler) at <dst>
       *    and <src0> locations."
       */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */
   } else if (devinfo->gen < 8) {
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
   } else if (devinfo->gen < 12) {
      brw_set_src0(p, insn, brw_imm_d(0x0));
   }

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
   return insn;
}

/* DO/WHILE loop:
 *
 * The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
brw_inst *
brw_DO(struct brw_codegen *p, unsigned execute_size)
{
   const struct gen_device_info *devinfo = p->devinfo;

   if (devinfo->gen >= 6 || p->single_program_flow) {
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      brw_inst *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
      brw_inst_set_exec_size(devinfo, insn, execute_size);
      brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE);

      return insn;
   }
}

/**
 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
 * instruction here.
 *
 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
 * nesting, since it can always just point to the end of the block/current loop.
 */
static void
brw_patch_break_cont(struct brw_codegen *p, brw_inst *while_inst)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *do_inst = get_inner_do_insn(p);
   brw_inst *inst;
   unsigned br = brw_jump_scale(devinfo);

   assert(devinfo->gen < 6);

   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_BREAK &&
          brw_inst_gen4_jump_count(devinfo, inst) == 0) {
         brw_inst_set_gen4_jump_count(devinfo, inst, br*((while_inst - inst) + 1));
      } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CONTINUE &&
                 brw_inst_gen4_jump_count(devinfo, inst) == 0) {
         brw_inst_set_gen4_jump_count(devinfo, inst, br * (while_inst - inst));
      }
   }
}

brw_inst *
brw_WHILE(struct brw_codegen *p)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn, *do_insn;
   unsigned br = brw_jump_scale(devinfo);

   if (devinfo->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      if (devinfo->gen >= 8) {
         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         if (devinfo->gen < 12)
            brw_set_src0(p, insn, brw_imm_d(0));
         brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
      } else if (devinfo->gen == 7) {
         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src1(p, insn, brw_imm_w(0));
         brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
      } else {
         brw_set_dest(p, insn, brw_imm_w(0));
         brw_inst_set_gen6_jump_count(devinfo, insn, br * (do_insn - insn));
         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      }

      brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));

   } else {
      if (p->single_program_flow) {
         insn = next_insn(p, BRW_OPCODE_ADD);
         do_insn = get_inner_do_insn(p);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
         brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
      } else {
         insn = next_insn(p, BRW_OPCODE_WHILE);
         do_insn = get_inner_do_insn(p);

         assert(brw_inst_opcode(devinfo, do_insn) == BRW_OPCODE_DO);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d(0));

         brw_inst_set_exec_size(devinfo, insn, brw_inst_exec_size(devinfo, do_insn));
         brw_inst_set_gen4_jump_count(devinfo, insn, br * (do_insn - insn + 1));
         brw_inst_set_gen4_pop_count(devinfo, insn, 0);

         brw_patch_break_cont(p, insn);
      }
   }
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);

   p->loop_stack_depth--;

   return insn;
}
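
/* A sketch of loop emission with the helpers above (predication on the
 * BREAK is driver-specific and omitted; the execution size is an example):
 *
 *    brw_DO(p, BRW_EXECUTE_8);   // gen6+: just records the loop start
 *    ...
 *    brw_BREAK(p);               // jump target patched later
 *    ...
 *    brw_WHILE(p);               // jumps back to the DO; pre-gen6 this
 *                                // also back-patches BREAK/CONT above
 */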

/* FORWARD JUMPS:
 */
void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *jmp_insn = &p->store[jmp_insn_idx];
   unsigned jmpi = 1;

   if (devinfo->gen >= 5)
      jmpi = 2;

   assert(brw_inst_opcode(devinfo, jmp_insn) == BRW_OPCODE_JMPI);
   assert(brw_inst_src1_reg_file(devinfo, jmp_insn) == BRW_IMMEDIATE_VALUE);

   brw_inst_set_gen4_jump_count(devinfo, jmp_insn,
                                jmpi * (p->nr_insn - jmp_insn_idx - 1));
}
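
/* Worked example for the forward-jump landing above, with hypothetical
 * numbers: on Gen5+ (jmpi == 2), a JMPI at index 3 in a program that now
 * has nr_insn == 8 gets a jump count of 2 * (8 - 3 - 1) = 8, i.e. it skips
 * the four instructions between the JMPI and the landing point.
 */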

/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
void brw_CMP(struct brw_codegen *p,
             struct brw_reg dest,
             unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);

   brw_inst_set_cond_modifier(devinfo, insn, conditional);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (devinfo->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
      }
   }
}
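
/* Sketch of the usual pairing with the IF/ELSE emitters earlier in this
 * file (register choices x and y are hypothetical): populate the flag
 * register with a CMP to a null destination, then branch on it.
 *
 *    brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_D),
 *            BRW_CONDITIONAL_NZ, x, y);
 *    brw_IF(p, BRW_EXECUTE_8);
 *    ...
 */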

void brw_CMPN(struct brw_codegen *p,
              struct brw_reg dest,
              unsigned conditional,
              struct brw_reg src0,
              struct brw_reg src1)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn = next_insn(p, BRW_OPCODE_CMPN);

   brw_inst_set_cond_modifier(devinfo, insn, conditional);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA)
    * says:
    *
    *    If the destination is the null register, the {Switch} instruction
    *    option must be used.
    *
    * Page 77 of the Haswell PRM Volume 2b contains the same text.
    */
   if (devinfo->gen == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
      }
   }
}

/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 */
void gen4_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               unsigned msg_reg_nr,
               struct brw_reg src,
               unsigned precision )
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
   unsigned data_type;
   if (has_scalar_region(src)) {
      data_type = BRW_MATH_DATA_SCALAR;
   } else {
      data_type = BRW_MATH_DATA_VECTOR;
   }

   assert(devinfo->gen < 6);

   /* Example code doesn't set predicate_control for send
    * instructions.
    */
   brw_inst_set_pred_control(devinfo, insn, 0);
   brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
                        insn,
                        function,
                        src.type == BRW_REGISTER_TYPE_D,
                        precision,
                        data_type);
}
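
/* has_scalar_region() above distinguishes a <0;1,0> scalar region (every
 * channel reads the same element) from a normal vector region; the math
 * message needs to declare which layout the operand uses.
 */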

void gen6_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(devinfo->gen >= 6);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (devinfo->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (devinfo->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
      assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
             (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F ||
             (src0.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
      assert(src1.type == BRW_REGISTER_TYPE_F ||
             (src1.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (devinfo->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   brw_inst_set_math_function(devinfo, insn, function);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
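
/* Sketch of a gen6+ extended-math call (register choices hypothetical);
 * single-operand functions conventionally pass the null register, whose
 * default float type satisfies the asserts above, as src1:
 *
 *    gen6_math(p, dst_f, BRW_MATH_FUNCTION_INV, src_f, brw_null_reg());
 */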

/**
 * Return the right surface index to access the thread scratch space using
 * stateless dataport messages.
 */
unsigned
brw_scratch_surface_idx(const struct brw_codegen *p)
{
   /* The scratch space is thread-local so IA coherency is unnecessary. */
   if (p->devinfo->gen >= 8)
      return GEN8_BTI_STATELESS_NON_COHERENT;
   else
      return BRW_BTI_STATELESS;
}

/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_codegen *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   unsigned offset)
{
   const struct gen_device_info *devinfo = p->devinfo;
   const unsigned target_cache =
      (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
       devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
       BRW_SFID_DATAPORT_WRITE);
   const struct tgl_swsb swsb = brw_get_default_swsb(p);
   uint32_t msg_type;

   if (devinfo->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   const unsigned mlen = 1 + num_regs;

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_set_default_swsb(p, tgl_swsb_null());
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
   }

   {
      struct brw_reg dest;
      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      brw_inst_set_sfid(devinfo, insn, target_cache);
      brw_inst_set_compression(devinfo, insn, false);

      if (brw_inst_exec_size(devinfo, insn) >= 16)
         src_header = vec16(src_header);

      assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
      if (devinfo->gen < 6)
         brw_inst_set_base_mrf(devinfo, insn, mrf.nr);

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (devinfo->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (devinfo->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (devinfo->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_desc(p, insn,
                   brw_message_desc(devinfo, mlen, send_commit_msg, true) |
                   brw_dp_write_desc(devinfo, brw_scratch_surface_idx(p),
                                     BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
                                     msg_type, 0, /* not a render target */
                                     send_commit_msg));
   }
}
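
/* Message-size arithmetic for the write above, with a hypothetical spill
 * of num_regs == 2: mlen = 1 + 2 = 3 (the header plus two payload GRFs),
 * and the block size is BRW_DATAPORT_OWORD_BLOCK_DWORDS(16) -- each
 * 32-byte GRF is 8 dwords, i.e. 2 owords.
 */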

/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_codegen *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             int num_regs,
                             unsigned offset)
{
   const struct gen_device_info *devinfo = p->devinfo;
   const struct tgl_swsb swsb = brw_get_default_swsb(p);

   if (devinfo->gen >= 6)
      offset /= 16;

   if (p->devinfo->gen >= 7) {
      /* On gen 7 and above, we no longer have message registers and we can
       * send from any register we want.  By using the destination register
       * for the message, we guarantee that the implied message write won't
       * accidentally overwrite anything.  This has been a problem because
       * the MRF registers and source for the final FB write are both fixed
       * and may overlap.
       */
      mrf = retype(dest, BRW_REGISTER_TYPE_UD);
   } else {
      mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   }
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   const unsigned rlen = num_regs;
   const unsigned target_cache =
      (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
       devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
       BRW_SFID_DATAPORT_READ);

   {
      brw_push_insn_state(p);
      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_set_default_swsb(p, tgl_swsb_null());
      brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));

      brw_pop_insn_state(p);
      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
   }

   {
      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);

      brw_inst_set_sfid(devinfo, insn, target_cache);
      assert(brw_inst_pred_control(devinfo, insn) == 0);
      brw_inst_set_compression(devinfo, insn, false);

      brw_set_dest(p, insn, dest); /* UW? */
      if (devinfo->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
         brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
      }

      brw_set_desc(p, insn,
                   brw_message_desc(devinfo, 1, rlen, true) |
                   brw_dp_read_desc(devinfo, brw_scratch_surface_idx(p),
                                    BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
                                    BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                                    BRW_DATAPORT_READ_TARGET_RENDER_CACHE));
   }
}
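
/* The read variant above is the mirror image of the write: the message is
 * just the one header register (mlen == 1, carrying the offset in element 2
 * of the header copy), and the response length rlen equals the number of
 * GRFs being read back.
 */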
|
|
|
|
|
|
|
2013-10-16 11:51:22 -07:00
|
|
|
|
void
|
2015-04-16 11:06:57 -07:00
|
|
|
|
gen7_block_read_scratch(struct brw_codegen *p,
|
2013-10-16 11:51:22 -07:00
|
|
|
|
struct brw_reg dest,
|
|
|
|
|
|
int num_regs,
|
2013-11-25 15:51:24 -08:00
|
|
|
|
unsigned offset)
|
2013-10-16 11:51:22 -07:00
|
|
|
|
{
|
2014-06-13 14:29:25 -07:00
|
|
|
|
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
|
2016-10-04 11:15:05 +11:00
|
|
|
|
assert(brw_inst_pred_control(p->devinfo, insn) == BRW_PREDICATE_NONE);
|
2013-10-16 11:51:22 -07:00
|
|
|
|
|
2014-06-04 17:08:57 -07:00
|
|
|
|
brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UW));
|
2013-10-16 11:51:22 -07:00
|
|
|
|
|
|
|
|
|
|
/* The HW requires that the header is present; this is to get the g0.5
|
|
|
|
|
|
* scratch offset.
|
|
|
|
|
|
*/
|
|
|
|
|
|
brw_set_src0(p, insn, brw_vec8_grf(0, 0));
|
|
|
|
|
|
|
|
|
|
|
|
/* According to the docs, offset is "A 12-bit HWord offset into the memory
|
|
|
|
|
|
* Immediate Memory buffer as specified by binding table 0xFF." An HWORD
|
|
|
|
|
|
* is 32 bytes, which happens to be the size of a register.
|
|
|
|
|
|
*/
|
|
|
|
|
|
offset /= REG_SIZE;
|
|
|
|
|
|
assert(offset < (1 << 12));
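/* Illustrative example (not from the original source): a byte offset of
 * 0x400 (1024) becomes HWord offset 32 above, since REG_SIZE is 32 bytes,
 * comfortably below the 1 << 12 limit asserted here.
 */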
|
2014-06-04 17:08:57 -07:00
|
|
|
|
|
|
|
|
|
|
gen7_set_dp_scratch_message(p, insn,
|
|
|
|
|
|
false, /* scratch read */
|
|
|
|
|
|
false, /* OWords */
|
|
|
|
|
|
false, /* invalidate after read */
|
|
|
|
|
|
num_regs,
|
|
|
|
|
|
offset,
|
|
|
|
|
|
1, /* mlen: just g0 */
|
|
|
|
|
|
num_regs, /* rlen */
|
|
|
|
|
|
true); /* header present */
|
2013-10-16 11:51:22 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2009-03-31 10:50:55 -06:00
|
|
|
|
/**
|
2016-12-08 20:05:18 -08:00
|
|
|
|
* Read float[4] vectors from the data port constant cache.
|
2009-04-14 11:08:42 -06:00
|
|
|
|
* Location (in buffer) should be a multiple of 16.
|
2009-03-31 10:50:55 -06:00
|
|
|
|
* Used for fetching shader constants.
|
|
|
|
|
|
*/
|
2015-04-16 11:06:57 -07:00
|
|
|
|
void brw_oword_block_read(struct brw_codegen *p,
|
2010-10-22 12:57:00 -07:00
|
|
|
|
struct brw_reg dest,
|
|
|
|
|
|
struct brw_reg mrf,
|
|
|
|
|
|
uint32_t offset,
|
|
|
|
|
|
uint32_t bind_table_index)
|
2009-03-31 10:50:55 -06:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2015-04-23 14:36:16 +03:00
|
|
|
|
const unsigned target_cache =
|
2016-10-26 14:25:06 -07:00
|
|
|
|
(devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
|
2018-06-07 10:50:20 -07:00
|
|
|
|
BRW_SFID_DATAPORT_READ);
|
2018-05-29 14:50:46 -07:00
|
|
|
|
const unsigned exec_size = 1 << brw_get_default_exec_size(p);
|
2019-09-26 23:38:24 -07:00
|
|
|
|
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
2010-10-26 13:17:54 -07:00
|
|
|
|
|
|
|
|
|
|
/* On newer hardware, offset is in units of owords. */
|
2015-04-14 18:00:06 -07:00
|
|
|
|
if (devinfo->gen >= 6)
|
2010-10-26 13:17:54 -07:00
|
|
|
|
offset /= 16;
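/* Illustrative example: a byte offset of 256 becomes oword offset 16,
 * since an oword is 16 bytes.
 */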
|
|
|
|
|
|
|
2010-10-22 12:57:00 -07:00
|
|
|
|
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
|
2009-03-31 10:50:55 -06:00
|
|
|
|
|
2010-10-22 12:57:00 -07:00
|
|
|
|
brw_push_insn_state(p);
|
2014-05-31 16:57:02 -07:00
|
|
|
|
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
|
|
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
|
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
2009-03-31 10:50:55 -06:00
|
|
|
|
|
2016-12-08 20:05:18 -08:00
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
2010-10-22 12:57:00 -07:00
|
|
|
|
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
|
2009-03-31 10:50:55 -06:00
|
|
|
|
|
2010-10-22 12:57:00 -07:00
|
|
|
|
/* set message header global offset field (reg 0, element 2) */
|
2017-08-30 13:36:58 -07:00
|
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_null());
|
2010-10-22 12:57:00 -07:00
|
|
|
|
brw_MOV(p,
|
|
|
|
|
|
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
|
|
|
|
|
|
mrf.nr,
|
|
|
|
|
|
2), BRW_REGISTER_TYPE_UD),
|
|
|
|
|
|
brw_imm_ud(offset));
|
2016-12-08 20:05:18 -08:00
|
|
|
|
brw_pop_insn_state(p);
|
2009-03-31 10:50:55 -06:00
|
|
|
|
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
|
|
|
|
|
|
2014-06-13 14:29:25 -07:00
|
|
|
|
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
|
2010-10-22 12:57:00 -07:00
|
|
|
|
|
2018-06-07 10:50:20 -07:00
|
|
|
|
brw_inst_set_sfid(devinfo, insn, target_cache);
|
|
|
|
|
|
|
2010-10-22 12:57:00 -07:00
|
|
|
|
/* cast dest to a uword[8] vector */
|
|
|
|
|
|
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
|
|
|
|
|
|
|
2010-12-03 11:49:29 -08:00
|
|
|
|
brw_set_dest(p, insn, dest);
|
2015-04-14 18:00:06 -07:00
|
|
|
|
if (devinfo->gen >= 6) {
|
2011-05-10 16:51:12 -07:00
|
|
|
|
brw_set_src0(p, insn, mrf);
|
2010-10-26 13:17:54 -07:00
|
|
|
|
} else {
|
2011-05-10 16:51:12 -07:00
|
|
|
|
brw_set_src0(p, insn, brw_null_reg());
|
2015-04-14 18:00:06 -07:00
|
|
|
|
brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
|
2010-10-26 13:17:54 -07:00
|
|
|
|
}
|
2010-10-22 12:57:00 -07:00
|
|
|
|
|
2018-06-07 10:50:20 -07:00
|
|
|
|
brw_set_desc(p, insn,
|
|
|
|
|
|
brw_message_desc(devinfo, 1, DIV_ROUND_UP(exec_size, 8), true) |
|
|
|
|
|
|
brw_dp_read_desc(devinfo, bind_table_index,
|
|
|
|
|
|
BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
|
|
|
|
|
|
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
|
|
|
|
|
|
BRW_DATAPORT_READ_TARGET_DATA_CACHE));
|
2010-10-22 12:57:00 -07:00
|
|
|
|
|
|
|
|
|
|
brw_pop_insn_state(p);
|
2009-03-31 10:50:55 -06:00
|
|
|
|
}
|
|
|
|
|
|
|
2017-01-13 14:16:12 -08:00
|
|
|
|
brw_inst *
|
|
|
|
|
|
brw_fb_WRITE(struct brw_codegen *p,
|
|
|
|
|
|
struct brw_reg payload,
|
|
|
|
|
|
struct brw_reg implied_header,
|
|
|
|
|
|
unsigned msg_control,
|
|
|
|
|
|
unsigned binding_table_index,
|
|
|
|
|
|
unsigned msg_length,
|
|
|
|
|
|
unsigned response_length,
|
|
|
|
|
|
bool eot,
|
|
|
|
|
|
bool last_render_target,
|
|
|
|
|
|
bool header_present)
|
2006-08-09 19:14:05 +00:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2015-04-23 14:36:16 +03:00
|
|
|
|
const unsigned target_cache =
|
|
|
|
|
|
(devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
|
2018-07-09 16:12:59 -07:00
|
|
|
|
BRW_SFID_DATAPORT_WRITE);
|
2014-06-13 14:29:25 -07:00
|
|
|
|
brw_inst *insn;
|
2013-11-25 15:51:24 -08:00
|
|
|
|
unsigned msg_type;
|
2014-09-16 15:16:20 -07:00
|
|
|
|
struct brw_reg dest, src0;
|
2011-03-14 10:51:19 -07:00
|
|
|
|
|
2018-05-29 14:50:46 -07:00
|
|
|
|
if (brw_get_default_exec_size(p) >= BRW_EXECUTE_16)
|
2011-03-14 10:51:19 -07:00
|
|
|
|
dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
|
|
|
|
|
|
else
|
|
|
|
|
|
dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
|
2010-08-20 15:02:19 -07:00
|
|
|
|
|
2015-04-14 18:00:06 -07:00
|
|
|
|
if (devinfo->gen >= 6) {
|
2010-10-26 09:35:34 -07:00
|
|
|
|
insn = next_insn(p, BRW_OPCODE_SENDC);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
insn = next_insn(p, BRW_OPCODE_SEND);
|
|
|
|
|
|
}
|
2018-07-09 16:12:59 -07:00
|
|
|
|
brw_inst_set_sfid(devinfo, insn, target_cache);
|
2016-05-18 15:29:27 -07:00
|
|
|
|
brw_inst_set_compression(devinfo, insn, false);
|
2010-08-20 15:02:19 -07:00
|
|
|
|
|
2015-04-14 18:00:06 -07:00
|
|
|
|
if (devinfo->gen >= 6) {
|
2011-10-07 11:59:06 -07:00
|
|
|
|
/* headerless version, just submit color payload */
|
2014-09-16 15:16:20 -07:00
|
|
|
|
src0 = payload;
|
2010-08-20 15:02:19 -07:00
|
|
|
|
|
2011-10-07 11:59:06 -07:00
|
|
|
|
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
|
2010-08-20 15:02:19 -07:00
|
|
|
|
} else {
|
2014-09-16 15:16:20 -07:00
|
|
|
|
assert(payload.file == BRW_MESSAGE_REGISTER_FILE);
|
2015-04-14 18:00:06 -07:00
|
|
|
|
brw_inst_set_base_mrf(devinfo, insn, payload.nr);
|
2014-09-16 15:16:20 -07:00
|
|
|
|
src0 = implied_header;
|
2010-08-20 15:02:19 -07:00
|
|
|
|
|
2010-08-20 15:32:17 -07:00
|
|
|
|
msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
|
2010-08-20 15:02:19 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2010-12-03 11:49:29 -08:00
|
|
|
|
brw_set_dest(p, insn, dest);
|
2011-05-10 16:51:12 -07:00
|
|
|
|
brw_set_src0(p, insn, src0);
|
2018-07-09 16:12:59 -07:00
|
|
|
|
brw_set_desc(p, insn,
|
|
|
|
|
|
brw_message_desc(devinfo, msg_length, response_length,
|
|
|
|
|
|
header_present) |
|
|
|
|
|
|
brw_dp_write_desc(devinfo, binding_table_index, msg_control,
|
|
|
|
|
|
msg_type, last_render_target,
|
|
|
|
|
|
0 /* send_commit_msg */));
|
|
|
|
|
|
brw_inst_set_eot(devinfo, insn, eot);
|
2017-01-13 14:16:12 -08:00
|
|
|
|
|
|
|
|
|
|
return insn;
|
2006-08-09 19:14:05 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
2016-07-21 19:13:55 -07:00
|
|
|
|
brw_inst *
|
|
|
|
|
|
gen9_fb_READ(struct brw_codegen *p,
|
|
|
|
|
|
struct brw_reg dst,
|
|
|
|
|
|
struct brw_reg payload,
|
|
|
|
|
|
unsigned binding_table_index,
|
|
|
|
|
|
unsigned msg_length,
|
|
|
|
|
|
unsigned response_length,
|
|
|
|
|
|
bool per_sample)
|
|
|
|
|
|
{
|
2016-08-25 16:22:58 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2016-07-21 19:13:55 -07:00
|
|
|
|
assert(devinfo->gen >= 9);
|
|
|
|
|
|
const unsigned msg_subtype =
|
2018-05-29 14:50:46 -07:00
|
|
|
|
brw_get_default_exec_size(p) == BRW_EXECUTE_16 ? 0 : 1;
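/* Illustrative note (inferred from the code, not quoted from the PRM):
 * subtype 0 selects the SIMD16 render-target-read variant and 1 the
 * SIMD8 variant; it lands in the low bits of the message control word
 * built below, alongside per_sample at bit 5.
 */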
|
2016-07-21 19:13:55 -07:00
|
|
|
|
brw_inst *insn = next_insn(p, BRW_OPCODE_SENDC);
|
|
|
|
|
|
|
2018-06-07 10:50:20 -07:00
|
|
|
|
brw_inst_set_sfid(devinfo, insn, GEN6_SFID_DATAPORT_RENDER_CACHE);
|
2016-07-21 19:13:55 -07:00
|
|
|
|
brw_set_dest(p, insn, dst);
|
|
|
|
|
|
brw_set_src0(p, insn, payload);
|
2018-06-07 10:50:20 -07:00
|
|
|
|
brw_set_desc(
|
|
|
|
|
|
p, insn,
|
|
|
|
|
|
brw_message_desc(devinfo, msg_length, response_length, true) |
|
|
|
|
|
|
brw_dp_read_desc(devinfo, binding_table_index,
|
|
|
|
|
|
per_sample << 5 | msg_subtype,
|
|
|
|
|
|
GEN9_DATAPORT_RC_RENDER_TARGET_READ,
|
|
|
|
|
|
BRW_DATAPORT_READ_TARGET_RENDER_CACHE));
|
2018-05-29 14:50:46 -07:00
|
|
|
|
brw_inst_set_rt_slot_group(devinfo, insn, brw_get_default_group(p) / 16);
|
2016-07-21 19:13:55 -07:00
|
|
|
|
|
|
|
|
|
|
return insn;
|
|
|
|
|
|
}
|
2006-08-09 19:14:05 +00:00
|
|
|
|
|
2009-03-24 18:05:53 -06:00
|
|
|
|
/**
|
|
|
|
|
|
* Texture sample instruction.
|
|
|
|
|
|
* Note: the msg_type plus msg_length values determine exactly what kind
|
|
|
|
|
|
* of sampling operation is performed. See volume 4, page 161 of docs.
|
|
|
|
|
|
*/
|
2015-04-16 11:06:57 -07:00
|
|
|
|
void brw_SAMPLE(struct brw_codegen *p,
|
2006-08-09 19:14:05 +00:00
|
|
|
|
struct brw_reg dest,
|
2013-11-25 15:51:24 -08:00
|
|
|
|
unsigned msg_reg_nr,
|
2006-08-09 19:14:05 +00:00
|
|
|
|
struct brw_reg src0,
|
2013-11-25 15:51:24 -08:00
|
|
|
|
unsigned binding_table_index,
|
|
|
|
|
|
unsigned sampler,
|
|
|
|
|
|
unsigned msg_type,
|
|
|
|
|
|
unsigned response_length,
|
|
|
|
|
|
unsigned msg_length,
|
|
|
|
|
|
unsigned header_present,
|
|
|
|
|
|
unsigned simd_mode,
|
|
|
|
|
|
unsigned return_format)
|
2006-08-09 19:14:05 +00:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2014-06-13 14:29:25 -07:00
|
|
|
|
brw_inst *insn;
|
2010-09-17 14:17:06 +08:00
|
|
|
|
|
i965/fs: Convert gen7 to using GRFs for texture messages.
Looking at Lightsmark's shaders, the way we used MRFs (or in gen7's
case, GRFs) was bad in a couple of ways. One was that it prevented
compute-to-MRF for the common case of a texcoord that gets used
exactly once, but where the texcoord setup all gets emitted before the
texture calls (such as when it's a bare fragment shader input, which
gets interpolated before processing main()). Another was that it
introduced a bunch of dependencies that constrained scheduling, and
forced waits for texture operations to be done before they are
required. For example, we can now move the compute-to-MRF
interpolation for the second texture send down after the first send.
The downside is that this generally prevents
remove_duplicate_mrf_writes() from doing anything, whereas previously
it avoided work for the case of sampling from the same texcoord twice.
However, I suspect that most of the win that originally justified that
code was in avoiding the WAR stall on the first send, which this patch
also avoids, rather than the small cost of the extra instruction. We
see instruction count regressions in shaders in unigine, yofrankie,
savage2, hon, and gstreamer.
Improves GLB2.7 performance by 0.633628% +/- 0.491809% (n=121/125, avg of
~66fps, outliers below 61 dropped).
Improves openarena performance by 1.01092% +/- 0.66897% (n=425).
No significant difference on Lightsmark (n=44).
v2: Squash in the fix for register unspilling for send-from-GRF, fixing a
segfault in lightsmark.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Acked-by: Matt Turner <mattst88@gmail.com>
2013-10-09 17:17:59 -07:00
|
|
|
|
if (msg_reg_nr != -1)
|
|
|
|
|
|
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
|
2006-08-09 19:14:05 +00:00
|
|
|
|
|
2013-02-05 16:21:07 -08:00
|
|
|
|
insn = next_insn(p, BRW_OPCODE_SEND);
|
2018-06-02 15:15:15 -07:00
|
|
|
|
brw_inst_set_sfid(devinfo, insn, BRW_SFID_SAMPLER);
|
2015-04-14 18:00:06 -07:00
|
|
|
|
brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); /* XXX */
|
2013-09-27 15:23:56 +08:00
|
|
|
|
|
|
|
|
|
|
/* From the 965 PRM (volume 4, part 1, section 14.2.41):
|
|
|
|
|
|
*
|
|
|
|
|
|
* "Instruction compression is not allowed for this instruction (that
|
|
|
|
|
|
* is, send). The hardware behavior is undefined if this instruction is
|
|
|
|
|
|
* set as compressed. However, compress control can be set to "SecHalf"
|
|
|
|
|
|
* to affect the EMask generation."
|
|
|
|
|
|
*
|
|
|
|
|
|
* No similar wording is found in later PRMs, but there are examples
|
|
|
|
|
|
* utilizing send with SecHalf. More importantly, SIMD8 sampler messages
|
|
|
|
|
|
* are allowed in SIMD16 mode and they could not work without SecHalf. For
|
|
|
|
|
|
* these reasons, we allow BRW_COMPRESSION_2NDHALF here.
|
|
|
|
|
|
*/
|
2016-05-18 15:29:27 -07:00
|
|
|
|
brw_inst_set_compression(devinfo, insn, false);
|
2013-09-27 15:23:56 +08:00
|
|
|
|
|
2015-04-14 18:00:06 -07:00
|
|
|
|
if (devinfo->gen < 6)
|
|
|
|
|
|
brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);
|
2006-08-09 19:14:05 +00:00
|
|
|
|
|
2013-02-05 16:21:07 -08:00
|
|
|
|
brw_set_dest(p, insn, dest);
|
|
|
|
|
|
brw_set_src0(p, insn, src0);
|
2018-06-02 15:15:15 -07:00
|
|
|
|
brw_set_desc(p, insn,
|
|
|
|
|
|
brw_message_desc(devinfo, msg_length, response_length,
|
|
|
|
|
|
header_present) |
|
|
|
|
|
|
brw_sampler_desc(devinfo, binding_table_index, sampler,
|
|
|
|
|
|
msg_type, simd_mode, return_format));
|
2006-08-09 19:14:05 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
2014-08-10 11:49:31 +12:00
|
|
|
|
/* Adjust the message header's sampler state pointer to
|
|
|
|
|
|
* select the correct group of 16 samplers.
|
|
|
|
|
|
*/
|
2015-04-16 11:06:57 -07:00
|
|
|
|
void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
|
2014-08-10 11:49:31 +12:00
|
|
|
|
struct brw_reg header,
|
2015-01-22 13:46:44 -08:00
|
|
|
|
struct brw_reg sampler_index)
|
2014-08-10 11:49:31 +12:00
|
|
|
|
{
|
|
|
|
|
|
/* The "Sampler Index" field can only store values between 0 and 15.
|
|
|
|
|
|
* However, we can add an offset to the "Sampler State Pointer"
|
|
|
|
|
|
* field, effectively selecting a different set of 16 samplers.
|
|
|
|
|
|
*
|
|
|
|
|
|
* The "Sampler State Pointer" needs to be aligned to a 32-byte
|
|
|
|
|
|
* offset, and each sampler state is only 16 bytes, so we can't
|
|
|
|
|
|
* exclusively use the offset - we have to use both.
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2014-08-10 11:49:31 +12:00
|
|
|
|
|
|
|
|
|
|
if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
|
|
|
|
|
|
const int sampler_state_size = 16; /* 16 bytes */
|
2015-10-22 19:41:30 -07:00
|
|
|
|
uint32_t sampler = sampler_index.ud;
|
2014-08-10 11:49:31 +12:00
|
|
|
|
|
|
|
|
|
|
if (sampler >= 16) {
|
2015-04-14 18:00:06 -07:00
|
|
|
|
assert(devinfo->is_haswell || devinfo->gen >= 8);
|
2014-08-10 11:49:31 +12:00
|
|
|
|
brw_ADD(p,
|
|
|
|
|
|
get_element_ud(header, 3),
|
|
|
|
|
|
get_element_ud(brw_vec8_grf(0, 0), 3),
|
|
|
|
|
|
brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
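/* Worked example (illustrative): sampler 20 selects group 1 (20 / 16),
 * so 16 * 1 * 16 == 256 bytes are added to the sampler state pointer
 * in header element 3; each group of 16 samplers is 256 bytes apart.
 */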
|
|
|
|
|
|
}
|
|
|
|
|
|
} else {
|
2014-08-10 11:39:44 +12:00
|
|
|
|
/* Non-const sampler array indexing case */
|
2015-04-14 18:00:06 -07:00
|
|
|
|
if (devinfo->gen < 8 && !devinfo->is_haswell) {
|
2014-08-10 11:39:44 +12:00
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2015-01-22 13:46:44 -08:00
|
|
|
|
struct brw_reg temp = get_element_ud(header, 3);
|
2014-08-10 11:39:44 +12:00
|
|
|
|
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_push_insn_state(p);
|
2014-11-05 06:41:13 +13:00
|
|
|
|
brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0));
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
2014-08-10 11:39:44 +12:00
|
|
|
|
brw_SHL(p, temp, temp, brw_imm_ud(4));
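/* Illustrative note: the AND above keeps 16 * (sampler / 16), and the
 * shift multiplies that by 16 again, so temp holds the same 256-byte
 * group offset that the immediate path computes.
 */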
|
|
|
|
|
|
brw_ADD(p,
|
|
|
|
|
|
get_element_ud(header, 3),
|
|
|
|
|
|
get_element_ud(brw_vec8_grf(0, 0), 3),
|
|
|
|
|
|
temp);
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_pop_insn_state(p);
|
2014-08-10 11:49:31 +12:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2006-08-09 19:14:05 +00:00
|
|
|
|
/* All these variables are pretty confusing - we might be better off
|
|
|
|
|
|
* using bitmasks and macros for this, in the old style. Or perhaps
|
|
|
|
|
|
* just having the caller instantiate the fields in dword3 itself.
|
|
|
|
|
|
*/
|
2015-04-16 11:06:57 -07:00
|
|
|
|
void brw_urb_WRITE(struct brw_codegen *p,
|
2006-08-09 19:14:05 +00:00
|
|
|
|
struct brw_reg dest,
|
2013-11-25 15:51:24 -08:00
|
|
|
|
unsigned msg_reg_nr,
|
2006-08-09 19:14:05 +00:00
|
|
|
|
struct brw_reg src0,
|
i965: Allow C++ type safety in the use of enum brw_urb_write_flags.
(From a suggestion by Francisco Jerez)
If an enum represents a bitfield of flags, e.g.:
enum E {
A = 1,
B = 2,
C = 4,
D = 8,
};
then C++ normally prohibits statements like this:
enum E x = A | B;
because A and B are implicitly converted to ints before OR-ing them,
and an int can't be stored in an enum without a type cast. C, on the
other hand, allows an int to be implicitly converted to an enum
without casting.
In the past we've dealt with this situation by storing flag bitfields
as ints. This avoids ugly casting at the expense of some type safety
that C++ would normally have offered (e.g. we get no warning if we
accidentally use the wrong enum type).
However, we can get the best of both worlds if we override the |
operator. The ugly casting is confined to the operator overload, and
we still get the benefit of C++ making sure we don't use the wrong
enum type.
v2: Remove unnecessary comment and unnecessary use of "enum" keyword.
Use static_cast.
Reviewed-by: Chad Versace <chad.versace@linux.intel.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
2013-08-23 13:19:19 -07:00
|
|
|
|
enum brw_urb_write_flags flags,
|
2013-11-25 15:51:24 -08:00
|
|
|
|
unsigned msg_length,
|
|
|
|
|
|
unsigned response_length,
|
|
|
|
|
|
unsigned offset,
|
|
|
|
|
|
unsigned swizzle)
|
2006-08-09 19:14:05 +00:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2014-06-13 14:29:25 -07:00
|
|
|
|
brw_inst *insn;
|
2010-02-25 18:35:08 -08:00
|
|
|
|
|
2011-03-16 14:09:17 -07:00
|
|
|
|
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
|
2010-02-25 18:35:08 -08:00
|
|
|
|
|
2015-04-14 18:00:06 -07:00
|
|
|
|
if (devinfo->gen >= 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
|
2011-04-26 17:24:38 -07:00
|
|
|
|
/* Enable Channel Masks in the URB_WRITE_HWORD message header */
|
2011-08-18 02:15:56 -07:00
|
|
|
|
brw_push_insn_state(p);
|
2014-05-31 16:57:02 -07:00
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
|
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
2017-08-30 13:36:58 -07:00
|
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
2014-08-14 12:22:16 -07:00
|
|
|
|
brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
|
|
|
|
|
|
BRW_REGISTER_TYPE_UD),
|
|
|
|
|
|
retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
|
|
|
|
|
|
brw_imm_ud(0xff00));
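/* Illustrative note: ORing 0xff00 into header dword 5 sets bits 15:8,
 * i.e. the eight channel mask enables referred to in the comment above.
 */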
|
2011-08-18 02:15:56 -07:00
|
|
|
|
brw_pop_insn_state(p);
|
2011-04-26 17:24:38 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2010-02-25 18:35:08 -08:00
|
|
|
|
insn = next_insn(p, BRW_OPCODE_SEND);
|
2006-08-09 19:14:05 +00:00
|
|
|
|
|
i965: Turn BRW_MAX_MRF into a macro that accepts a hardware generation
There are some bug reports about shaders failing to compile in gen6
because MRF 14 is used when we need to spill. For example:
https://bugs.freedesktop.org/show_bug.cgi?id=86469
https://bugs.freedesktop.org/show_bug.cgi?id=90631
Discussion in bugzilla pointed to the fact that gen6 might actually have
24 MRF registers available instead of 16, so we could use other MRF
registers and avoid these conflicts (we still need to investigate why
some shaders need up to MRF 14 anyway, since this is not expected).
Notice that the hardware docs are not clear about this fact:
SNB PRM Vol4 Part2's "Table 5-4. MRF Registers Available in Device
Hardware" says "Number per Thread" - "24 registers"
However, SNB PRM Vol4 Part1, 1.6.1 Message Register File (MRF) says:
"Normal threads should construct their messages in m1..m15. (...)
Regardless of actual hardware implementation, the thread should
not assume that MRF addresses above m15 wrap to legal MRF registers."
Therefore experimentation was necessary to evaluate if we had these extra
MRF registers available or not. This was tested in gen6 using MRF
registers 21..23 for spilling and doing a full piglit run (all.py) forcing
spilling of everything on the FS backend. It was also tested by doing
spilling of everything on both the FS and the VS backends with a piglit run
of shader.py. In both cases no regressions were observed. In fact, many of
these tests were helped in the cases where we forced spilling, since that
triggered the same underlying problem described in the bug reports. Here are
some results using INTEL_DEBUG=spill_fs,spill_vec4 for a shader.py run on
gen6 hardware:
Using MRFs 13..15 for spilling:
crash: 2, fail: 113, pass: 6621, skip: 5461
Using MRFs 21..23 for spilling:
crash: 2, fail: 12, pass: 6722, skip: 5461
This patch sets the ground for later patches to implement spilling
using MRF registers 21..23 in gen6.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2015-09-15 16:00:26 +02:00
|
|
|
|
assert(msg_length < BRW_MAX_MRF(devinfo->gen));
|
2006-08-09 19:14:05 +00:00
|
|
|
|
|
2010-12-03 11:49:29 -08:00
|
|
|
|
brw_set_dest(p, insn, dest);
|
2011-05-10 16:51:12 -07:00
|
|
|
|
brw_set_src0(p, insn, src0);
|
|
|
|
|
|
brw_set_src1(p, insn, brw_imm_d(0));
|
2006-08-09 19:14:05 +00:00
|
|
|
|
|
2015-04-14 18:00:06 -07:00
|
|
|
|
if (devinfo->gen < 6)
|
|
|
|
|
|
brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);
|
2006-08-09 19:14:05 +00:00
|
|
|
|
|
2011-05-10 16:51:12 -07:00
|
|
|
|
brw_set_urb_message(p,
|
2009-07-13 10:48:43 +08:00
|
|
|
|
insn,
|
2013-08-10 21:13:33 -07:00
|
|
|
|
flags,
|
2006-08-09 19:14:05 +00:00
|
|
|
|
msg_length,
|
2013-11-25 15:39:03 -08:00
|
|
|
|
response_length,
|
2006-08-09 19:14:05 +00:00
|
|
|
|
offset,
|
|
|
|
|
|
swizzle);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2018-06-02 15:08:18 -07:00
|
|
|
|
void
|
2015-04-16 11:06:57 -07:00
|
|
|
|
brw_send_indirect_message(struct brw_codegen *p,
|
i965: Factor out logic to build a send message instruction with indirect descriptor.
This is going to be useful because the Gen7+ uniform and varying pull
constant, texturing, typed and untyped surface read, write, and atomic
generation code on the vec4 and fs back-end all require the same logic
to handle conditionally indirect surface indices. In pseudocode:
| if (surface.file == BRW_IMMEDIATE_VALUE) {
| inst = brw_SEND(p, dst, payload);
| set_descriptor_control_bits(inst, surface, ...);
| } else {
| inst = brw_OR(p, addr, surface, 0);
| set_descriptor_control_bits(inst, ...);
| inst = brw_SEND(p, dst, payload);
| set_indirect_send_descriptor(inst, addr);
| }
This patch abstracts out this frequently recurring pattern so we can
now write:
| inst = brw_send_indirect_message(p, sfid, dst, payload, surface)
| set_descriptor_control_bits(inst, ...);
without worrying about handling the immediate and indirect surface
index cases explicitly.
v2: Rebase. Improve documentation and commit message. (Topi)
Preserve UW destination type cargo-cult. (Topi, Ken, Matt)
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
2015-03-19 15:44:24 +02:00
|
|
|
|
unsigned sfid,
|
|
|
|
|
|
struct brw_reg dst,
|
|
|
|
|
|
struct brw_reg payload,
|
2018-06-02 15:07:31 -07:00
|
|
|
|
struct brw_reg desc,
|
2019-02-07 17:45:51 -06:00
|
|
|
|
unsigned desc_imm,
|
|
|
|
|
|
bool eot)
|
2015-03-19 15:44:24 +02:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2015-10-28 10:58:09 -07:00
|
|
|
|
struct brw_inst *send;
|
2015-03-19 15:44:24 +02:00
|
|
|
|
|
2016-01-31 18:28:42 -08:00
|
|
|
|
dst = retype(dst, BRW_REGISTER_TYPE_UW);
|
|
|
|
|
|
|
2015-03-19 15:44:24 +02:00
|
|
|
|
assert(desc.type == BRW_REGISTER_TYPE_UD);
|
|
|
|
|
|
|
|
|
|
|
|
if (desc.file == BRW_IMMEDIATE_VALUE) {
|
2015-10-28 10:58:09 -07:00
|
|
|
|
send = next_insn(p, BRW_OPCODE_SEND);
|
2019-08-26 15:21:40 -07:00
|
|
|
|
brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
|
2018-06-02 15:07:31 -07:00
|
|
|
|
brw_set_desc(p, send, desc.ud | desc_imm);
|
2015-03-19 15:44:24 +02:00
|
|
|
|
} else {
|
2019-09-26 23:38:24 -07:00
|
|
|
|
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
2015-03-19 15:44:24 +02:00
|
|
|
|
struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
|
|
|
|
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
|
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
2017-08-30 13:36:58 -07:00
|
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
2015-03-19 15:44:24 +02:00
|
|
|
|
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
2015-03-19 15:44:24 +02:00
|
|
|
|
|
|
|
|
|
|
/* Load the indirect descriptor to an address register using OR so the
|
2018-06-02 15:08:18 -07:00
|
|
|
|
* caller can specify additional descriptor bits with the desc_imm
|
|
|
|
|
|
* immediate.
|
2015-03-19 15:44:24 +02:00
|
|
|
|
*/
|
2018-06-02 15:07:31 -07:00
|
|
|
|
brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
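/* Illustrative: a run-time descriptor in desc (e.g. a computed surface
 * index) is merged with the static desc_imm bits into a0.0, which the
 * send emitted below then reads its descriptor from.
 */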
|
2015-03-19 15:44:24 +02:00
|
|
|
|
|
|
|
|
|
|
brw_pop_insn_state(p);
|
|
|
|
|
|
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
2015-03-19 15:44:24 +02:00
|
|
|
|
send = next_insn(p, BRW_OPCODE_SEND);
|
2019-08-26 15:21:40 -07:00
|
|
|
|
brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
|
2019-09-03 12:18:38 -07:00
|
|
|
|
|
|
|
|
|
|
if (devinfo->gen >= 12)
|
|
|
|
|
|
brw_inst_set_send_sel_reg32_desc(devinfo, send, true);
|
|
|
|
|
|
else
|
|
|
|
|
|
brw_set_src1(p, send, addr);
|
2015-03-19 15:44:24 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
brw_set_dest(p, send, dst);
|
2015-04-14 18:00:06 -07:00
|
|
|
|
brw_inst_set_sfid(devinfo, send, sfid);
|
2019-02-07 17:45:51 -06:00
|
|
|
|
brw_inst_set_eot(devinfo, send, eot);
|
2015-03-19 15:44:24 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
2018-11-15 15:17:06 -06:00
|
|
|
|
void
|
|
|
|
|
|
brw_send_indirect_split_message(struct brw_codegen *p,
|
|
|
|
|
|
unsigned sfid,
|
|
|
|
|
|
struct brw_reg dst,
|
|
|
|
|
|
struct brw_reg payload0,
|
|
|
|
|
|
struct brw_reg payload1,
|
|
|
|
|
|
struct brw_reg desc,
|
|
|
|
|
|
unsigned desc_imm,
|
|
|
|
|
|
struct brw_reg ex_desc,
|
2019-02-07 17:45:51 -06:00
|
|
|
|
unsigned ex_desc_imm,
|
|
|
|
|
|
bool eot)
|
2018-11-15 15:17:06 -06:00
|
|
|
|
{
|
|
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
|
|
|
|
|
struct brw_inst *send;
|
|
|
|
|
|
|
|
|
|
|
|
dst = retype(dst, BRW_REGISTER_TYPE_UW);
|
|
|
|
|
|
|
|
|
|
|
|
assert(desc.type == BRW_REGISTER_TYPE_UD);
|
|
|
|
|
|
|
|
|
|
|
|
if (desc.file == BRW_IMMEDIATE_VALUE) {
|
|
|
|
|
|
desc.ud |= desc_imm;
|
|
|
|
|
|
} else {
|
2019-09-26 23:38:24 -07:00
|
|
|
|
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
|
|
|
|
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
|
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
|
|
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
|
|
|
|
|
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
2018-11-15 15:17:06 -06:00
|
|
|
|
|
|
|
|
|
|
/* Load the indirect descriptor to an address register using OR so the
|
|
|
|
|
|
* caller can specify additional descriptor bits with the desc_imm
|
|
|
|
|
|
* immediate.
|
|
|
|
|
|
*/
|
|
|
|
|
|
brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
|
|
|
|
|
|
|
|
|
|
|
|
brw_pop_insn_state(p);
|
|
|
|
|
|
desc = addr;
|
2019-09-26 23:38:24 -07:00
|
|
|
|
|
|
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
2018-11-15 15:17:06 -06:00
|
|
|
|
}
|
|
|
|
|
|
|
2019-08-25 23:43:29 -07:00
|
|
|
|
if (ex_desc.file == BRW_IMMEDIATE_VALUE &&
|
2021-01-27 15:28:24 -06:00
|
|
|
|
(devinfo->gen >= 12 ||
|
|
|
|
|
|
((ex_desc.ud | ex_desc_imm) & INTEL_MASK(15, 12)) == 0)) {
|
2018-11-15 15:17:06 -06:00
|
|
|
|
ex_desc.ud |= ex_desc_imm;
|
|
|
|
|
|
} else {
|
2019-09-26 23:38:24 -07:00
|
|
|
|
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD);
|
|
|
|
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
|
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
|
|
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
|
|
|
|
|
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
2018-11-15 15:17:06 -06:00
|
|
|
|
|
|
|
|
|
|
/* Load the indirect extended descriptor to an address register using OR
|
|
|
|
|
|
* so the caller can specify additional descriptor bits with the
|
|
|
|
|
|
* ex_desc_imm immediate.
|
|
|
|
|
|
*
|
2019-02-07 17:45:51 -06:00
|
|
|
|
* Even though the instruction dispatcher always pulls the SFID and EOT
|
|
|
|
|
|
* fields from the instruction itself, the actual external unit which
|
|
|
|
|
|
* processes the message gets the SFID and EOT from the extended
|
|
|
|
|
|
* descriptor which comes from the address register. If we don't OR
|
|
|
|
|
|
* those two bits in, the external unit may get confused and hang.
|
2018-11-15 15:17:06 -06:00
|
|
|
|
*/
|
2019-08-25 23:43:29 -07:00
|
|
|
|
unsigned imm_part = ex_desc_imm | sfid | eot << 5;
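/* Illustrative (layout inferred from how imm_part is built, not quoted
 * from the PRM): the SFID occupies bits 4:0 of the extended descriptor
 * and EOT bit 5, so e.g. sfid == 0xc with eot set contributes 0x2c.
 */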
|
|
|
|
|
|
|
|
|
|
|
|
if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
|
2020-01-22 22:54:20 -06:00
|
|
|
|
/* ex_desc bits 15:12 don't exist in the instruction encoding prior
|
|
|
|
|
|
* to Gen12, so we may have fallen back to an indirect extended
|
|
|
|
|
|
* descriptor.
|
2019-08-25 23:43:29 -07:00
|
|
|
|
*/
|
|
|
|
|
|
brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
|
|
|
|
|
|
} else {
|
|
|
|
|
|
brw_OR(p, addr, ex_desc, brw_imm_ud(imm_part));
|
|
|
|
|
|
}
|
2018-11-15 15:17:06 -06:00
|
|
|
|
|
|
|
|
|
|
brw_pop_insn_state(p);
|
|
|
|
|
|
ex_desc = addr;
|
2019-09-26 23:38:24 -07:00
|
|
|
|
|
|
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
2018-11-15 15:17:06 -06:00
|
|
|
|
}
|
|
|
|
|
|
|
2019-08-25 18:12:35 -07:00
|
|
|
|
send = next_insn(p, devinfo->gen >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
brw_set_dest(p, send, dst);
|
|
|
|
|
|
brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD));
|
|
|
|
|
|
brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD));
|
|
|
|
|
|
|
|
|
|
|
|
if (desc.file == BRW_IMMEDIATE_VALUE) {
|
|
|
|
|
|
brw_inst_set_send_sel_reg32_desc(devinfo, send, 0);
|
|
|
|
|
|
brw_inst_set_send_desc(devinfo, send, desc.ud);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
|
|
|
|
|
|
assert(desc.nr == BRW_ARF_ADDRESS);
|
|
|
|
|
|
assert(desc.subnr == 0);
|
|
|
|
|
|
brw_inst_set_send_sel_reg32_desc(devinfo, send, 1);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
|
|
|
|
|
|
brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
|
2019-02-05 23:22:06 -08:00
|
|
|
|
brw_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
} else {
|
|
|
|
|
|
assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
|
|
|
|
|
|
assert(ex_desc.nr == BRW_ARF_ADDRESS);
|
|
|
|
|
|
assert((ex_desc.subnr & 0x3) == 0);
|
|
|
|
|
|
brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
|
|
|
|
|
|
brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
brw_inst_set_sfid(devinfo, send, sfid);
|
2019-02-07 17:45:51 -06:00
|
|
|
|
brw_inst_set_eot(devinfo, send, eot);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
}
|
|
|
|
|
|
|
2018-06-03 03:30:50 -07:00
|
|
|
|
static void
|
2015-04-23 14:21:31 +03:00
|
|
|
|
brw_send_indirect_surface_message(struct brw_codegen *p,
|
|
|
|
|
|
unsigned sfid,
|
|
|
|
|
|
struct brw_reg dst,
|
|
|
|
|
|
struct brw_reg payload,
|
|
|
|
|
|
struct brw_reg surface,
|
2018-06-07 15:19:49 -07:00
|
|
|
|
unsigned desc_imm)
|
2015-04-23 14:21:31 +03:00
|
|
|
|
{
|
|
|
|
|
|
if (surface.file != BRW_IMMEDIATE_VALUE) {
|
2019-09-26 23:38:24 -07:00
|
|
|
|
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
2015-04-23 14:21:31 +03:00
|
|
|
|
struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
|
|
|
|
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
|
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
2017-08-30 13:36:58 -07:00
|
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
2015-04-23 14:21:31 +03:00
|
|
|
|
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
2015-04-23 14:21:31 +03:00
|
|
|
|
|
|
|
|
|
|
/* Mask out invalid bits from the surface index to avoid hangs e.g. when
|
|
|
|
|
|
* some surface array is accessed out of bounds.
|
|
|
|
|
|
*/
|
2018-06-03 03:30:50 -07:00
|
|
|
|
brw_AND(p, addr,
|
|
|
|
|
|
suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)),
|
|
|
|
|
|
BRW_GET_SWZ(surface.swizzle, 0)),
|
|
|
|
|
|
brw_imm_ud(0xff));
|
2015-04-23 14:21:31 +03:00
|
|
|
|
|
|
|
|
|
|
brw_pop_insn_state(p);
|
|
|
|
|
|
|
|
|
|
|
|
surface = addr;
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
2015-04-23 14:21:31 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
2019-02-07 17:45:51 -06:00
|
|
|
|
brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false);
|
2015-04-23 14:21:31 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
2016-05-14 23:53:19 -07:00
|
|
|
|
static bool
|
2016-08-22 15:01:08 -07:00
|
|
|
|
while_jumps_before_offset(const struct gen_device_info *devinfo,
|
2016-05-14 23:53:19 -07:00
|
|
|
|
brw_inst *insn, int while_offset, int start_offset)
|
|
|
|
|
|
{
|
|
|
|
|
|
int scale = 16 / brw_jump_scale(devinfo);
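/* Illustrative note, assuming brw_jump_scale() returns jump units per
 * 16-byte instruction: scale converts JIP's units into bytes, e.g. on
 * hardware where JIP is already in bytes it evaluates to 1.
 */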
|
|
|
|
|
|
int jip = devinfo->gen == 6 ? brw_inst_gen6_jump_count(devinfo, insn)
|
|
|
|
|
|
: brw_inst_jip(devinfo, insn);
|
2017-01-26 13:50:42 +11:00
|
|
|
|
assert(jip < 0);
|
2016-05-14 23:53:19 -07:00
|
|
|
|
return while_offset + jip * scale <= start_offset;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2010-12-01 11:46:46 -08:00
|
|
|
|
static int
|
2015-04-16 11:06:57 -07:00
|
|
|
|
brw_find_next_block_end(struct brw_codegen *p, int start_offset)
|
2010-12-01 11:46:46 -08:00
|
|
|
|
{
|
2014-05-17 12:53:56 -07:00
|
|
|
|
int offset;
|
2012-02-03 12:05:05 +01:00
|
|
|
|
void *store = p->store;
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
|
i965: Fix JIP to properly skip over unrelated control flow.
We've apparently always been botching JIP for sequences such as:
do
cmp.f0.0 ...
(+f0.0) break
...
if
...
else
...
endif
...
while
Normally, UIP is supposed to point to the final destination of the jump,
while in nested control flow, JIP is supposed to point to the end of the
current nesting level. It essentially bounces out of the current nested
control flow, to an instruction that has a JIP which bounces out another
level, and so on.
In the above example, when setting JIP for the BREAK, we call
brw_find_next_block_end(), which begins a search after the BREAK for the
next ENDIF, ELSE, WHILE, or HALT. It ignores the IF and finds the ELSE,
setting JIP there.
This makes no sense at all. The break is supposed to skip over the
whole if/else/endif block entirely. They have a sibling relationship,
not a nesting relationship.
This patch fixes brw_find_next_block_end() to track depth as it does
its search, and ignore anything not at depth 0. So when it sees the
IF, it ignores everything until after the ENDIF. That way, it finds
the end of the right block.
I noticed this while reading some assembly code. We believe jumping
earlier is harmless, but makes the EU walk through a bunch of disabled
instructions for no reason. I noticed that GLBenchmark Manhattan had
a shader that contained a BREAK with a bogus JIP, but didn't measure
any performance improvement (it's likely minuscule, if there is any).
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
2015-11-17 18:24:11 -08:00
|
|
|
|
int depth = 0;
|
|
|
|
|
|
|
2015-04-14 18:00:06 -07:00
|
|
|
|
for (offset = next_offset(devinfo, store, start_offset);
|
2014-06-07 21:15:59 -07:00
|
|
|
|
offset < p->next_insn_offset;
|
2015-04-14 18:00:06 -07:00
|
|
|
|
offset = next_offset(devinfo, store, offset)) {
|
2014-06-13 14:29:25 -07:00
|
|
|
|
brw_inst *insn = store + offset;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
|
2015-04-14 18:00:06 -07:00
|
|
|
|
switch (brw_inst_opcode(devinfo, insn)) {
|
2015-11-17 18:24:11 -08:00
|
|
|
|
case BRW_OPCODE_IF:
|
|
|
|
|
|
depth++;
|
|
|
|
|
|
break;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
case BRW_OPCODE_ENDIF:
|
2015-11-17 18:24:11 -08:00
|
|
|
|
if (depth == 0)
|
|
|
|
|
|
return offset;
|
|
|
|
|
|
depth--;
|
|
|
|
|
|
break;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
case BRW_OPCODE_WHILE:
|
i965: Fix JIP to skip over sibling do...while loops.
We've apparently always been botching JIP for sequences such as:
do
cmp.f0.0 ...
(+f0.0) break
...
do
...
while
...
while
Because the "do" instruction doesn't actually exist, the inner "while"
is at the same depth as the "break". brw_find_next_block_end() thus
mistook the inner "while" as the end of the loop containing the "break",
and set the "break" to point to the wrong place.
Only "while" instructions that jump before our instruction are relevant.
We need to ignore the rest, as they're sibling control flow nodes (or
children, but this was already handled by the depth == 0 check).
See also commit 1ac1581f3889d5f7e6e231c05651f44fbd80f0b6.
This prevents channel masks from being screwed up, and fixes GPU
hangs(*) in dEQP-GLES31.functional.shaders.multisample_interpolation.
interpolate_at_sample.centroid_qualified.multisample_texture_16.
The test ended up executing code with no channels enabled, and that
code contained FIND_LIVE_CHANNEL, which returned 8 (out of range for
a SIMD8 program), which then was used in indirect GRF addressing,
which randomly got a boolean value (0xFFFFFFFF), interpreted it as
a sample ID, OR'd it into an indirect send message descriptor,
which corrupted the message length, sending a pixel interpolator
message with mlen 15, which is illegal. Whew :)
(*) Technically, the test doesn't GPU hang currently, but only
because another bug prevents it from issuing pixel interpolator
messages entirely...with that fixed, it hangs.
Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
2016-05-14 23:54:48 -07:00
|
|
|
|
/* If the while doesn't jump before our instruction, it's the end
|
|
|
|
|
|
* of a sibling do...while loop. Ignore it.
|
|
|
|
|
|
*/
|
|
|
|
|
|
if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
|
|
|
|
|
|
continue;
|
2016-05-24 12:23:00 -07:00
|
|
|
|
/* fallthrough */
|
2016-05-14 23:54:48 -07:00
|
|
|
|
case BRW_OPCODE_ELSE:
|
2012-12-12 12:47:50 -08:00
|
|
|
|
case BRW_OPCODE_HALT:
|
2015-11-17 18:24:11 -08:00
|
|
|
|
if (depth == 0)
|
|
|
|
|
|
return offset;
|
2018-01-23 19:23:20 -08:00
|
|
|
|
default:
|
|
|
|
|
|
break;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2012-12-06 10:15:08 -08:00
|
|
|
|
|
|
|
|
|
|
return 0;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
}
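Putting the pieces above back together without the blame annotations, the depth-tracking scan reads roughly as follows. This is a sketch assembled from the lines above; the function signature is assumed to match the call site further down (brw_find_next_block_end(p, offset)):

static int
brw_find_next_block_end(struct brw_codegen *p, int start_offset)
{
   const struct gen_device_info *devinfo = p->devinfo;
   void *store = p->store;
   int depth = 0;

   for (int offset = next_offset(devinfo, store, start_offset);
        offset < p->next_insn_offset;
        offset = next_offset(devinfo, store, offset)) {
      brw_inst *insn = store + offset;

      switch (brw_inst_opcode(devinfo, insn)) {
      case BRW_OPCODE_IF:
         depth++;                    /* entering nested control flow */
         break;
      case BRW_OPCODE_ENDIF:
         if (depth == 0)
            return offset;           /* the end of our own block */
         depth--;
         break;
      case BRW_OPCODE_WHILE:
         /* A WHILE that doesn't jump back before us closes a sibling
          * do...while loop, not ours; ignore it. */
         if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
            continue;
         /* fallthrough */
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_HALT:
         if (depth == 0)
            return offset;
         break;
      default:
         break;
      }
   }

   return 0;
}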
|
|
|
|
|
|
|
|
|
|
|
|
/* There is no DO instruction on gen6, so to find the end of the loop
|
|
|
|
|
|
* we have to see if the loop is jumping back before our start
|
|
|
|
|
|
* instruction.
|
|
|
|
|
|
*/
|
|
|
|
|
|
static int
|
2015-04-16 11:06:57 -07:00
|
|
|
|
brw_find_loop_end(struct brw_codegen *p, int start_offset)
|
2010-12-01 11:46:46 -08:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2014-05-17 12:53:56 -07:00
|
|
|
|
int offset;
|
2012-02-03 12:05:05 +01:00
|
|
|
|
void *store = p->store;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
|
2015-04-14 18:00:06 -07:00
|
|
|
|
assert(devinfo->gen >= 6);
|
2014-06-30 08:06:43 -07:00
|
|
|
|
|
2012-02-03 12:05:05 +01:00
|
|
|
|
/* Always start after the instruction (such as a WHILE) we're trying to fix
|
|
|
|
|
|
* up.
|
|
|
|
|
|
*/
|
2015-04-14 18:00:06 -07:00
|
|
|
|
for (offset = next_offset(devinfo, store, start_offset);
|
2014-06-07 21:15:59 -07:00
|
|
|
|
offset < p->next_insn_offset;
|
2015-04-14 18:00:06 -07:00
|
|
|
|
offset = next_offset(devinfo, store, offset)) {
|
2014-06-13 14:29:25 -07:00
|
|
|
|
brw_inst *insn = store + offset;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
|
2015-04-14 18:00:06 -07:00
|
|
|
|
if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE) {
|
2016-05-14 23:53:19 -07:00
|
|
|
|
if (while_jumps_before_offset(devinfo, insn, offset, start_offset))
|
2014-05-17 12:53:56 -07:00
|
|
|
|
return offset;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
assert(!"not reached");
|
2014-05-17 12:53:56 -07:00
|
|
|
|
return start_offset;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
}
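brw_find_loop_end() leans on while_jumps_before_offset(), defined earlier in this file. A sketch of that check, consistent with how it is used here (the exact implementation may differ in detail):

static bool
while_jumps_before_offset(const struct gen_device_info *devinfo,
                          brw_inst *insn, int while_offset, int start_offset)
{
   int scale = 16 / brw_jump_scale(devinfo);
   int jip = devinfo->gen == 6 ? brw_inst_gen6_jump_count(devinfo, insn)
                               : brw_inst_jip(devinfo, insn);

   /* A loop-closing WHILE has a negative jump; it encloses start_offset
    * only if it jumps back to or before that instruction. */
   assert(jip < 0);
   return while_offset + jip * scale <= start_offset;
}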
|
|
|
|
|
|
|
|
|
|
|
|
/* After program generation, go back and update the UIP and JIP of
|
2012-12-06 10:15:08 -08:00
|
|
|
|
* BREAK, CONT, and HALT instructions to their correct locations.
|
2010-12-01 11:46:46 -08:00
|
|
|
|
*/
|
|
|
|
|
|
void
|
2016-08-29 15:57:41 -07:00
|
|
|
|
brw_set_uip_jip(struct brw_codegen *p, int start_offset)
|
2010-12-01 11:46:46 -08:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2014-05-17 12:53:56 -07:00
|
|
|
|
int offset;
|
2015-04-14 18:00:06 -07:00
|
|
|
|
int br = brw_jump_scale(devinfo);
|
2014-06-30 08:00:25 -07:00
|
|
|
|
int scale = 16 / br;
|
2012-02-03 12:05:05 +01:00
|
|
|
|
void *store = p->store;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
|
2015-04-14 18:00:06 -07:00
|
|
|
|
if (devinfo->gen < 6)
|
2010-12-01 11:46:46 -08:00
|
|
|
|
return;
|
|
|
|
|
|
|
2016-08-29 15:57:41 -07:00
|
|
|
|
for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
|
2014-06-13 14:29:25 -07:00
|
|
|
|
brw_inst *insn = store + offset;
|
2016-08-29 15:57:41 -07:00
|
|
|
|
assert(brw_inst_cmpt_control(devinfo, insn) == 0);
|
2010-12-01 11:46:46 -08:00
|
|
|
|
|
2014-05-17 12:53:56 -07:00
|
|
|
|
int block_end_offset = brw_find_next_block_end(p, offset);
|
2015-04-14 18:00:06 -07:00
|
|
|
|
switch (brw_inst_opcode(devinfo, insn)) {
|
2010-12-01 11:46:46 -08:00
|
|
|
|
case BRW_OPCODE_BREAK:
|
2014-05-17 12:53:56 -07:00
|
|
|
|
assert(block_end_offset != 0);
|
2015-04-14 18:00:06 -07:00
|
|
|
|
brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
|
2011-04-30 01:17:52 -07:00
|
|
|
|
/* Gen7 UIP points to WHILE; Gen6 points just after it */
|
2015-04-14 18:00:06 -07:00
|
|
|
|
brw_inst_set_uip(devinfo, insn,
|
2014-05-17 12:53:56 -07:00
|
|
|
|
(brw_find_loop_end(p, offset) - offset +
|
2015-04-14 18:00:06 -07:00
|
|
|
|
(devinfo->gen == 6 ? 16 : 0)) / scale);
|
2010-12-01 11:46:46 -08:00
|
|
|
|
break;
|
|
|
|
|
|
case BRW_OPCODE_CONTINUE:
|
2014-05-17 12:53:56 -07:00
|
|
|
|
assert(block_end_offset != 0);
|
2015-04-14 18:00:06 -07:00
|
|
|
|
brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
|
|
|
|
|
|
brw_inst_set_uip(devinfo, insn,
|
2014-06-04 17:08:57 -07:00
|
|
|
|
(brw_find_loop_end(p, offset) - offset) / scale);
|
2011-04-30 01:30:55 -07:00
|
|
|
|
|
2015-04-14 18:00:06 -07:00
|
|
|
|
assert(brw_inst_uip(devinfo, insn) != 0);
|
|
|
|
|
|
assert(brw_inst_jip(devinfo, insn) != 0);
|
2013-01-01 17:02:38 -08:00
|
|
|
|
break;
|
i965: Jump to the end of the next outer conditional block on ENDIFs.
From the Ivybridge PRM, Volume 4, Part 3, section 6.24 (page 172):
"The endif instruction is also used to hop out of nested conditionals by
jumping to the end of the next outer conditional block when all
channels are disabled."
Also:
"Pseudocode:
Evaluate(WrEn);
if ( WrEn == 0 ) { // all channels false
Jump(IP + JIP);
}"
First, ENDIF re-enables any channels that were disabled because they
didn't match the conditional. If any channels are active, it proceeds
to the next instruction (IP + 16). However, if they're all disabled,
there's no point in walking through all of the instructions that have no
effect---it can jump to the next instruction that might re-enable some
channels (an ELSE, ENDIF, or WHILE).
Previously, we always set JIP on ENDIF instructions to 2 (which is
measured in 8-byte units). This made it do Jump(IP + 16), which just
meant it would go to the next instruction even if all channels were off.
It turns out that walking over instructions while all the channels are
disabled like this costs more than just instruction dispatch overhead:
if there are texturing messages, it still costs a couple hundred cycles
to not actually read the texture results.
This patch finds the next instruction that could re-enable channels and
sets JIP accordingly.
Reviewed-by: Eric Anholt <eric@anholt.net>
2012-12-12 02:20:05 -08:00
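The jump-distance units referenced above can be made concrete; a worked example, assuming brw_jump_scale() returns 2 on Gen5-7 and 16 on Gen8+:

/* br = brw_jump_scale(devinfo), scale = 16 / br:
 *   Gen5-7: br = 2,  scale = 8  -> JIP is counted in 8-byte chunks
 *   Gen8+:  br = 16, scale = 1  -> JIP is counted in bytes
 * So an ENDIF at byte offset 0x80 whose block ends at 0xC0 gets
 * jip = (0xC0 - 0x80) / 8 = 8 on Gen7, and 0x40 = 64 on Gen8. With no
 * block end found, the code below falls back to 1 * br = 2 on Gen7,
 * i.e. the old "just go to the next instruction" behavior. */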
|
|
|
|
|
2014-08-28 13:34:22 -07:00
|
|
|
|
case BRW_OPCODE_ENDIF: {
|
|
|
|
|
|
int32_t jump = (block_end_offset == 0) ?
|
|
|
|
|
|
1 * br : (block_end_offset - offset) / scale;
|
2015-04-14 18:00:06 -07:00
|
|
|
|
if (devinfo->gen >= 7)
|
|
|
|
|
|
brw_inst_set_jip(devinfo, insn, jump);
|
2012-12-12 02:20:05 -08:00
|
|
|
|
else
|
2015-04-14 18:00:06 -07:00
|
|
|
|
brw_inst_set_gen6_jump_count(devinfo, insn, jump);
|
2010-12-01 11:46:46 -08:00
|
|
|
|
break;
|
2014-08-28 13:34:22 -07:00
|
|
|
|
}
|
2012-12-12 02:20:05 -08:00
|
|
|
|
|
2012-12-06 10:15:08 -08:00
|
|
|
|
case BRW_OPCODE_HALT:
|
|
|
|
|
|
/* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
|
|
|
|
|
|
*
|
|
|
|
|
|
* "In case of the halt instruction not inside any conditional
|
|
|
|
|
|
* code block, the value of <JIP> and <UIP> should be the
|
|
|
|
|
|
* same. In case of the halt instruction inside conditional code
|
|
|
|
|
|
* block, the <UIP> should be the end of the program, and the
|
|
|
|
|
|
* <JIP> should be end of the most inner conditional code block."
|
|
|
|
|
|
*
|
|
|
|
|
|
* The uip will have already been set by whoever set up the
|
|
|
|
|
|
* instruction.
|
|
|
|
|
|
*/
|
2014-05-17 12:53:56 -07:00
|
|
|
|
if (block_end_offset == 0) {
|
2015-04-14 18:00:06 -07:00
|
|
|
|
brw_inst_set_jip(devinfo, insn, brw_inst_uip(devinfo, insn));
|
2012-12-06 10:15:08 -08:00
|
|
|
|
} else {
|
2015-04-14 18:00:06 -07:00
|
|
|
|
brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
|
2012-12-06 10:15:08 -08:00
|
|
|
|
}
|
2015-04-14 18:00:06 -07:00
|
|
|
|
assert(brw_inst_uip(devinfo, insn) != 0);
|
|
|
|
|
|
assert(brw_inst_jip(devinfo, insn) != 0);
|
2012-12-06 10:15:08 -08:00
|
|
|
|
break;
|
2018-01-23 19:23:20 -08:00
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
break;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2015-04-16 11:06:57 -07:00
|
|
|
|
void brw_ff_sync(struct brw_codegen *p,
|
2009-07-13 10:48:43 +08:00
|
|
|
|
struct brw_reg dest,
|
2013-11-25 15:51:24 -08:00
|
|
|
|
unsigned msg_reg_nr,
|
2009-07-13 10:48:43 +08:00
|
|
|
|
struct brw_reg src0,
|
2011-10-07 12:26:50 -07:00
|
|
|
|
bool allocate,
|
2013-11-25 15:51:24 -08:00
|
|
|
|
unsigned response_length,
|
2011-10-07 12:26:50 -07:00
|
|
|
|
bool eot)
|
2009-07-13 10:48:43 +08:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2014-06-13 14:29:25 -07:00
|
|
|
|
brw_inst *insn;
|
2009-07-13 10:48:43 +08:00
|
|
|
|
|
2011-03-16 14:09:17 -07:00
|
|
|
|
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
|
2010-09-17 14:23:48 +08:00
|
|
|
|
|
|
|
|
|
|
insn = next_insn(p, BRW_OPCODE_SEND);
|
2010-12-03 11:49:29 -08:00
|
|
|
|
brw_set_dest(p, insn, dest);
|
2011-05-10 16:51:12 -07:00
|
|
|
|
brw_set_src0(p, insn, src0);
|
|
|
|
|
|
brw_set_src1(p, insn, brw_imm_d(0));
|
2009-07-13 10:48:43 +08:00
|
|
|
|
|
2015-04-14 18:00:06 -07:00
|
|
|
|
if (devinfo->gen < 6)
|
|
|
|
|
|
brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);
|
2009-07-13 10:48:43 +08:00
|
|
|
|
|
2011-05-10 16:51:12 -07:00
|
|
|
|
brw_set_ff_sync_message(p,
|
2010-05-13 22:15:34 -07:00
|
|
|
|
insn,
|
|
|
|
|
|
allocate,
|
|
|
|
|
|
response_length,
|
|
|
|
|
|
eot);
|
2009-07-13 10:48:43 +08:00
|
|
|
|
}
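A hedged usage sketch for brw_ff_sync(): allocating an initial URB handle at the start of a Gen6 GS thread. The register numbers and message lengths are illustrative assumptions, not taken from a real caller:

/* Illustrative only: registers and lengths are assumptions. */
brw_ff_sync(p,
            brw_vec8_grf(0, 0),   /* dest: receives the allocated handle */
            0,                    /* msg_reg_nr */
            brw_vec8_grf(0, 0),   /* src0: thread header */
            true,                 /* allocate a URB handle */
            1,                    /* response_length */
            false);               /* eot: not the end of the thread */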
|
i965 gen6: Initial implementation of transform feedback.
This patch adds basic transform feedback capability for Gen6 hardware.
This consists of several related pieces of functionality:
(1) In gen6_sol.c, we set up binding table entries for use by
transform feedback. We use one binding table entry per transform
feedback varying (this allows us to avoid doing pointer arithmetic in
the shader, since we can set up the binding table entries with the
appropriate offsets and surface pitches to place each varying at the
correct address).
(2) In brw_context.c, we advertise the hardware capabilities, which
are as follows:
MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 64
MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 4
MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 16
OpenGL 3.0 requires these values to be at least 64, 4, and 4,
respectively. The reason we advertise a larger value than required
for MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS is that we have already
set aside 64 binding table entries, so we might as well make them all
available in both separate attribs and interleaved modes.
(3) We set aside a single SVBI ("streamed vertex buffer index") for
use by transform feedback. The hardware supports four independent
SVBI's, but we only need one, since vertices are added to all
transform feedback buffers at the same rate. Note: at the moment this
index is reset to 0 only when the driver is initialized. It needs to
be reset to 0 whenever BeginTransformFeedback() is called, and
otherwise preserved.
(4) In brw_gs_emit.c and brw_gs.c, we modify the geometry shader
program to output transform feedback data as a side effect.
(5) In gen6_gs_state.c, we configure the geometry shader stage to
handle the SVBI pointer correctly.
Note: ordering of vertices is not yet correct for triangle strips
(alternate triangles are improperly oriented). This will be addressed
in a future patch.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
2011-11-28 06:55:01 -08:00
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* Emit the SEND instruction necessary to generate stream output data on Gen6
|
|
|
|
|
|
* (for transform feedback).
|
|
|
|
|
|
*
|
|
|
|
|
|
* If send_commit_msg is true, this is the last piece of stream output data
|
|
|
|
|
|
* from this thread, so send the data as a committed write. According to the
|
|
|
|
|
|
* Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
|
|
|
|
|
|
*
|
|
|
|
|
|
* "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
|
|
|
|
|
|
* writes are complete by sending the final write as a committed write."
|
|
|
|
|
|
*/
|
|
|
|
|
|
void
|
2015-04-16 11:06:57 -07:00
|
|
|
|
brw_svb_write(struct brw_codegen *p,
|
2011-11-28 06:55:01 -08:00
|
|
|
|
struct brw_reg dest,
|
2013-11-25 15:51:24 -08:00
|
|
|
|
unsigned msg_reg_nr,
|
2011-11-28 06:55:01 -08:00
|
|
|
|
struct brw_reg src0,
|
2013-11-25 15:51:24 -08:00
|
|
|
|
unsigned binding_table_index,
|
2011-11-28 06:55:01 -08:00
|
|
|
|
bool send_commit_msg)
|
|
|
|
|
|
{
|
2015-04-23 14:36:16 +03:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
|
|
|
|
|
const unsigned target_cache =
|
|
|
|
|
|
(devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
|
|
|
|
|
|
devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
|
2018-07-09 16:12:59 -07:00
|
|
|
|
BRW_SFID_DATAPORT_WRITE);
|
2014-06-13 14:29:25 -07:00
|
|
|
|
brw_inst *insn;
|
2011-11-28 06:55:01 -08:00
|
|
|
|
|
|
|
|
|
|
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
|
|
|
|
|
|
|
|
|
|
|
|
insn = next_insn(p, BRW_OPCODE_SEND);
|
2018-07-09 16:12:59 -07:00
|
|
|
|
brw_inst_set_sfid(devinfo, insn, target_cache);
|
2011-11-28 06:55:01 -08:00
|
|
|
|
brw_set_dest(p, insn, dest);
|
|
|
|
|
|
brw_set_src0(p, insn, src0);
|
2018-07-09 16:12:59 -07:00
|
|
|
|
brw_set_desc(p, insn,
|
|
|
|
|
|
brw_message_desc(devinfo, 1, send_commit_msg, true) |
|
|
|
|
|
|
brw_dp_write_desc(devinfo, binding_table_index,
|
|
|
|
|
|
0, /* msg_control: ignored */
|
|
|
|
|
|
GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
|
|
|
|
|
|
0, /* last_render_target: ignored */
|
|
|
|
|
|
send_commit_msg)); /* send_commit_msg */
|
2011-11-28 06:55:01 -08:00
|
|
|
|
}
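A hedged usage sketch for brw_svb_write(): the final stream-output write of a thread sets send_commit_msg, so the commit response must land somewhere. The destination, MRF, and binding-table index below are assumptions for illustration:

/* Illustrative only: register and binding-table choices are assumptions. */
brw_svb_write(p,
              retype(brw_vec8_grf(4, 0), BRW_REGISTER_TYPE_UD), /* commit lands here */
              1,                   /* msg_reg_nr */
              brw_message_reg(1),  /* src0: header + vertex data */
              0,                   /* binding_table_index (hypothetical) */
              true);               /* send_commit_msg: last SVB write */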
|
2012-11-27 14:10:52 -08:00
|
|
|
|
|
2015-02-26 12:56:19 +02:00
|
|
|
|
static unsigned
|
2020-09-22 14:10:56 -07:00
|
|
|
|
brw_surface_payload_size(unsigned num_channels,
|
2018-11-01 13:40:31 -05:00
|
|
|
|
unsigned exec_size /**< 0 for SIMD4x2 */)
|
2015-02-26 12:56:19 +02:00
|
|
|
|
{
|
2018-11-01 13:40:31 -05:00
|
|
|
|
if (exec_size == 0)
|
|
|
|
|
|
return 1; /* SIMD4x2 */
|
|
|
|
|
|
else if (exec_size <= 8)
|
2015-02-26 12:56:19 +02:00
|
|
|
|
return num_channels;
|
2018-11-01 13:40:31 -05:00
|
|
|
|
else
|
|
|
|
|
|
return 2 * num_channels;
|
2015-02-26 12:56:19 +02:00
|
|
|
|
}
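Working the rule above through the common cases, assuming one SIMD8 channel vector fills one register:

/* exec_size == 0 (SIMD4x2):      1 register, regardless of channels
 * exec_size == 8, 4 channels:    4 registers (one per channel)
 * exec_size == 16, 4 channels:   8 registers (two per channel)      */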
|
|
|
|
|
|
|
2013-09-11 14:01:50 -07:00
|
|
|
|
void
|
2015-04-16 11:06:57 -07:00
|
|
|
|
brw_untyped_atomic(struct brw_codegen *p,
|
2015-04-23 14:21:31 +03:00
|
|
|
|
struct brw_reg dst,
|
2014-09-11 16:13:15 -07:00
|
|
|
|
struct brw_reg payload,
|
2015-04-23 14:21:31 +03:00
|
|
|
|
struct brw_reg surface,
|
2013-11-25 15:51:24 -08:00
|
|
|
|
unsigned atomic_op,
|
|
|
|
|
|
unsigned msg_length,
|
2017-12-12 12:05:03 -08:00
|
|
|
|
bool response_expected,
|
|
|
|
|
|
bool header_present)
|
2015-02-26 12:56:19 +02:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2015-04-23 14:21:31 +03:00
|
|
|
|
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
|
|
|
|
|
|
HSW_SFID_DATAPORT_DATA_CACHE_1 :
|
|
|
|
|
|
GEN7_SFID_DATAPORT_DATA_CACHE);
|
2018-11-01 13:40:31 -05:00
|
|
|
|
const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
|
|
|
|
|
|
/* SIMD4x2 untyped atomic instructions only exist on HSW+ */
|
|
|
|
|
|
const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell;
|
|
|
|
|
|
const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) :
|
|
|
|
|
|
has_simd4x2 ? 0 : 8;
|
|
|
|
|
|
const unsigned response_length =
|
2020-09-22 14:10:56 -07:00
|
|
|
|
brw_surface_payload_size(response_expected, exec_size);
|
2018-06-07 15:19:49 -07:00
|
|
|
|
const unsigned desc =
|
2018-06-07 15:22:58 -07:00
|
|
|
|
brw_message_desc(devinfo, msg_length, response_length, header_present) |
|
2018-10-29 16:09:30 -05:00
|
|
|
|
brw_dp_untyped_atomic_desc(devinfo, exec_size, atomic_op,
|
|
|
|
|
|
response_expected);
|
2015-02-26 13:58:21 +02:00
|
|
|
|
/* Mask out unused components -- This is especially important in Align16
|
|
|
|
|
|
* mode on generations that don't have native support for SIMD4x2 atomics,
|
|
|
|
|
|
* because unused but enabled components will cause the dataport to perform
|
|
|
|
|
|
* additional atomic operations on the addresses that happen to be in the
|
|
|
|
|
|
* uninitialized Y, Z and W coordinates of the payload.
|
|
|
|
|
|
*/
|
|
|
|
|
|
const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
|
2015-04-23 14:21:31 +03:00
|
|
|
|
|
2018-06-07 15:22:58 -07:00
|
|
|
|
brw_send_indirect_surface_message(p, sfid, brw_writemask(dst, mask),
|
|
|
|
|
|
payload, surface, desc);
|
2013-09-11 14:01:50 -07:00
|
|
|
|
}
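A hedged usage sketch for brw_untyped_atomic(): a SIMD8 atomic add on binding table index 0 that returns the old value. Register numbers and message length are assumptions:

/* Illustrative only: register numbers and msg_length are assumptions. */
brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_untyped_atomic(p,
                   retype(brw_vec8_grf(10, 0), BRW_REGISTER_TYPE_UD), /* dst */
                   brw_message_reg(2),  /* payload: header + addresses + src */
                   brw_imm_ud(0),       /* surface: BTI 0 */
                   BRW_AOP_ADD,         /* atomic_op */
                   3,                   /* msg_length */
                   true,                /* response_expected */
                   true);               /* header_present */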
|
|
|
|
|
|
|
2013-09-11 14:03:13 -07:00
|
|
|
|
void
|
2015-04-16 11:06:57 -07:00
|
|
|
|
brw_untyped_surface_read(struct brw_codegen *p,
|
2015-04-23 14:21:31 +03:00
|
|
|
|
struct brw_reg dst,
|
|
|
|
|
|
struct brw_reg payload,
|
|
|
|
|
|
struct brw_reg surface,
|
2013-11-25 15:51:24 -08:00
|
|
|
|
unsigned msg_length,
|
2015-02-26 12:56:19 +02:00
|
|
|
|
unsigned num_channels)
|
2013-09-11 14:03:13 -07:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2015-04-23 14:21:31 +03:00
|
|
|
|
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
|
|
|
|
|
|
HSW_SFID_DATAPORT_DATA_CACHE_1 :
|
|
|
|
|
|
GEN7_SFID_DATAPORT_DATA_CACHE);
|
2018-11-01 13:40:31 -05:00
|
|
|
|
const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
|
|
|
|
|
|
const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0;
|
2018-06-07 15:19:49 -07:00
|
|
|
|
const unsigned response_length =
|
2020-09-22 14:10:56 -07:00
|
|
|
|
brw_surface_payload_size(num_channels, exec_size);
|
2018-06-07 15:19:49 -07:00
|
|
|
|
const unsigned desc =
|
2018-06-07 15:22:58 -07:00
|
|
|
|
brw_message_desc(devinfo, msg_length, response_length, false) |
|
2018-10-29 16:09:30 -05:00
|
|
|
|
brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, false);
|
2013-09-11 14:03:13 -07:00
|
|
|
|
|
2018-06-07 15:22:58 -07:00
|
|
|
|
brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc);
|
2015-04-23 14:24:14 +03:00
|
|
|
|
}
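And the matching read, again with illustrative register choices: a SIMD8 untyped read of four channels from BTI 0:

/* Illustrative only: register numbers and msg_length are assumptions. */
brw_untyped_surface_read(p,
                         retype(brw_vec8_grf(12, 0), BRW_REGISTER_TYPE_UD),
                         brw_message_reg(2),  /* payload: address vector */
                         brw_imm_ud(0),       /* surface: BTI 0 */
                         1,                   /* msg_length */
                         4);                  /* num_channels */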
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
brw_untyped_surface_write(struct brw_codegen *p,
|
|
|
|
|
|
struct brw_reg payload,
|
|
|
|
|
|
struct brw_reg surface,
|
|
|
|
|
|
unsigned msg_length,
|
2017-12-12 12:05:03 -08:00
|
|
|
|
unsigned num_channels,
|
|
|
|
|
|
bool header_present)
|
2015-04-23 14:24:14 +03:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2015-04-23 14:24:14 +03:00
|
|
|
|
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
|
|
|
|
|
|
HSW_SFID_DATAPORT_DATA_CACHE_1 :
|
|
|
|
|
|
GEN7_SFID_DATAPORT_DATA_CACHE);
|
2018-11-01 14:15:58 -05:00
|
|
|
|
const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
|
|
|
|
|
|
/* SIMD4x2 untyped surface write instructions only exist on HSW+ */
|
|
|
|
|
|
const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell;
|
2018-10-29 16:09:30 -05:00
|
|
|
|
const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) :
|
|
|
|
|
|
has_simd4x2 ? 0 : 8;
|
2018-06-07 15:19:49 -07:00
|
|
|
|
const unsigned desc =
|
2018-06-07 15:22:58 -07:00
|
|
|
|
brw_message_desc(devinfo, msg_length, 0, header_present) |
|
2018-10-29 16:09:30 -05:00
|
|
|
|
brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, true);
|
2015-04-23 14:24:14 +03:00
|
|
|
|
/* Mask out unused components -- See comment in brw_untyped_atomic(). */
|
2018-11-01 14:15:58 -05:00
|
|
|
|
const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW;
|
2015-04-23 14:28:25 +03:00
|
|
|
|
|
2018-06-07 15:27:06 -07:00
|
|
|
|
brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask),
|
|
|
|
|
|
payload, surface, desc);
|
2013-09-11 14:03:13 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2015-04-23 14:30:28 +03:00
|
|
|
|
static void
|
|
|
|
|
|
brw_set_memory_fence_message(struct brw_codegen *p,
|
|
|
|
|
|
struct brw_inst *insn,
|
|
|
|
|
|
enum brw_message_target sfid,
|
2019-07-10 12:02:23 -07:00
|
|
|
|
bool commit_enable,
|
|
|
|
|
|
unsigned bti)
|
2015-04-23 14:30:28 +03:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2015-04-23 14:30:28 +03:00
|
|
|
|
|
2018-06-11 10:49:39 -07:00
|
|
|
|
brw_set_desc(p, insn, brw_message_desc(
|
|
|
|
|
|
devinfo, 1, (commit_enable ? 1 : 0), true));
|
|
|
|
|
|
|
|
|
|
|
|
brw_inst_set_sfid(devinfo, insn, sfid);
|
2015-04-23 14:30:28 +03:00
|
|
|
|
|
|
|
|
|
|
switch (sfid) {
|
|
|
|
|
|
case GEN6_SFID_DATAPORT_RENDER_CACHE:
|
|
|
|
|
|
brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_RC_MEMORY_FENCE);
|
|
|
|
|
|
break;
|
|
|
|
|
|
case GEN7_SFID_DATAPORT_DATA_CACHE:
|
|
|
|
|
|
brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_DC_MEMORY_FENCE);
|
|
|
|
|
|
break;
|
|
|
|
|
|
default:
|
|
|
|
|
|
unreachable("Not reached");
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (commit_enable)
|
|
|
|
|
|
brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5);
|
2019-07-10 12:02:23 -07:00
|
|
|
|
|
|
|
|
|
|
assert(devinfo->gen >= 11 || bti == 0);
|
|
|
|
|
|
brw_inst_set_binding_table_index(devinfo, insn, bti);
|
2015-04-23 14:30:28 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
intel/fs,vec4: Pull stall logic for memory fences up into the IR
Instead of emitting the stall MOV "inside" the
SHADER_OPCODE_MEMORY_FENCE generation, use the scheduling fences when
creating the IR.
For IvyBridge, every (data cache) fence is accompanied by a render
cache fence. That is now explicit in the IR: two
SHADER_OPCODE_MEMORY_FENCEs are emitted (with different SFIDs).
Because Begin and End interlock intrinsics are effectively memory
barriers, move their handling alongside the other memory barrier
intrinsics. The SHADER_OPCODE_INTERLOCK is still used to distinguish
if we are going to use a SENDC (for Begin) or regular SEND (for End).
This change is a preparation to allow emitting both SENDs in Gen11+
before we can stall on them.
Shader-db results for IVB (i965):
total instructions in shared programs: 11971190 -> 11971200 (<.01%)
instructions in affected programs: 11482 -> 11492 (0.09%)
helped: 0
HURT: 8
HURT stats (abs) min: 1 max: 3 x̄: 1.25 x̃: 1
HURT stats (rel) min: 0.03% max: 0.50% x̄: 0.14% x̃: 0.10%
95% mean confidence interval for instructions value: 0.66 1.84
95% mean confidence interval for instructions %-change: 0.01% 0.27%
Instructions are HURT.
Unlike the previous code, which used the `mov g1 g2` trick to force
both `g1` and `g2` to stall, the scheduling fence will generate `mov
null g1` and `mov null g2`. During review it was decided it was not
worth keeping the special codepath for the small effect it would have.
Shader-db results for HSW (i965), BDW and SKL don't show a change
in instruction count, but do report changes in cycle counts; SKL
results are shown below
total cycles in shared programs: 341738444 -> 341710570 (<.01%)
cycles in affected programs: 7240002 -> 7212128 (-0.38%)
helped: 46
HURT: 5
helped stats (abs) min: 14 max: 1940 x̄: 676.22 x̃: 154
helped stats (rel) min: <.01% max: 2.62% x̄: 1.28% x̃: 0.95%
HURT stats (abs) min: 2 max: 1768 x̄: 646.40 x̃: 362
HURT stats (rel) min: <.01% max: 0.83% x̄: 0.28% x̃: 0.08%
95% mean confidence interval for cycles value: -777.71 -315.38
95% mean confidence interval for cycles %-change: -1.42% -0.83%
Cycles are helped.
This seems to be the effect of allocating two registers separately
instead of a single one with size 2, which causes different register
allocation, affecting the cycle estimates.
ICL likewise shows no change in instruction count, but reports
negative changes in cycles
total cycles in shared programs: 352665369 -> 352707484 (0.01%)
cycles in affected programs: 9608288 -> 9650403 (0.44%)
helped: 4
HURT: 104
helped stats (abs) min: 24 max: 128 x̄: 88.50 x̃: 101
helped stats (rel) min: <.01% max: 0.85% x̄: 0.46% x̃: 0.49%
HURT stats (abs) min: 2 max: 2016 x̄: 408.36 x̃: 48
HURT stats (rel) min: <.01% max: 3.31% x̄: 0.88% x̃: 0.45%
95% mean confidence interval for cycles value: 256.67 523.24
95% mean confidence interval for cycles %-change: 0.63% 1.03%
Cycles are HURT.
AFAICT this is the result of the case above.
Shader-db results for TGL show similar cycle results to ICL, but
instruction counts are also affected
total instructions in shared programs: 17690586 -> 17690597 (<.01%)
instructions in affected programs: 64617 -> 64628 (0.02%)
helped: 55
HURT: 32
helped stats (abs) min: 1 max: 16 x̄: 4.13 x̃: 3
helped stats (rel) min: 0.05% max: 2.78% x̄: 0.86% x̃: 0.74%
HURT stats (abs) min: 1 max: 65 x̄: 7.44 x̃: 2
HURT stats (rel) min: 0.05% max: 4.58% x̄: 1.13% x̃: 0.69%
95% mean confidence interval for instructions value: -2.03 2.28
95% mean confidence interval for instructions %-change: -0.41% 0.15%
Inconclusive result (value mean confidence interval includes 0).
Now that more is done in the IR, more dependencies are visible and
more SWSB annotations are emitted. Mixed with different register
allocation decisions like above, some shaders will see more `sync
nops` while others able to avoid them.
Most of the new `sync nops` are also redundant and could be dropped,
which will be fixed in a separate change.
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3278>
2020-01-17 15:07:44 -08:00
|
|
|
|
void
|
2015-04-23 14:30:28 +03:00
|
|
|
|
brw_memory_fence(struct brw_codegen *p,
|
2018-04-27 15:06:56 +01:00
|
|
|
|
struct brw_reg dst,
|
2019-05-22 12:20:01 -05:00
|
|
|
|
struct brw_reg src,
|
2019-05-22 12:36:17 -05:00
|
|
|
|
enum opcode send_op,
|
2020-01-17 15:07:44 -08:00
|
|
|
|
enum brw_message_target sfid,
|
|
|
|
|
|
bool commit_enable,
|
2019-07-10 12:02:23 -07:00
|
|
|
|
unsigned bti)
|
2015-04-23 14:30:28 +03:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2015-04-23 14:30:28 +03:00
|
|
|
|
|
2019-05-22 12:20:01 -05:00
|
|
|
|
dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW);
|
|
|
|
|
|
src = retype(vec1(src), BRW_REGISTER_TYPE_UD);
|
2016-04-25 19:20:12 -07:00
|
|
|
|
|
2015-04-23 14:30:28 +03:00
|
|
|
|
/* Set dst as destination for dependency tracking, the MEMORY_FENCE
|
|
|
|
|
|
* message doesn't write anything back.
|
|
|
|
|
|
*/
|
2020-01-17 15:07:44 -08:00
|
|
|
|
struct brw_inst *insn = next_insn(p, send_op);
|
|
|
|
|
|
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
|
|
|
|
|
|
brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
|
2015-04-23 14:30:28 +03:00
|
|
|
|
brw_set_dest(p, insn, dst);
|
2019-05-22 12:20:01 -05:00
|
|
|
|
brw_set_src0(p, insn, src);
|
2020-01-17 15:07:44 -08:00
|
|
|
|
brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
|
2015-04-23 14:30:28 +03:00
|
|
|
|
}
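/* A hedged sketch (not the actual Mesa code path) of the IVB
 * dual-fence pairing described in the log above: each data cache
 * fence is accompanied by a render cache fence, i.e. two
 * SHADER_OPCODE_MEMORY_FENCE sends with different SFIDs. The helper
 * name and destination registers are hypothetical; brw_next_insn(),
 * brw_set_memory_fence_message() and the SFID constants are the ones
 * this file already uses.
 */
static void
emit_ivb_fence_pair(struct brw_codegen *p, struct brw_reg dst1,
                    struct brw_reg dst2, bool commit_enable)
{
   /* Data cache fence. */
   brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dst1);
   brw_set_src0(p, insn, dst1);
   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                commit_enable, /* bti */ 0);

   /* Accompanying render cache fence: same message, different SFID. */
   insn = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dst2);
   brw_set_src0(p, insn, dst2);
   brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
                                commit_enable, /* bti */ 0);
}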
|
|
|
|
|
|
|
2013-11-17 21:47:22 +13:00
|
|
|
|
void
|
2015-04-16 11:06:57 -07:00
|
|
|
|
brw_pixel_interpolator_query(struct brw_codegen *p,
|
2013-11-17 21:47:22 +13:00
|
|
|
|
struct brw_reg dest,
|
|
|
|
|
|
struct brw_reg mrf,
|
|
|
|
|
|
bool noperspective,
|
|
|
|
|
|
unsigned mode,
|
2015-07-17 14:40:03 +01:00
|
|
|
|
struct brw_reg data,
|
2013-11-17 21:47:22 +13:00
|
|
|
|
unsigned msg_length,
|
|
|
|
|
|
unsigned response_length)
|
|
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2018-05-29 14:50:46 -07:00
|
|
|
|
const uint16_t exec_size = brw_get_default_exec_size(p);
|
2018-07-09 16:16:16 -07:00
|
|
|
|
const unsigned slot_group = brw_get_default_group(p) / 16;
|
|
|
|
|
|
const unsigned simd_mode = (exec_size == BRW_EXECUTE_16);
|
|
|
|
|
|
const unsigned desc =
|
|
|
|
|
|
brw_message_desc(devinfo, msg_length, response_length, false) |
|
|
|
|
|
|
brw_pixel_interp_desc(devinfo, mode, noperspective, simd_mode,
|
|
|
|
|
|
slot_group);
|
2013-11-17 21:47:22 +13:00
|
|
|
|
|
2015-07-17 14:40:03 +01:00
|
|
|
|
/* brw_send_indirect_message will automatically use a direct send message
|
|
|
|
|
|
* if data is actually an immediate.
|
|
|
|
|
|
*/
|
2018-07-09 16:16:16 -07:00
|
|
|
|
brw_send_indirect_message(p,
|
|
|
|
|
|
GEN7_SFID_PIXEL_INTERPOLATOR,
|
|
|
|
|
|
dest,
|
|
|
|
|
|
mrf,
|
|
|
|
|
|
vec1(data),
|
2019-02-07 17:45:51 -06:00
|
|
|
|
desc,
|
|
|
|
|
|
false);
|
2013-11-17 21:47:22 +13:00
|
|
|
|
}
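/* Hypothetical call site for brw_pixel_interpolator_query() above: a
 * per-sample interpolation query. GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE
 * is assumed to come from brw_eu_defines.h; register choices and
 * message lengths are placeholders.
 */
static void
emit_sample_pi_query(struct brw_codegen *p, struct brw_reg dst,
                     struct brw_reg payload, struct brw_reg sample_id)
{
   brw_pixel_interpolator_query(p, dst, payload,
                                /* noperspective */ false,
                                GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE,
                                sample_id,
                                /* msg_length */ 1,
                                /* response_length */ 2);
}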
|
|
|
|
|
|
|
2015-04-23 14:42:53 +03:00
|
|
|
|
void
|
2016-09-14 15:09:33 -07:00
|
|
|
|
brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
|
|
|
|
|
|
struct brw_reg mask)
|
2015-04-23 14:42:53 +03:00
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2018-05-29 14:50:46 -07:00
|
|
|
|
const unsigned exec_size = 1 << brw_get_default_exec_size(p);
|
|
|
|
|
|
const unsigned qtr_control = brw_get_default_group(p) / 8;
|
2015-04-23 14:42:53 +03:00
|
|
|
|
brw_inst *inst;
|
|
|
|
|
|
|
|
|
|
|
|
assert(devinfo->gen >= 7);
|
2016-09-14 15:09:33 -07:00
|
|
|
|
assert(mask.type == BRW_REGISTER_TYPE_UD);
|
2015-04-23 14:42:53 +03:00
|
|
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
|
|
|
2019-01-22 11:33:11 -08:00
|
|
|
|
/* The flag register is only used on Gen7 in align1 mode, so avoid setting
|
|
|
|
|
|
* unnecessary bits in the instruction words, get the information we need
|
|
|
|
|
|
* and reset the default flag register. This allows more instructions to be
|
|
|
|
|
|
* compacted.
|
|
|
|
|
|
*/
|
|
|
|
|
|
const unsigned flag_subreg = p->current->flag_subreg;
|
|
|
|
|
|
brw_set_default_flag_reg(p, 0, 0);
|
|
|
|
|
|
|
2018-05-29 14:50:46 -07:00
|
|
|
|
if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
|
2015-04-23 14:42:53 +03:00
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
|
|
|
|
|
|
|
|
|
|
|
if (devinfo->gen >= 8) {
|
|
|
|
|
|
/* Getting the first active channel index is easy on Gen8: Just find
|
2016-09-14 15:09:33 -07:00
|
|
|
|
* the first bit set in the execution mask. The register exists on
|
|
|
|
|
|
* HSW already but it reads back as all ones when the current
|
2015-04-23 14:42:53 +03:00
|
|
|
|
* instruction has execution masking disabled, so it's kind of
|
|
|
|
|
|
* useless.
|
|
|
|
|
|
*/
|
2016-09-14 15:09:33 -07:00
|
|
|
|
struct brw_reg exec_mask =
|
|
|
|
|
|
retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD);
|
|
|
|
|
|
|
2017-08-30 13:36:58 -07:00
|
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
2016-09-14 15:09:33 -07:00
|
|
|
|
if (mask.file != BRW_IMMEDIATE_VALUE || mask.ud != 0xffffffff) {
|
|
|
|
|
|
/* Unfortunately, ce0 does not take into account the thread
|
|
|
|
|
|
* dispatch mask, which may be a problem in cases where it's not
|
|
|
|
|
|
* tightly packed (i.e. it doesn't have the form '2^n - 1' for
|
|
|
|
|
|
* some n). Combine ce0 with the given dispatch (or vector) mask
|
|
|
|
|
|
* to mask off those channels which were never dispatched by the
|
|
|
|
|
|
* hardware.
|
|
|
|
|
|
*/
|
|
|
|
|
|
brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8));
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
2016-09-14 15:09:33 -07:00
|
|
|
|
brw_AND(p, vec1(dst), exec_mask, vec1(dst));
|
|
|
|
|
|
exec_mask = vec1(dst);
|
|
|
|
|
|
}
|
2015-04-23 14:42:53 +03:00
|
|
|
|
|
|
|
|
|
|
/* Quarter control has the effect of magically shifting the value of
|
2016-09-14 15:09:33 -07:00
|
|
|
|
* ce0 so you'll get the first active channel relative to the
|
|
|
|
|
|
* specified quarter control as the result.
|
2015-04-23 14:42:53 +03:00
|
|
|
|
*/
|
2016-09-14 15:09:33 -07:00
|
|
|
|
inst = brw_FBL(p, vec1(dst), exec_mask);
|
2015-04-23 14:42:53 +03:00
|
|
|
|
} else {
|
2019-01-22 11:33:11 -08:00
|
|
|
|
const struct brw_reg flag = brw_flag_subreg(flag_subreg);
|
2015-04-23 14:42:53 +03:00
|
|
|
|
|
2017-08-30 13:36:58 -07:00
|
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
2016-05-20 16:25:42 -07:00
|
|
|
|
brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
|
2015-04-23 14:42:53 +03:00
|
|
|
|
|
2016-05-18 17:34:14 -07:00
|
|
|
|
/* Run enough instructions returning zero with execution masking and
|
|
|
|
|
|
* a conditional modifier enabled in order to get the full execution
|
|
|
|
|
|
* mask in f1.0. We could use a single 32-wide move here if it
|
|
|
|
|
|
* weren't for the hardware bug that causes channel enables to
|
|
|
|
|
|
* be applied incorrectly to the second half of 32-wide instructions
|
|
|
|
|
|
* on Gen7.
|
2015-04-23 14:42:53 +03:00
|
|
|
|
*/
|
2016-05-18 17:34:14 -07:00
|
|
|
|
const unsigned lower_size = MIN2(16, exec_size);
|
|
|
|
|
|
for (unsigned i = 0; i < exec_size / lower_size; i++) {
|
|
|
|
|
|
inst = brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW),
|
|
|
|
|
|
brw_imm_uw(0));
|
|
|
|
|
|
brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
|
2016-05-20 16:25:42 -07:00
|
|
|
|
brw_inst_set_group(devinfo, inst, lower_size * i + 8 * qtr_control);
|
2016-05-18 17:34:14 -07:00
|
|
|
|
brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z);
|
|
|
|
|
|
brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1);
|
2019-01-22 11:33:11 -08:00
|
|
|
|
brw_inst_set_flag_reg_nr(devinfo, inst, flag_subreg / 2);
|
|
|
|
|
|
brw_inst_set_flag_subreg_nr(devinfo, inst, flag_subreg % 2);
|
2016-05-18 17:34:14 -07:00
|
|
|
|
}
|
2015-04-23 14:42:53 +03:00
|
|
|
|
|
2016-05-20 16:25:42 -07:00
|
|
|
|
/* Find the first bit set in the exec_size-wide portion of the flag
|
|
|
|
|
|
* register that was updated by the last sequence of MOV
|
|
|
|
|
|
* instructions.
|
|
|
|
|
|
*/
|
|
|
|
|
|
const enum brw_reg_type type = brw_int_type(exec_size / 8, false);
|
2017-08-30 13:36:58 -07:00
|
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
2016-05-20 16:25:42 -07:00
|
|
|
|
brw_FBL(p, vec1(dst), byte_offset(retype(flag, type), qtr_control));
|
2015-04-23 14:42:53 +03:00
|
|
|
|
}
|
|
|
|
|
|
} else {
|
|
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
|
|
|
|
|
|
2016-09-14 15:09:33 -07:00
|
|
|
|
if (devinfo->gen >= 8 &&
|
|
|
|
|
|
mask.file == BRW_IMMEDIATE_VALUE && mask.ud == 0xffffffff) {
|
2015-04-23 14:42:53 +03:00
|
|
|
|
/* In SIMD4x2 mode the first active channel index is just the
|
2016-09-14 15:09:33 -07:00
|
|
|
|
* negation of the first bit of the mask register. Note that ce0
|
|
|
|
|
|
* doesn't take into account the dispatch mask, so the Gen7 path
|
|
|
|
|
|
* should be used instead unless you have the guarantee that the
|
|
|
|
|
|
* dispatch mask is tightly packed (i.e. it has the form '2^n - 1'
|
|
|
|
|
|
* for some n).
|
2015-04-23 14:42:53 +03:00
|
|
|
|
*/
|
|
|
|
|
|
inst = brw_AND(p, brw_writemask(dst, WRITEMASK_X),
|
|
|
|
|
|
negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)),
|
|
|
|
|
|
brw_imm_ud(1));
|
|
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
/* Overwrite the destination without and with execution masking to
|
|
|
|
|
|
* find out which of the channels is active.
|
|
|
|
|
|
*/
|
2015-12-03 11:11:14 +01:00
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
|
|
brw_set_default_exec_size(p, BRW_EXECUTE_4);
|
2015-04-23 14:42:53 +03:00
|
|
|
|
brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
|
|
|
|
|
|
brw_imm_ud(1));
|
|
|
|
|
|
|
|
|
|
|
|
inst = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
|
|
|
|
|
|
brw_imm_ud(0));
|
2015-12-03 11:11:14 +01:00
|
|
|
|
brw_pop_insn_state(p);
|
2015-04-23 14:42:53 +03:00
|
|
|
|
brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
brw_pop_insn_state(p);
|
|
|
|
|
|
}
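/* Illustrative call of brw_find_live_channel(): with an immediate
 * all-ones mask the Gen8+ path reads ce0 directly, which per the
 * comments above is only valid when the dispatch mask is known to be
 * tightly packed.
 */
static void
emit_first_live_channel(struct brw_codegen *p, struct brw_reg dst)
{
   brw_find_live_channel(p, retype(dst, BRW_REGISTER_TYPE_UD),
                         brw_imm_ud(0xffffffff));
}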
|
|
|
|
|
|
|
2015-02-20 20:14:24 +02:00
|
|
|
|
void
|
|
|
|
|
|
brw_broadcast(struct brw_codegen *p,
|
|
|
|
|
|
struct brw_reg dst,
|
|
|
|
|
|
struct brw_reg src,
|
|
|
|
|
|
struct brw_reg idx)
|
|
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2018-05-29 14:50:46 -07:00
|
|
|
|
const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
|
2015-02-20 20:14:24 +02:00
|
|
|
|
brw_inst *inst;
|
|
|
|
|
|
|
2016-05-19 00:10:03 -07:00
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
|
|
|
|
|
brw_set_default_exec_size(p, align1 ? BRW_EXECUTE_1 : BRW_EXECUTE_4);
|
|
|
|
|
|
|
2015-02-20 20:14:24 +02:00
|
|
|
|
assert(src.file == BRW_GENERAL_REGISTER_FILE &&
|
|
|
|
|
|
src.address_mode == BRW_ADDRESS_DIRECT);
|
2017-10-17 11:57:48 -07:00
|
|
|
|
assert(!src.abs && !src.negate);
|
|
|
|
|
|
assert(src.type == dst.type);
|
2015-02-20 20:14:24 +02:00
|
|
|
|
|
|
|
|
|
|
if ((src.vstride == 0 && (src.hstride == 0 || !align1)) ||
|
|
|
|
|
|
idx.file == BRW_IMMEDIATE_VALUE) {
|
|
|
|
|
|
/* Trivial: the source is already uniform or the index is a constant.
|
|
|
|
|
|
* We will typically not get here if the optimizer is doing its job, but
|
|
|
|
|
|
* asserting would be mean.
|
|
|
|
|
|
*/
|
2015-10-22 19:41:30 -07:00
|
|
|
|
const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
|
2020-07-17 16:22:11 -05:00
|
|
|
|
src = align1 ? stride(suboffset(src, i), 0, 1, 0) :
|
|
|
|
|
|
stride(suboffset(src, 4 * i), 0, 4, 1);
|
|
|
|
|
|
|
|
|
|
|
|
if (type_sz(src.type) > 4 && !devinfo->has_64bit_float) {
|
|
|
|
|
|
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
|
|
|
|
|
|
subscript(src, BRW_REGISTER_TYPE_D, 0));
|
|
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_null());
|
|
|
|
|
|
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
|
|
|
|
|
|
subscript(src, BRW_REGISTER_TYPE_D, 1));
|
|
|
|
|
|
} else {
|
|
|
|
|
|
brw_MOV(p, dst, src);
|
|
|
|
|
|
}
|
2015-02-20 20:14:24 +02:00
|
|
|
|
} else {
|
2017-10-17 11:57:48 -07:00
|
|
|
|
/* From the Haswell PRM section "Register Region Restrictions":
|
|
|
|
|
|
*
|
|
|
|
|
|
* "The lower bits of the AddressImmediate must not overflow to
|
|
|
|
|
|
* change the register address. The lower 5 bits of Address
|
|
|
|
|
|
* Immediate when added to lower 5 bits of address register gives
|
|
|
|
|
|
* the sub-register offset. The upper bits of Address Immediate
|
|
|
|
|
|
* when added to upper bits of address register gives the register
|
|
|
|
|
|
* address. Any overflow from sub-register offset is dropped."
|
|
|
|
|
|
*
|
|
|
|
|
|
* Fortunately, for broadcast, we never have a sub-register offset so
|
|
|
|
|
|
* this isn't an issue.
|
|
|
|
|
|
*/
|
|
|
|
|
|
assert(src.subnr == 0);
|
|
|
|
|
|
|
2015-02-20 20:14:24 +02:00
|
|
|
|
if (align1) {
|
|
|
|
|
|
const struct brw_reg addr =
|
|
|
|
|
|
retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
|
2017-10-17 14:16:31 -07:00
|
|
|
|
unsigned offset = src.nr * REG_SIZE + src.subnr;
|
2015-02-20 20:14:24 +02:00
|
|
|
|
/* Limit in bytes of the signed indirect addressing immediate. */
|
|
|
|
|
|
const unsigned limit = 512;
|
|
|
|
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
|
|
|
|
|
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
|
|
|
|
|
|
|
|
|
|
|
/* Take into account the component size and horizontal stride. */
|
|
|
|
|
|
assert(src.vstride == src.hstride + src.width);
|
|
|
|
|
|
brw_SHL(p, addr, vec1(idx),
|
2019-12-06 09:20:09 -08:00
|
|
|
|
brw_imm_ud(util_logbase2(type_sz(src.type)) +
|
2015-02-20 20:14:24 +02:00
|
|
|
|
src.hstride - 1));
|
|
|
|
|
|
|
|
|
|
|
|
/* We can only address up to limit bytes using the indirect
|
|
|
|
|
|
* addressing immediate, account for the difference if the source
|
|
|
|
|
|
* register is above this limit.
|
|
|
|
|
|
*/
|
2017-10-17 14:16:31 -07:00
|
|
|
|
if (offset >= limit) {
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
2015-02-20 20:14:24 +02:00
|
|
|
|
brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
|
2017-10-17 14:16:31 -07:00
|
|
|
|
offset = offset % limit;
|
|
|
|
|
|
}
|
2015-02-20 20:14:24 +02:00
|
|
|
|
|
|
|
|
|
|
brw_pop_insn_state(p);
|
|
|
|
|
|
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
|
|
|
|
|
|
2015-02-20 20:14:24 +02:00
|
|
|
|
/* Use indirect addressing to fetch the specified component. */
|
2017-10-17 14:45:12 -07:00
|
|
|
|
if (type_sz(src.type) > 4 &&
|
2020-07-17 16:22:11 -05:00
|
|
|
|
(devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) ||
|
|
|
|
|
|
!devinfo->has_64bit_float)) {
|
2017-10-17 14:45:12 -07:00
|
|
|
|
/* From the Cherryview PRM Vol 7. "Register Region Restrictions":
|
|
|
|
|
|
*
|
|
|
|
|
|
* "When source or destination datatype is 64b or operation is
|
|
|
|
|
|
* integer DWord multiply, indirect addressing must not be
|
|
|
|
|
|
* used."
|
|
|
|
|
|
*
|
|
|
|
|
|
* To work around both of these issues, we do two integer MOVs
|
|
|
|
|
|
* instead of one 64-bit MOV. Because no double value should ever
|
|
|
|
|
|
* cross a register boundary, it's safe to use the immediate
|
|
|
|
|
|
* offset in the indirect here to handle adding 4 bytes to the
|
|
|
|
|
|
* offset and avoid the extra ADD to the register file.
|
|
|
|
|
|
*/
|
|
|
|
|
|
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
|
|
|
|
|
|
retype(brw_vec1_indirect(addr.subnr, offset),
|
|
|
|
|
|
BRW_REGISTER_TYPE_D));
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_null());
|
2017-10-17 14:45:12 -07:00
|
|
|
|
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
|
|
|
|
|
|
retype(brw_vec1_indirect(addr.subnr, offset + 4),
|
|
|
|
|
|
BRW_REGISTER_TYPE_D));
|
|
|
|
|
|
} else {
|
|
|
|
|
|
brw_MOV(p, dst,
|
|
|
|
|
|
retype(brw_vec1_indirect(addr.subnr, offset), src.type));
|
|
|
|
|
|
}
|
2015-02-20 20:14:24 +02:00
|
|
|
|
} else {
|
|
|
|
|
|
/* In SIMD4x2 mode the index can be either zero or one; replicate it
|
|
|
|
|
|
* to all bits of a flag register,
|
|
|
|
|
|
*/
|
|
|
|
|
|
inst = brw_MOV(p,
|
|
|
|
|
|
brw_null_reg(),
|
2016-05-19 00:10:03 -07:00
|
|
|
|
stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 4, 4, 1));
|
2015-02-20 20:14:24 +02:00
|
|
|
|
brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE);
|
|
|
|
|
|
brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ);
|
|
|
|
|
|
brw_inst_set_flag_reg_nr(devinfo, inst, 1);
|
|
|
|
|
|
|
|
|
|
|
|
/* and use predicated SEL to pick the right channel. */
|
|
|
|
|
|
inst = brw_SEL(p, dst,
|
2016-05-19 00:10:03 -07:00
|
|
|
|
stride(suboffset(src, 4), 4, 4, 1),
|
|
|
|
|
|
stride(src, 4, 4, 1));
|
2015-02-20 20:14:24 +02:00
|
|
|
|
brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL);
|
|
|
|
|
|
brw_inst_set_flag_reg_nr(devinfo, inst, 1);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2016-05-19 00:10:03 -07:00
|
|
|
|
|
|
|
|
|
|
brw_pop_insn_state(p);
|
2015-02-20 20:14:24 +02:00
|
|
|
|
}
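/* A hedged sketch of the common "uniformize" pattern these two
 * helpers enable: find the first live channel, then broadcast that
 * channel of the source into a uniform destination. Per the asserts
 * in brw_broadcast(), src and dst must share a type; the temporary
 * holding the channel index is a placeholder.
 */
static void
emit_uniformize_sketch(struct brw_codegen *p, struct brw_reg dst,
                       struct brw_reg src, struct brw_reg chan_index)
{
   brw_find_live_channel(p, retype(chan_index, BRW_REGISTER_TYPE_UD),
                         brw_imm_ud(0xffffffff));
   brw_broadcast(p, dst, src, retype(chan_index, BRW_REGISTER_TYPE_UD));
}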
|
|
|
|
|
|
|
2012-11-27 14:10:52 -08:00
|
|
|
|
/**
|
|
|
|
|
|
* This instruction is generated as a single-channel align1 instruction by
|
|
|
|
|
|
* both the VS and FS stages when using INTEL_DEBUG=shader_time.
|
|
|
|
|
|
*
|
|
|
|
|
|
* We can't use the typed atomic op in the FS because that has the execution
|
|
|
|
|
|
* mask ANDed with the pixel mask, but we just want to write the one dword for
|
|
|
|
|
|
* all the pixels.
|
|
|
|
|
|
*
|
|
|
|
|
|
* We don't use the SIMD4x2 atomic ops in the VS because we want to just write
|
|
|
|
|
|
* one u32. So we use the same untyped atomic write message as the pixel
|
|
|
|
|
|
* shader.
|
|
|
|
|
|
*
|
|
|
|
|
|
* The untyped atomic operation requires a BUFFER surface type with RAW
|
|
|
|
|
|
* format, and is only accessible through the legacy DATA_CACHE dataport
|
|
|
|
|
|
* messages.
|
|
|
|
|
|
*/
|
2015-04-16 11:06:57 -07:00
|
|
|
|
void brw_shader_time_add(struct brw_codegen *p,
|
2013-03-19 15:28:11 -07:00
|
|
|
|
struct brw_reg payload,
|
2012-11-27 14:10:52 -08:00
|
|
|
|
uint32_t surf_index)
|
|
|
|
|
|
{
|
2018-06-02 14:59:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
|
|
|
|
|
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
|
2015-04-23 14:21:31 +03:00
|
|
|
|
HSW_SFID_DATAPORT_DATA_CACHE_1 :
|
|
|
|
|
|
GEN7_SFID_DATAPORT_DATA_CACHE);
|
2018-06-02 14:59:08 -07:00
|
|
|
|
assert(devinfo->gen >= 7);
|
2012-11-27 14:10:52 -08:00
|
|
|
|
|
|
|
|
|
|
brw_push_insn_state(p);
|
2014-05-31 16:57:02 -07:00
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
|
|
|
|
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
2015-04-23 14:21:31 +03:00
|
|
|
|
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
2014-06-13 14:29:25 -07:00
|
|
|
|
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
|
2012-11-27 14:10:52 -08:00
|
|
|
|
|
|
|
|
|
|
/* We use brw_vec1_reg and unmasked because we want to increment the given
|
|
|
|
|
|
* offset only once.
|
|
|
|
|
|
*/
|
|
|
|
|
|
brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
|
|
|
|
|
|
BRW_ARF_NULL, 0));
|
2013-03-19 15:28:11 -07:00
|
|
|
|
brw_set_src0(p, send, brw_vec1_reg(payload.file,
|
|
|
|
|
|
payload.nr, 0));
|
2018-06-07 15:22:58 -07:00
|
|
|
|
brw_set_desc(p, send, (brw_message_desc(devinfo, 2, 0, false) |
|
2018-10-29 16:09:30 -05:00
|
|
|
|
brw_dp_untyped_atomic_desc(devinfo, 1, BRW_AOP_ADD,
|
|
|
|
|
|
false)));
|
2018-06-07 15:22:58 -07:00
|
|
|
|
|
2018-06-02 14:59:08 -07:00
|
|
|
|
brw_inst_set_sfid(devinfo, send, sfid);
|
|
|
|
|
|
brw_inst_set_binding_table_index(devinfo, send, surf_index);
|
2015-04-23 14:21:31 +03:00
|
|
|
|
|
|
|
|
|
|
brw_pop_insn_state(p);
|
2012-11-27 14:10:52 -08:00
|
|
|
|
}
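/* Illustrative call: accumulate one shader-time delta. The payload
 * GRF (holding the offset/value pair as laid out by the caller) and
 * the binding table index are placeholders.
 */
static void
emit_shader_time_sketch(struct brw_codegen *p)
{
   brw_shader_time_add(p, brw_vec8_grf(1, 0), /* surf_index */ 0);
}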
|
2014-11-04 18:05:04 -08:00
|
|
|
|
|
|
|
|
|
|
|
2014-11-04 18:11:37 -08:00
|
|
|
|
/**
|
|
|
|
|
|
* Emit the SEND message for a barrier
|
|
|
|
|
|
*/
|
|
|
|
|
|
void
|
|
|
|
|
|
brw_barrier(struct brw_codegen *p, struct brw_reg src)
|
|
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2014-11-04 18:11:37 -08:00
|
|
|
|
struct brw_inst *inst;
|
|
|
|
|
|
|
|
|
|
|
|
assert(devinfo->gen >= 7);
|
|
|
|
|
|
|
2017-01-15 00:58:20 -08:00
|
|
|
|
brw_push_insn_state(p);
|
|
|
|
|
|
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
2014-11-04 18:11:37 -08:00
|
|
|
|
inst = next_insn(p, BRW_OPCODE_SEND);
|
2016-01-31 18:28:42 -08:00
|
|
|
|
brw_set_dest(p, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
|
2014-11-04 18:11:37 -08:00
|
|
|
|
brw_set_src0(p, inst, src);
|
|
|
|
|
|
brw_set_src1(p, inst, brw_null_reg());
|
2018-06-11 10:49:39 -07:00
|
|
|
|
brw_set_desc(p, inst, brw_message_desc(devinfo, 1, 0, false));
|
2014-11-04 18:11:37 -08:00
|
|
|
|
|
2018-06-11 10:49:39 -07:00
|
|
|
|
brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
|
2014-11-04 18:11:37 -08:00
|
|
|
|
brw_inst_set_gateway_subfuncid(devinfo, inst,
|
|
|
|
|
|
BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
|
|
|
|
|
|
|
|
|
|
|
|
brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
|
2017-01-15 00:58:20 -08:00
|
|
|
|
brw_pop_insn_state(p);
|
2014-11-04 18:11:37 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-11-04 18:05:04 -08:00
|
|
|
|
/**
|
|
|
|
|
|
* Emit the wait instruction for a barrier
|
|
|
|
|
|
*/
|
|
|
|
|
|
void
|
|
|
|
|
|
brw_WAIT(struct brw_codegen *p)
|
|
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
|
const struct gen_device_info *devinfo = p->devinfo;
|
2014-11-04 18:05:04 -08:00
|
|
|
|
struct brw_inst *insn;
|
|
|
|
|
|
|
|
|
|
|
|
struct brw_reg src = brw_notification_reg();
|
|
|
|
|
|
|
|
|
|
|
|
insn = next_insn(p, BRW_OPCODE_WAIT);
|
|
|
|
|
|
brw_set_dest(p, insn, src);
|
|
|
|
|
|
brw_set_src0(p, insn, src);
|
|
|
|
|
|
brw_set_src1(p, insn, brw_null_reg());
|
|
|
|
|
|
|
|
|
|
|
|
brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
|
|
|
|
|
|
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
|
|
|
|
|
|
}
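/* Sketch of the usual workgroup barrier sequence built from the two
 * helpers above: send the gateway barrier message with a payload that
 * carries the barrier ID (set up by the caller), then stall on the
 * notification register. The payload register is a placeholder.
 */
static void
emit_barrier_and_wait(struct brw_codegen *p)
{
   brw_barrier(p, brw_vec8_grf(1, 0));
   brw_WAIT(p);
}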
|
2017-07-01 08:12:59 +02:00
|
|
|
|
|
|
|
|
|
|
void
|
2019-09-13 01:34:35 +03:00
|
|
|
|
brw_float_controls_mode(struct brw_codegen *p,
|
|
|
|
|
|
unsigned mode, unsigned mask)
|
|
|
|
|
|
{
|
|
|
|
|
|
/* From the Skylake PRM, Volume 7, page 760:
|
|
|
|
|
|
* "Implementation Restriction on Register Access: When the control
|
|
|
|
|
|
* register is used as an explicit source and/or destination, hardware
|
|
|
|
|
|
* does not ensure execution pipeline coherency. Software must set the
|
|
|
|
|
|
* thread control field to ‘switch’ for an instruction that uses
|
|
|
|
|
|
* control register as an explicit operand."
|
2019-09-26 23:38:24 -07:00
|
|
|
|
*
|
|
|
|
|
|
* On Gen12+ this is implemented in terms of SWSB annotations instead.
|
2019-09-13 01:34:35 +03:00
|
|
|
|
*/
|
2019-09-26 23:38:24 -07:00
|
|
|
|
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
|
|
|
|
|
|
|
|
|
|
|
brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
|
|
|
|
|
|
brw_imm_ud(~mask));
|
|
|
|
|
|
brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
|
2019-09-26 23:36:58 -07:00
|
|
|
|
if (p->devinfo->gen < 12)
|
|
|
|
|
|
brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
|
2019-09-13 01:34:35 +03:00
|
|
|
|
|
|
|
|
|
|
if (mode) {
|
|
|
|
|
|
brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
|
|
|
|
|
|
brw_imm_ud(mode));
|
|
|
|
|
|
brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
|
2019-09-26 23:36:58 -07:00
|
|
|
|
if (p->devinfo->gen < 12)
|
|
|
|
|
|
brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
|
2017-07-01 08:12:59 +02:00
|
|
|
|
}
|
2019-09-26 23:38:24 -07:00
|
|
|
|
|
|
|
|
|
|
if (p->devinfo->gen >= 12)
|
|
|
|
|
|
brw_SYNC(p, TGL_SYNC_NOP);
|
2017-07-01 08:12:59 +02:00
|
|
|
|
}
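/* Example use, assuming the BRW_RND_MODE_* and BRW_CR0_RND_MODE_*
 * definitions from brw_eu_defines.h: switch the floating-point
 * rounding mode to round-toward-zero.
 */
static void
emit_set_rtz_sketch(struct brw_codegen *p)
{
   brw_float_controls_mode(p, BRW_RND_MODE_RTZ << BRW_CR0_RND_MODE_SHIFT,
                           BRW_CR0_RND_MODE_MASK);
}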
|
2020-08-08 12:55:29 -05:00
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
brw_update_reloc_imm(const struct gen_device_info *devinfo,
|
|
|
|
|
|
brw_inst *inst,
|
|
|
|
|
|
uint32_t value)
|
|
|
|
|
|
{
|
|
|
|
|
|
/* Sanity check that the instruction is a MOV of an immediate */
|
|
|
|
|
|
assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV);
|
|
|
|
|
|
assert(brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE);
|
|
|
|
|
|
|
|
|
|
|
|
/* If it was compacted, we can't safely rewrite */
|
|
|
|
|
|
assert(brw_inst_cmpt_control(devinfo, inst) == 0);
|
|
|
|
|
|
|
|
|
|
|
|
brw_inst_set_imm_ud(devinfo, inst, value);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* A default value for constants that will be patched at run-time.
|
|
|
|
|
|
* We pick an arbitrary value that prevents instruction compaction.
|
|
|
|
|
|
*/
|
|
|
|
|
|
#define DEFAULT_PATCH_IMM 0x4a7cc037
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
brw_MOV_reloc_imm(struct brw_codegen *p,
|
|
|
|
|
|
struct brw_reg dst,
|
|
|
|
|
|
enum brw_reg_type src_type,
|
|
|
|
|
|
uint32_t id)
|
|
|
|
|
|
{
|
|
|
|
|
|
assert(type_sz(src_type) == 4);
|
|
|
|
|
|
assert(type_sz(dst.type) == 4);
|
|
|
|
|
|
|
|
|
|
|
|
if (p->num_relocs + 1 > p->reloc_array_size) {
|
|
|
|
|
|
p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
|
|
|
|
|
|
p->relocs = reralloc(p->mem_ctx, p->relocs,
|
|
|
|
|
|
struct brw_shader_reloc, p->reloc_array_size);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
p->relocs[p->num_relocs++] = (struct brw_shader_reloc) {
|
|
|
|
|
|
.id = id,
|
|
|
|
|
|
.offset = p->next_insn_offset,
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type));
|
|
|
|
|
|
}
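/* Hedged end-to-end sketch of the relocation flow above: emit a
 * patchable constant while assembling, then rewrite it once the final
 * value is known. The reloc id, destination register and the way the
 * caller finds the recorded reloc are placeholders.
 */
static void
reloc_flow_sketch(struct brw_codegen *p,
                  const struct gen_device_info *devinfo,
                  void *program, uint32_t final_value)
{
   /* Compile time: records { .id = 42, .offset = p->next_insn_offset }
    * and emits a MOV of DEFAULT_PATCH_IMM.
    */
   brw_MOV_reloc_imm(p, retype(brw_vec1_grf(2, 0), BRW_REGISTER_TYPE_UD),
                     BRW_REGISTER_TYPE_UD, /* id */ 42);

   /* Later, once the value for that reloc id is known: */
   const struct brw_shader_reloc *reloc = &p->relocs[p->num_relocs - 1];
   brw_update_reloc_imm(devinfo,
                        (brw_inst *)((char *)program + reloc->offset),
                        final_value);
}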
|