/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/

/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include "brw_eu_defines.h"
#include "brw_eu.h"

#include "util/ralloc.h"

void
brw_set_dest(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg dest)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (dest.file == FIXED_GRF)
      assert(dest.nr < XE3_MAX_GRF);

   /* The hardware has a restriction where a destination of size Byte with
    * a stride of 1 is only allowed for a packed byte MOV. For any other
    * instruction, the stride must be at least 2, even when the destination
    * is the NULL register.
    */
   if (dest.file == ARF &&
       dest.nr == BRW_ARF_NULL &&
       brw_type_size_bytes(dest.type) == 1 &&
       dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
      dest.hstride = BRW_HORIZONTAL_STRIDE_2;
   }

   if (devinfo->ver >= 12 &&
       (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      assert(dest.file == FIXED_GRF ||
             dest.file == ADDRESS ||
             dest.file == ARF);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr == 0);
      assert(brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
             (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              dest.vstride == dest.width + 1));
      assert(!dest.negate && !dest.abs);
      brw_eu_inst_set_dst_reg_file(devinfo, inst, phys_file(dest));
      brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

   } else if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      assert(devinfo->ver < 12);
      assert(dest.file == FIXED_GRF ||
             dest.file == ADDRESS ||
             dest.file == ARF);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr % 16 == 0);
      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
             dest.vstride == dest.width + 1);
      assert(!dest.negate && !dest.abs);
      brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
      brw_eu_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
      brw_eu_inst_set_send_dst_reg_file(devinfo, inst, phys_file(dest));
   } else {
      brw_eu_inst_set_dst_file_type(devinfo, inst, phys_file(dest), dest.type);
      brw_eu_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);

      if (dest.address_mode == BRW_ADDRESS_DIRECT) {
         brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_eu_inst_set_dst_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_eu_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_eu_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
            brw_eu_inst_set_da16_writemask(devinfo, inst, dest.writemask);
            if (dest.file == FIXED_GRF) {
               assert(dest.writemask != 0);
            }
            /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
             *    Although Dst.HorzStride is a don't care for Align16, HW needs
             *    this to be programmed as "01".
             */
            brw_eu_inst_set_dst_hstride(devinfo, inst, 1);
         }
      } else {
         brw_eu_inst_set_dst_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));

         /* These are different sizes in align1 vs align16:
          */
         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_eu_inst_set_dst_ia1_addr_imm(devinfo, inst,
                                             dest.indirect_offset);
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_eu_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_eu_inst_set_dst_ia16_addr_imm(devinfo, inst,
                                              dest.indirect_offset);
            /* even ignored in da16, still need to set as '01' */
            brw_eu_inst_set_dst_hstride(devinfo, inst, 1);
         }
      }
   }
}

void
brw_set_src0(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == FIXED_GRF)
      assert(reg.nr < XE3_MAX_GRF);

   if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   if (devinfo->ver >= 12 &&
       (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      assert(reg.file == ARF || reg.file == FIXED_GRF);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);

      brw_eu_inst_set_send_src0_reg_file(devinfo, inst, phys_file(reg));
      brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));

      if (reg.file == ARF && reg.nr == BRW_ARF_SCALAR) {
         assert(reg.subnr % 2 == 0);
         brw_eu_inst_set_send_src0_subreg_nr(devinfo, inst, reg.subnr / 2);
      } else {
         assert(reg.subnr == 0);
      }
   } else if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      assert(reg.file == FIXED_GRF);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr % 16 == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
      brw_eu_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
   } else {
      brw_eu_inst_set_src0_file_type(devinfo, inst, phys_file(reg), reg.type);
      brw_eu_inst_set_src0_abs(devinfo, inst, reg.abs);
      brw_eu_inst_set_src0_negate(devinfo, inst, reg.negate);
      brw_eu_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);

      if (reg.file == IMM) {
         if (reg.type == BRW_TYPE_DF)
            brw_eu_inst_set_imm_df(devinfo, inst, reg.df);
         else if (reg.type == BRW_TYPE_UQ ||
                  reg.type == BRW_TYPE_Q)
            brw_eu_inst_set_imm_uq(devinfo, inst, reg.u64);
         else
            brw_eu_inst_set_imm_ud(devinfo, inst, reg.ud);

         if (devinfo->ver < 12 && brw_type_size_bytes(reg.type) < 8) {
            brw_eu_inst_set_src1_reg_file(devinfo, inst,
                                          ARF);
            brw_eu_inst_set_src1_reg_hw_type(devinfo, inst,
                                             brw_eu_inst_src0_reg_hw_type(devinfo, inst));
         }
      } else {
         if (reg.address_mode == BRW_ADDRESS_DIRECT) {
            brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
            if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_eu_inst_set_src0_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
            } else {
               brw_eu_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
            }
         } else {
            brw_eu_inst_set_src0_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));

            if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_eu_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
            } else {
               brw_eu_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
            }
         }

         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            if (reg.width == BRW_WIDTH_1 &&
                brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_eu_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_eu_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
               brw_eu_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_eu_inst_set_src0_hstride(devinfo, inst, reg.hstride);
               brw_eu_inst_set_src0_width(devinfo, inst, reg.width);
               brw_eu_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_eu_inst_set_src0_da16_swiz_x(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_eu_inst_set_src0_da16_swiz_y(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_eu_inst_set_src0_da16_swiz_z(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_eu_inst_set_src0_da16_swiz_w(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_eu_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_eu_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}

void
brw_set_src1(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == FIXED_GRF)
      assert(reg.nr < XE3_MAX_GRF);

   if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC ||
       (devinfo->ver >= 12 &&
        (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
         brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC))) {
      assert(reg.file == FIXED_GRF ||
             reg.file == ARF ||
             reg.file == ADDRESS);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_eu_inst_set_send_src1_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
      brw_eu_inst_set_send_src1_reg_file(devinfo, inst, phys_file(reg));
   } else {
      /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
       *
       *    "Accumulator registers may be accessed explicitly as src0
       *     operands only."
       */
      assert(reg.file != ARF ||
             (reg.nr & 0xF0) != BRW_ARF_ACCUMULATOR);

      brw_eu_inst_set_src1_file_type(devinfo, inst, phys_file(reg), reg.type);
      brw_eu_inst_set_src1_abs(devinfo, inst, reg.abs);
      brw_eu_inst_set_src1_negate(devinfo, inst, reg.negate);

      /* Only src1 can be immediate in two-argument instructions.
       */
      assert(brw_eu_inst_src0_reg_file(devinfo, inst) != IMM);

      if (reg.file == IMM) {
         /* two-argument instructions can only use 32-bit immediates */
         assert(brw_type_size_bytes(reg.type) < 8);
         brw_eu_inst_set_imm_ud(devinfo, inst, reg.ud);
      } else {
         /* This is a hardware restriction, which may or may not be lifted
          * in the future:
          */
         assert (reg.address_mode == BRW_ADDRESS_DIRECT);
         /* assert (reg.file == FIXED_GRF); */

         brw_eu_inst_set_src1_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_eu_inst_set_src1_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
         } else {
            brw_eu_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
         }

         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            if (reg.width == BRW_WIDTH_1 &&
                brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_eu_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_eu_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
               brw_eu_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_eu_inst_set_src1_hstride(devinfo, inst, reg.hstride);
               brw_eu_inst_set_src1_width(devinfo, inst, reg.width);
               brw_eu_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_eu_inst_set_src1_da16_swiz_x(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_eu_inst_set_src1_da16_swiz_y(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_eu_inst_set_src1_da16_swiz_z(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_eu_inst_set_src1_da16_swiz_w(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_eu_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_eu_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}

/**
 * Specify the descriptor and extended descriptor immediate for a SEND(C)
 * message instruction.
 */
void
brw_set_desc_ex(struct brw_codegen *p, brw_eu_inst *inst,
                unsigned desc, unsigned ex_desc, bool gather)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(!gather || devinfo->ver >= 30);
   assert(brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
          brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC);
   if (devinfo->ver < 12)
      brw_eu_inst_set_src1_file_type(devinfo, inst,
                                     IMM, BRW_TYPE_UD);
   brw_eu_inst_set_send_desc(devinfo, inst, desc);
   if (devinfo->ver >= 9)
      brw_eu_inst_set_send_ex_desc(devinfo, inst, ex_desc, gather);
}
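
/* Illustrative sketch only (not part of the original file): one plausible
 * call sequence for emitting a SEND with an immediate descriptor.  The
 * `dst`, `payload`, `desc` and `ex_desc` values are hypothetical; real
 * callers typically build `desc` with the message-descriptor helpers.
 *
 *    brw_eu_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
 *    brw_set_dest(p, send, dst);
 *    brw_set_src0(p, send, payload);
 *    brw_set_desc_ex(p, send, desc, ex_desc, false);
 */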

static void
brw_eu_inst_set_state(const struct brw_isa_info *isa,
                      brw_eu_inst *insn,
                      const struct brw_insn_state *state)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   brw_eu_inst_set_exec_size(devinfo, insn, state->exec_size);
   brw_eu_inst_set_group(devinfo, insn, state->group);
   brw_eu_inst_set_access_mode(devinfo, insn, state->access_mode);
   brw_eu_inst_set_mask_control(devinfo, insn, state->mask_control);
   if (devinfo->ver >= 12)
      brw_eu_inst_set_swsb(devinfo, insn, tgl_swsb_encode(devinfo, state->swsb, brw_eu_inst_opcode(isa, insn)));
   brw_eu_inst_set_saturate(devinfo, insn, state->saturate);
   brw_eu_inst_set_pred_control(devinfo, insn, state->predicate);
   brw_eu_inst_set_pred_inv(devinfo, insn, state->pred_inv);

   if (is_3src(isa, brw_eu_inst_opcode(isa, insn)) &&
       state->access_mode == BRW_ALIGN_16) {
      brw_eu_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_eu_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   } else {
      brw_eu_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_eu_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   }

   if (devinfo->ver < 20)
      brw_eu_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}

static brw_eu_inst *
brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned alignment)
{
   assert(util_is_power_of_two_or_zero(sizeof(brw_eu_inst)));
   assert(util_is_power_of_two_or_zero(alignment));
   const unsigned align_insn = MAX2(alignment / sizeof(brw_eu_inst), 1);
   const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
   const unsigned new_nr_insn = start_insn + nr_insn;

   if (p->store_size < new_nr_insn) {
      p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_eu_inst));
      p->store = reralloc(p->mem_ctx, p->store, brw_eu_inst, p->store_size);
   }

   /* Memset any padding due to alignment to 0. We don't want to be hashing
    * or caching a bunch of random bits we got from a memory allocation.
    */
   if (p->nr_insn < start_insn) {
      memset(&p->store[p->nr_insn], 0,
             (start_insn - p->nr_insn) * sizeof(brw_eu_inst));
   }

   assert(p->next_insn_offset == p->nr_insn * sizeof(brw_eu_inst));
   p->nr_insn = new_nr_insn;
   p->next_insn_offset = new_nr_insn * sizeof(brw_eu_inst);

   return &p->store[start_insn];
}
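
/* Worked example (illustration, assuming sizeof(brw_eu_inst) == 16):
 * requesting nr_insn == 2 with a 64-byte alignment gives align_insn =
 * 64 / 16 = 4, so with p->nr_insn == 5 the new run starts at
 * ALIGN(5, 4) == 8 and the padding slots [5, 8) are zeroed above.
 */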

void
brw_realign(struct brw_codegen *p, unsigned alignment)
{
   brw_append_insns(p, 0, alignment);
}

int
brw_append_data(struct brw_codegen *p, void *data,
                unsigned size, unsigned alignment)
{
   unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_eu_inst));
   void *dst = brw_append_insns(p, nr_insn, alignment);
   memcpy(dst, data, size);

   /* If it's not a whole number of instructions, memset the end */
   if (size < nr_insn * sizeof(brw_eu_inst))
      memset(dst + size, 0, nr_insn * sizeof(brw_eu_inst) - size);

   return dst - (void *)p->store;
}
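
/* Hypothetical use, for illustration only: append out-of-band data such
 * as a small constant table to the instruction stream and keep the
 * returned byte offset to address it later.
 *
 *    uint32_t table[4] = { 1, 2, 4, 8 };
 *    int off = brw_append_data(p, table, sizeof(table), 64);
 */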

#define next_insn brw_next_insn

brw_eu_inst *
brw_next_insn(struct brw_codegen *p, unsigned opcode)
{
   brw_eu_inst *insn = brw_append_insns(p, 1, sizeof(brw_eu_inst));

   memset(insn, 0, sizeof(*insn));
   brw_eu_inst_set_opcode(p->isa, insn, opcode);

   /* Apply the default instruction state */
   brw_eu_inst_set_state(p->isa, insn, p->current);

   return insn;
}
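
/* Minimal sketch of the raw emission pattern this enables (hypothetical
 * `dst`/`src` registers; most callers go through the brw_ALU wrappers
 * defined later in this file):
 *
 *    brw_eu_inst *mov = brw_next_insn(p, BRW_OPCODE_MOV);
 *    brw_set_dest(p, mov, dst);
 *    brw_set_src0(p, mov, src);
 */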

void
brw_add_reloc(struct brw_codegen *p, uint32_t id,
              enum brw_shader_reloc_type type,
              uint32_t offset, uint32_t delta)
{
   if (p->num_relocs + 1 > p->reloc_array_size) {
      p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
      p->relocs = reralloc(p->mem_ctx, p->relocs,
                           struct brw_shader_reloc, p->reloc_array_size);
   }

   p->relocs[p->num_relocs++] = (struct brw_shader_reloc) {
      .id = id,
      .type = type,
      .offset = offset,
      .delta = delta,
   };
}
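
/* Hypothetical call site, for illustration: record that the 32-bit value
 * at byte `offset` of the program should be patched at upload time with
 * the value identified by `id` plus `delta` (the reloc type name below
 * is an assumption about the enum's spelling).
 *
 *    brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_U32, offset, 0);
 */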

static brw_eu_inst *
brw_alu1(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src)
{
   brw_eu_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   return insn;
}

static brw_eu_inst *
brw_alu2(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
{
   /* 64-bit immediates are only supported on 1-src instructions */
   assert(src0.file != IMM ||
          brw_type_size_bytes(src0.type) <= 4);
   assert(src1.file != IMM ||
          brw_type_size_bytes(src1.type) <= 4);

   brw_eu_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
   return insn;
}

static enum gfx10_align1_3src_vertical_stride
to_3src_align1_vstride(const struct intel_device_info *devinfo,
                       enum brw_vertical_stride vstride)
{
   switch (vstride) {
   case BRW_VERTICAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
   case BRW_VERTICAL_STRIDE_1:
      assert(devinfo->ver >= 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
   case BRW_VERTICAL_STRIDE_2:
      assert(devinfo->ver < 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
   case BRW_VERTICAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
   case BRW_VERTICAL_STRIDE_8:
   case BRW_VERTICAL_STRIDE_16:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8;
   default:
      unreachable("invalid vstride");
   }
}

static enum gfx10_align1_3src_src_horizontal_stride
to_3src_align1_hstride(enum brw_horizontal_stride hstride)
{
   switch (hstride) {
   case BRW_HORIZONTAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0;
   case BRW_HORIZONTAL_STRIDE_1:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1;
   case BRW_HORIZONTAL_STRIDE_2:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2;
   case BRW_HORIZONTAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4;
   default:
      unreachable("invalid hstride");
   }
}

static brw_eu_inst *
brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
         struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *inst = next_insn(p, opcode);

   assert(dest.nr < XE3_MAX_GRF);

   if (devinfo->ver <= 9) {
      assert(src0.file != IMM && src2.file != IMM);
   } else if (devinfo->ver <= 11) {
      /* On Ice Lake, BFE and CSEL cannot have any immediate sources. */
      assert((opcode != BRW_OPCODE_BFE && opcode != BRW_OPCODE_CSEL) ||
             (src0.file != IMM && src2.file != IMM));

      /* On Ice Lake, DP4A and MAD can only have one immediate source. */
      assert((opcode != BRW_OPCODE_DP4A && opcode != BRW_OPCODE_MAD) ||
             !(src0.file == IMM && src2.file == IMM));
   } else {
      /* Having two immediate sources is allowed, but this should have been
       * converted to a regular ADD by brw_opt_algebraic.
       */
      assert(opcode != BRW_OPCODE_ADD3 ||
             !(src0.file == IMM && src2.file == IMM));
   }

   /* BFI2 cannot have any immediate sources on any platform. */
   assert(opcode != BRW_OPCODE_BFI2 ||
          (src0.file != IMM && src2.file != IMM));

   assert(src0.file == IMM || src0.nr < XE3_MAX_GRF);
   assert(src1.file != IMM && src1.nr < XE3_MAX_GRF);
   assert(src2.file == IMM || src2.nr < XE3_MAX_GRF);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);

   if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
      assert(dest.file == FIXED_GRF ||
             (dest.file == ARF &&
              (dest.nr & 0xF0) == BRW_ARF_ACCUMULATOR));

      brw_eu_inst_set_3src_a1_dst_reg_file(devinfo, inst, phys_file(dest));
      brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
      brw_eu_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest) / 8);
      brw_eu_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1);

      if (brw_type_is_float(dest.type)) {
         brw_eu_inst_set_3src_a1_exec_type(devinfo, inst,
                                           BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
      } else {
         brw_eu_inst_set_3src_a1_exec_type(devinfo, inst,
                                           BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
      }

      brw_eu_inst_set_3src_a1_dst_type(devinfo, inst, dest.type);
      brw_eu_inst_set_3src_a1_src0_type(devinfo, inst, src0.type);
      brw_eu_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
      brw_eu_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);

      if (src0.file == IMM) {
         brw_eu_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud);
      } else {
         brw_eu_inst_set_3src_a1_src0_vstride(
            devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
         brw_eu_inst_set_3src_a1_src0_hstride(devinfo, inst,
                                              to_3src_align1_hstride(src0.hstride));
         brw_eu_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));
         brw_eu_inst_set_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
         brw_eu_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
         brw_eu_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      }
      brw_eu_inst_set_3src_a1_src1_vstride(
         devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
      brw_eu_inst_set_3src_a1_src1_hstride(devinfo, inst,
                                           to_3src_align1_hstride(src1.hstride));

      brw_eu_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
      if (src1.file == ARF) {
         brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
      } else {
         brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
      }
      brw_eu_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_eu_inst_set_3src_src1_negate(devinfo, inst, src1.negate);

      if (src2.file == IMM) {
         brw_eu_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud);
      } else {
         brw_eu_inst_set_3src_a1_src2_hstride(devinfo, inst,
                                              to_3src_align1_hstride(src2.hstride));
         /* no vstride on src2 */
         brw_eu_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
         brw_eu_inst_set_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
         brw_eu_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
         brw_eu_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      }

      assert(src0.file == FIXED_GRF ||
             src0.file == IMM);
      assert(src1.file == FIXED_GRF ||
             (src1.file == ARF &&
              src1.nr == BRW_ARF_ACCUMULATOR));
      assert(src2.file == FIXED_GRF ||
             src2.file == IMM);

      if (devinfo->ver >= 12) {
         if (src0.file == IMM) {
            brw_eu_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
         } else {
            brw_eu_inst_set_3src_a1_src0_reg_file(devinfo, inst, phys_file(src0));
         }

         brw_eu_inst_set_3src_a1_src1_reg_file(devinfo, inst, phys_file(src1));

         if (src2.file == IMM) {
            brw_eu_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
         } else {
            brw_eu_inst_set_3src_a1_src2_reg_file(devinfo, inst, phys_file(src2));
         }
      } else {
         brw_eu_inst_set_3src_a1_src0_reg_file(devinfo, inst, phys_file(src0));
         brw_eu_inst_set_3src_a1_src1_reg_file(devinfo, inst, phys_file(src1));
         brw_eu_inst_set_3src_a1_src2_reg_file(devinfo, inst, phys_file(src2));
      }

   } else {
      assert(dest.file == FIXED_GRF);
      assert(dest.type == BRW_TYPE_F ||
             dest.type == BRW_TYPE_DF ||
             dest.type == BRW_TYPE_D ||
             dest.type == BRW_TYPE_UD ||
             dest.type == BRW_TYPE_HF);
      brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
      brw_eu_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
      brw_eu_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);

      assert(src0.file == FIXED_GRF);
      brw_eu_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
      brw_eu_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, src0.subnr);
      brw_eu_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
      brw_eu_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
      brw_eu_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      brw_eu_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
                                             src0.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src1.file == FIXED_GRF);
      brw_eu_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
      brw_eu_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, src1.subnr);
      brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
      brw_eu_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_eu_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
      brw_eu_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
                                             src1.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src2.file == FIXED_GRF);
      brw_eu_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
      brw_eu_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, src2.subnr);
      brw_eu_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
      brw_eu_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
      brw_eu_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      brw_eu_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
                                             src2.vstride == BRW_VERTICAL_STRIDE_0);

      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      brw_eu_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
      brw_eu_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);

      /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
       *
       *    "Three source instructions can use operands with mixed-mode
       *     precision. When SrcType field is set to :f or :hf it defines
       *     precision for source 0 only, and fields Src1Type and Src2Type
       *     define precision for other source operands:
       *
       *     0b = :f. Single precision Float (32-bit).
       *     1b = :hf. Half precision Float (16-bit)."
       */
      if (src1.type == BRW_TYPE_HF)
         brw_eu_inst_set_3src_a16_src1_type(devinfo, inst, 1);

      if (src2.type == BRW_TYPE_HF)
         brw_eu_inst_set_3src_a16_src2_type(devinfo, inst, 1);
   }

   return inst;
}

static brw_eu_inst *
brw_dpas_three_src(struct brw_codegen *p, enum opcode opcode,
                   enum gfx12_systolic_depth sdepth, unsigned rcount, struct brw_reg dest,
                   struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *inst = next_insn(p, opcode);

   assert(dest.file == FIXED_GRF);
   brw_eu_inst_set_dpas_3src_dst_reg_file(devinfo, inst,
                                          FIXED_GRF);
   brw_eu_inst_set_dpas_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
   brw_eu_inst_set_dpas_3src_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));

   if (brw_type_is_float(dest.type)) {
      brw_eu_inst_set_dpas_3src_exec_type(devinfo, inst,
                                          BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
   } else {
      brw_eu_inst_set_dpas_3src_exec_type(devinfo, inst,
                                          BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
   }

   brw_eu_inst_set_dpas_3src_sdepth(devinfo, inst, sdepth);
   brw_eu_inst_set_dpas_3src_rcount(devinfo, inst, rcount - 1);

   brw_eu_inst_set_dpas_3src_dst_type(devinfo, inst, dest.type);
   brw_eu_inst_set_dpas_3src_src0_type(devinfo, inst, src0.type);
   brw_eu_inst_set_dpas_3src_src1_type(devinfo, inst, src1.type);
   brw_eu_inst_set_dpas_3src_src2_type(devinfo, inst, src2.type);

   assert(src0.file == FIXED_GRF ||
          (src0.file == ARF &&
           src0.nr == BRW_ARF_NULL));

   brw_eu_inst_set_dpas_3src_src0_reg_file(devinfo, inst, phys_file(src0));
   brw_eu_inst_set_dpas_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
   brw_eu_inst_set_dpas_3src_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));

   assert(src1.file == FIXED_GRF);

   brw_eu_inst_set_dpas_3src_src1_reg_file(devinfo, inst, phys_file(src1));
   brw_eu_inst_set_dpas_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
   brw_eu_inst_set_dpas_3src_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
   brw_eu_inst_set_dpas_3src_src1_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);

   assert(src2.file == FIXED_GRF);

   brw_eu_inst_set_dpas_3src_src2_reg_file(devinfo, inst, phys_file(src2));
   brw_eu_inst_set_dpas_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
   brw_eu_inst_set_dpas_3src_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
   brw_eu_inst_set_dpas_3src_src2_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);

   return inst;
}

/***********************************************************************
 * Convenience routines.
 */
#define ALU1(OP)                                          \
brw_eu_inst *brw_##OP(struct brw_codegen *p,              \
                      struct brw_reg dest,                \
                      struct brw_reg src0)                \
{                                                         \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);       \
}

#define ALU2(OP)                                          \
brw_eu_inst *brw_##OP(struct brw_codegen *p,              \
                      struct brw_reg dest,                \
                      struct brw_reg src0,                \
                      struct brw_reg src1)                \
{                                                         \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
}
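
/* For illustration (hand expansion, not part of the original file):
 * ALU2(AND) used below produces the following wrapper:
 *
 *    brw_eu_inst *brw_AND(struct brw_codegen *p,
 *                         struct brw_reg dest,
 *                         struct brw_reg src0,
 *                         struct brw_reg src1)
 *    {
 *       return brw_alu2(p, BRW_OPCODE_AND, dest, src0, src1);
 *    }
 */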

#define ALU3(OP)                                                 \
brw_eu_inst *brw_##OP(struct brw_codegen *p,                     \
                      struct brw_reg dest,                       \
                      struct brw_reg src0,                       \
                      struct brw_reg src1,                       \
                      struct brw_reg src2)                       \
{                                                                \
   if (p->current->access_mode == BRW_ALIGN_16) {                \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                 \
         src0.swizzle = BRW_SWIZZLE_XXXX;                        \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                 \
         src1.swizzle = BRW_SWIZZLE_XXXX;                        \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                 \
         src2.swizzle = BRW_SWIZZLE_XXXX;                        \
   }                                                             \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);  \
}

#define ALU3F(OP)                                                \
brw_eu_inst *brw_##OP(struct brw_codegen *p,                     \
                      struct brw_reg dest,                       \
                      struct brw_reg src0,                       \
                      struct brw_reg src1,                       \
                      struct brw_reg src2)                       \
{                                                                \
   assert(dest.type == BRW_TYPE_F ||                             \
          dest.type == BRW_TYPE_DF);                             \
   if (dest.type == BRW_TYPE_F) {                                \
      assert(src0.type == BRW_TYPE_F);                           \
      assert(src1.type == BRW_TYPE_F);                           \
      assert(src2.type == BRW_TYPE_F);                           \
   } else if (dest.type == BRW_TYPE_DF) {                        \
      assert(src0.type == BRW_TYPE_DF);                          \
      assert(src1.type == BRW_TYPE_DF);                          \
      assert(src2.type == BRW_TYPE_DF);                          \
   }                                                             \
                                                                 \
   if (p->current->access_mode == BRW_ALIGN_16) {                \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                 \
         src0.swizzle = BRW_SWIZZLE_XXXX;                        \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                 \
         src1.swizzle = BRW_SWIZZLE_XXXX;                        \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                 \
         src2.swizzle = BRW_SWIZZLE_XXXX;                        \
   }                                                             \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);  \
}

ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
|
|
|
|
ALU3(MAD)
|
2013-06-13 14:55:18 -07:00
|
|
|
|
ALU3F(LRP)
|
2013-04-09 17:56:19 -07:00
|
|
|
|
ALU1(BFREV)
|
|
|
|
|
|
ALU3(BFE)
|
|
|
|
|
|
ALU2(BFI1)
|
|
|
|
|
|
ALU3(BFI2)
|
|
|
|
|
|
ALU1(FBH)
|
|
|
|
|
|
ALU1(FBL)
|
|
|
|
|
|
ALU1(CBIT)
|
2013-09-19 13:01:08 -07:00
|
|
|
|
ALU2(ADDC)
|
|
|
|
|
|
ALU2(SUBB)
|
2020-06-05 22:40:26 -07:00
|
|
|
|
ALU3(ADD3)
|
2024-02-15 22:09:40 -08:00
|
|
|
|
ALU1(MOV)
|
2010-10-14 11:40:19 -07:00
|
|
|
|
|
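
/* A minimal usage sketch (hypothetical registers, assuming a valid
 * brw_codegen context "p"): each ALUn(OP) line above expands to a
 * brw_##OP() helper wrapping brw_alu1/brw_alu2/brw_alu3, e.g.:
 *
 *    brw_AND(p, dst, src_a, src_b);   // emits BRW_OPCODE_AND via brw_alu2
 *    brw_MAD(p, dst, a, b, c);        // emits BRW_OPCODE_MAD via brw_alu3
 */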

brw_eu_inst *
brw_ADD(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_TYPE_F ||
       (src0.file == IMM &&
        src0.type == BRW_TYPE_VF)) {
      assert(src1.type != BRW_TYPE_UD);
      assert(src1.type != BRW_TYPE_D);
   }

   if (src1.type == BRW_TYPE_F ||
       (src1.file == IMM &&
        src1.type == BRW_TYPE_VF)) {
      assert(src0.type != BRW_TYPE_UD);
      assert(src0.type != BRW_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}

brw_eu_inst *
brw_AVG(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   assert(dest.type == src0.type);
   assert(src0.type == src1.type);
   switch (src0.type) {
   case BRW_TYPE_B:
   case BRW_TYPE_UB:
   case BRW_TYPE_W:
   case BRW_TYPE_UW:
   case BRW_TYPE_D:
   case BRW_TYPE_UD:
      break;
   default:
      unreachable("Bad type for brw_AVG");
   }

   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}

brw_eu_inst *
brw_MUL(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_TYPE_D ||
       src0.type == BRW_TYPE_UD ||
       src1.type == BRW_TYPE_D ||
       src1.type == BRW_TYPE_UD) {
      assert(dest.type != BRW_TYPE_F);
   }

   if (src0.type == BRW_TYPE_F ||
       (src0.file == IMM &&
        src0.type == BRW_TYPE_VF)) {
      assert(src1.type != BRW_TYPE_UD);
      assert(src1.type != BRW_TYPE_D);
   }

   if (src1.type == BRW_TYPE_F ||
       (src1.file == IMM &&
        src1.type == BRW_TYPE_VF)) {
      assert(src0.type != BRW_TYPE_UD);
      assert(src0.type != BRW_TYPE_D);
   }

   assert(src0.file != ARF ||
          src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != ARF ||
          src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
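
/* A minimal usage sketch (hypothetical GRF registers): per the checks in
 * brw_MUL() above, a 32-bit integer multiply must not write a float
 * destination and must not source the accumulator:
 *
 *    brw_MUL(p, retype(dst, BRW_TYPE_D),
 *            retype(a, BRW_TYPE_D), retype(b, BRW_TYPE_D));
 */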

brw_eu_inst *
brw_LINE(struct brw_codegen *p, struct brw_reg dest,
         struct brw_reg src0, struct brw_reg src1)
{
   src0.vstride = BRW_VERTICAL_STRIDE_0;
   src0.width = BRW_WIDTH_1;
   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
   return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
}

brw_eu_inst *
brw_PLN(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   src0.vstride = BRW_VERTICAL_STRIDE_0;
   src0.width = BRW_WIDTH_1;
   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
   src1.vstride = BRW_VERTICAL_STRIDE_8;
   src1.width = BRW_WIDTH_8;
   src1.hstride = BRW_HORIZONTAL_STRIDE_1;
   return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
}

brw_eu_inst *
brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
         unsigned rcount, struct brw_reg dest, struct brw_reg src0,
         struct brw_reg src1, struct brw_reg src2)
{
   return brw_dpas_three_src(p, BRW_OPCODE_DPAS, sdepth, rcount, dest, src0,
                             src1, src2);
}

void brw_NOP(struct brw_codegen *p)
{
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_NOP);
   memset(insn, 0, sizeof(*insn));
   brw_eu_inst_set_opcode(p->isa, insn, BRW_OPCODE_NOP);
}

void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func)
{
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_SYNC);
   brw_eu_inst_set_cond_modifier(p->devinfo, insn, func);
}

/***********************************************************************
 * Comparisons, if/else/endif
 */

brw_eu_inst *
brw_JMPI(struct brw_codegen *p, struct brw_reg index,
         unsigned predicate_control)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_reg ip = brw_ip_reg();
   brw_eu_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);

   brw_eu_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
   brw_eu_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
   brw_eu_inst_set_pred_control(devinfo, inst, predicate_control);

   return inst;
}

static void
push_if_stack(struct brw_codegen *p, brw_eu_inst *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
                             p->if_stack_array_size);
   }
}

static brw_eu_inst *
pop_if_stack(struct brw_codegen *p)
{
   p->if_stack_depth--;
   return &p->store[p->if_stack[p->if_stack_depth]];
}

static void
push_loop_stack(struct brw_codegen *p, brw_eu_inst *inst)
{
   if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
      p->loop_stack_array_size *= 2;
      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
                               p->loop_stack_array_size);
   }

   p->loop_stack[p->loop_stack_depth] = inst - p->store;
   p->loop_stack_depth++;
}

static brw_eu_inst *
get_inner_do_insn(struct brw_codegen *p)
{
   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
}

/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, e.g. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
brw_eu_inst *
brw_IF(struct brw_codegen *p, unsigned execute_size)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction: */
   brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_TYPE_D)));
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   brw_eu_inst_set_jip(devinfo, insn, 0);
   brw_eu_inst_set_uip(devinfo, insn, 0);

   brw_eu_inst_set_exec_size(devinfo, insn, execute_size);
   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   push_if_stack(p, insn);
   return insn;
}

/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_codegen *p,
              brw_eu_inst *if_inst, brw_eu_inst *else_inst,
              brw_eu_inst *endif_inst)
{
   const struct intel_device_info *devinfo = p->devinfo;

   assert(if_inst != NULL && brw_eu_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || brw_eu_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE);

   unsigned br = brw_jump_scale(devinfo);

   assert(brw_eu_inst_opcode(p->isa, endif_inst) == BRW_OPCODE_ENDIF);
   brw_eu_inst_set_exec_size(devinfo, endif_inst,
                             brw_eu_inst_exec_size(devinfo, if_inst));

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      brw_eu_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
      brw_eu_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
   } else {
      brw_eu_inst_set_exec_size(devinfo, else_inst,
                                brw_eu_inst_exec_size(devinfo, if_inst));

      /* Patch ELSE -> ENDIF.
       * The IF instruction's JIP should point just past the ELSE.
       */
      brw_eu_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
      /* The IF instruction's UIP and ELSE's JIP should point to ENDIF. */
      brw_eu_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));

      if (devinfo->ver < 11) {
         /* Set the ELSE instruction to use branch_ctrl with a join
          * jump target pointing at the NOP inserted right before
          * the ENDIF instruction in order to make sure it is
          * executed in all cases, since attempting to do the same
          * as on other generations could cause the EU to jump at
          * the instruction immediately after the ENDIF due to
          * Wa_220160235, which could cause the program to continue
          * running with all channels disabled.
          */
         brw_eu_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst - 1));
         brw_eu_inst_set_branch_control(devinfo, else_inst, true);
      } else {
         brw_eu_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
      }

      /* Since we don't set branch_ctrl on Gfx11+, the ELSE's JIP and UIP
       * both should point to ENDIF on those platforms.
       */
      brw_eu_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
   }
}

void
brw_ELSE(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   brw_eu_inst_set_jip(devinfo, insn, 0);
   brw_eu_inst_set_uip(devinfo, insn, 0);

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   push_if_stack(p, insn);
}

void
brw_ENDIF(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn = NULL;
   brw_eu_inst *else_inst = NULL;
   brw_eu_inst *if_inst = NULL;
   brw_eu_inst *tmp;

   assert(p->if_stack_depth > 0);

   if (devinfo->ver < 11 &&
       brw_eu_inst_opcode(p->isa, &p->store[p->if_stack[
                             p->if_stack_depth - 1]]) == BRW_OPCODE_ELSE) {
      /* Insert a NOP to be specified as the join instruction within the
       * ELSE block, which is valid for an ELSE instruction with
       * branch_ctrl on.  The ELSE instruction will be set to jump
       * here instead of to the ENDIF instruction, since attempting to
       * do the latter would prevent the ENDIF from being executed in
       * some cases due to Wa_220160235, which could cause the program
       * to continue running with all channels disabled.
       */
      brw_NOP(p);
   }

   /* A single next_insn() may change the base address of the instruction
    * store (p->store), so call it first, before converting the stored
    * if-stack indices back into instruction pointers.
    */
   insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   tmp = pop_if_stack(p);
   if (brw_eu_inst_opcode(p->isa, tmp) == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   brw_set_src0(p, insn, brw_imm_d(0));

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   brw_eu_inst_set_jip(devinfo, insn, 2);
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
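
/* A minimal usage sketch (hypothetical emitter fragment): the helpers pair
 * up through if_stack, and brw_ENDIF() does all of the back-patching:
 *
 *    brw_IF(p, BRW_EXECUTE_8);   // pushed onto if_stack, JIP/UIP zeroed
 *    ...emit the then-block...
 *    brw_ELSE(p);                // optional; also pushed onto if_stack
 *    ...emit the else-block...
 *    brw_ENDIF(p);               // pops both, patch_IF_ELSE() fills JIP/UIP
 */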

brw_eu_inst *
brw_BREAK(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   brw_set_src0(p, insn, brw_imm_d(0x0));
   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));

   return insn;
}

brw_eu_inst *
brw_CONT(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_imm_d(0x0));

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
   return insn;
}

brw_eu_inst *
brw_HALT(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   if (devinfo->ver < 12) {
      brw_set_src0(p, insn, brw_imm_d(0x0));
   }

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
   return insn;
}

/* DO/WHILE loop:
 *
 * The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For gfx6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
brw_eu_inst *
brw_DO(struct brw_codegen *p, unsigned execute_size)
{
   push_loop_stack(p, &p->store[p->nr_insn]);
   return &p->store[p->nr_insn];
}

brw_eu_inst *
brw_WHILE(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn, *do_insn;
   unsigned br = brw_jump_scale(devinfo);

   insn = next_insn(p, BRW_OPCODE_WHILE);
   do_insn = get_inner_do_insn(p);

   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   brw_eu_inst_set_jip(devinfo, insn, br * (do_insn - insn));

   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);

   p->loop_stack_depth--;

   return insn;
}
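
/* A minimal usage sketch (hypothetical emitter fragment): brw_DO() only
 * records the loop head on loop_stack, and brw_WHILE() emits the backward
 * jump to it; any brw_BREAK()/brw_CONT() inside the body get their jump
 * targets back-patched separately:
 *
 *    brw_DO(p, BRW_EXECUTE_8);
 *    ...emit the loop body, possibly brw_BREAK(p) or brw_CONT(p)...
 *    brw_WHILE(p);               // JIP = br * (do_insn - while_insn)
 */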

void brw_CMP(struct brw_codegen *p,
             struct brw_reg dest,
             unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_CMP);

   brw_eu_inst_set_cond_modifier(devinfo, insn, conditional);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}

void brw_CMPN(struct brw_codegen *p,
              struct brw_reg dest,
              unsigned conditional,
              struct brw_reg src0,
              struct brw_reg src1)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_CMPN);

   brw_eu_inst_set_cond_modifier(devinfo, insn, conditional);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
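
/* A minimal usage sketch (hypothetical registers): CMP updates the flag
 * register through its conditional modifier, which can then predicate a
 * following IF:
 *
 *    brw_CMP(p, retype(brw_null_reg(), BRW_TYPE_D), BRW_CONDITIONAL_L,
 *            x, y);
 *    brw_IF(p, BRW_EXECUTE_8);   // predicated with BRW_PREDICATE_NORMAL
 *    ...
 *    brw_ENDIF(p);
 */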

/***********************************************************************
 * Helpers for the various SEND message types:
 */

void gfx6_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == FIXED_GRF);
   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);

   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_TYPE_F);
      assert(src1.type != BRW_TYPE_F);
      assert(src1.file == FIXED_GRF ||
             src1.file == IMM);
      /* From BSpec 6647/47428 "[Instruction] Extended Math Function":
       *    INT DIV function does not support source modifiers.
       */
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   } else {
      assert(src0.type == BRW_TYPE_F ||
             (src0.type == BRW_TYPE_HF && devinfo->ver >= 9));
      assert(src1.type == BRW_TYPE_F ||
             (src1.type == BRW_TYPE_HF && devinfo->ver >= 9));
   }

   /* This workaround says that we cannot use scalar broadcast with HF types.
    * However, for is_scalar values, all 16 elements contain the same value,
    * so we can replace a <0,1,0> region with <16,16,1> without ill effect.
    */
   if (intel_needs_workaround(devinfo, 22016140776)) {
      if (src0.is_scalar && src0.type == BRW_TYPE_HF) {
         src0.vstride = BRW_VERTICAL_STRIDE_16;
         src0.width = BRW_WIDTH_16;
         src0.hstride = BRW_HORIZONTAL_STRIDE_1;
         src0.swizzle = BRW_SWIZZLE_XYZW;
      }

      if (src1.is_scalar && src1.type == BRW_TYPE_HF) {
         src1.vstride = BRW_VERTICAL_STRIDE_16;
         src1.width = BRW_WIDTH_16;
         src1.hstride = BRW_HORIZONTAL_STRIDE_1;
         src1.swizzle = BRW_SWIZZLE_XYZW;
      }
   }

   brw_eu_inst_set_math_function(devinfo, insn, function);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
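
/* A minimal usage sketch (hypothetical registers): single-operand math
 * functions are emitted with a null second source, e.g. a float reciprocal:
 *
 *    gfx6_math(p, dst, BRW_MATH_FUNCTION_INV, src,
 *              retype(brw_null_reg(), BRW_TYPE_F));
 *
 * while the INT DIV functions require integer operands and, per the asserts
 * above, no source modifiers.
 */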

void
brw_send_indirect_message(struct brw_codegen *p,
                          unsigned sfid,
                          struct brw_reg dst,
                          struct brw_reg payload,
                          struct brw_reg desc,
                          bool eot,
                          bool gather)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_eu_inst *send;

   dst = retype(dst, BRW_TYPE_UW);

   assert(desc.type == BRW_TYPE_UD);

   if (desc.file == IMM) {
      send = next_insn(p, BRW_OPCODE_SEND);
      brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
      brw_set_desc(p, send, desc.ud, gather);
   } else {
      assert(desc.file == ADDRESS);
      assert(desc.subnr == 0);
|
i965: Factor out logic to build a send message instruction with indirect descriptor.
This is going to be useful because the Gen7+ uniform and varying pull
constant, texturing, typed and untyped surface read, write, and atomic
generation code on both the vec4 and fs back-ends requires the same logic
to handle conditionally indirect surface indices. In pseudocode:
| if (surface.file == BRW_IMMEDIATE_VALUE) {
| inst = brw_SEND(p, dst, payload);
| set_descriptor_control_bits(inst, surface, ...);
| } else {
| inst = brw_OR(p, addr, surface, 0);
| set_descriptor_control_bits(inst, ...);
| inst = brw_SEND(p, dst, payload);
| set_indirect_send_descriptor(inst, addr);
| }
This patch abstracts out this frequently recurring pattern so we can
now write:
| inst = brw_send_indirect_message(p, sfid, dst, payload, surface)
| set_descriptor_control_bits(inst, ...);
without worrying about handling the immediate and indirect surface
index cases explicitly.
v2: Rebase. Improve documentation and commit message. (Topi)
Preserve UW destination type cargo-cult. (Topi, Ken, Matt)
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
2015-03-19 15:44:24 +02:00
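To see the helper in use, here is a hedged sketch of a Gen7+ caller (a sketch only: set_descriptor_control_bits() is the placeholder from the pseudocode above, not a real function, and sfid/dst/payload/surface are assumed to be in scope):
   /* One call covers both descriptor cases: if surface is an
    * immediate, the descriptor bits land in the instruction itself;
    * otherwise the helper ORs surface into the address register and
    * marks the SEND as using an indirect descriptor. */
   brw_inst *inst = brw_send_indirect_message(p, sfid, dst, payload, surface);
   set_descriptor_control_bits(inst, ...);  /* message-specific bits */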
|
|
|
|
send = next_insn(p, BRW_OPCODE_SEND);
|
2024-04-20 17:08:02 -07:00
|
|
|
|
brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
|
2021-03-29 14:41:58 -07:00
|
|
|
|
if (devinfo->ver >= 12)
|
2024-12-06 12:50:23 -08:00
|
|
|
|
brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, true);
|
2019-09-03 12:18:38 -07:00
|
|
|
|
else
|
2024-02-29 20:51:50 +02:00
|
|
|
|
brw_set_src1(p, send, desc);
|
2015-03-19 15:44:24 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
brw_set_dest(p, send, dst);
|
2024-12-06 12:50:23 -08:00
|
|
|
|
brw_eu_inst_set_sfid(devinfo, send, sfid);
|
|
|
|
|
|
brw_eu_inst_set_eot(devinfo, send, eot);
|
2015-03-19 15:44:24 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
2018-11-15 15:17:06 -06:00
|
|
|
|
void
|
|
|
|
|
|
brw_send_indirect_split_message(struct brw_codegen *p,
|
|
|
|
|
|
unsigned sfid,
|
|
|
|
|
|
struct brw_reg dst,
|
|
|
|
|
|
struct brw_reg payload0,
|
|
|
|
|
|
struct brw_reg payload1,
|
|
|
|
|
|
struct brw_reg desc,
|
|
|
|
|
|
struct brw_reg ex_desc,
|
2024-02-29 20:51:50 +02:00
|
|
|
|
unsigned ex_mlen,
|
2022-10-14 17:49:00 +03:00
|
|
|
|
bool ex_bso,
|
2024-11-20 08:12:52 -08:00
|
|
|
|
bool eot,
|
|
|
|
|
|
bool gather)
|
2018-11-15 15:17:06 -06:00
|
|
|
|
{
|
2021-04-05 13:19:39 -07:00
|
|
|
|
const struct intel_device_info *devinfo = p->devinfo;
|
2024-12-14 14:44:47 -08:00
|
|
|
|
struct brw_eu_inst *send;
|
2018-11-15 15:17:06 -06:00
|
|
|
|
|
2024-04-20 17:08:02 -07:00
|
|
|
|
dst = retype(dst, BRW_TYPE_UW);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
|
2024-04-20 17:08:02 -07:00
|
|
|
|
assert(desc.type == BRW_TYPE_UD);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
|
2021-03-29 14:41:58 -07:00
|
|
|
|
send = next_insn(p, devinfo->ver >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
brw_set_dest(p, send, dst);
|
2024-04-20 17:08:02 -07:00
|
|
|
|
brw_set_src0(p, send, retype(payload0, BRW_TYPE_UD));
|
|
|
|
|
|
brw_set_src1(p, send, retype(payload1, BRW_TYPE_UD));
|
2018-11-15 15:17:06 -06:00
|
|
|
|
|
2024-08-20 11:48:54 -07:00
|
|
|
|
if (desc.file == IMM) {
|
2024-12-06 12:50:23 -08:00
|
|
|
|
brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, 0);
|
|
|
|
|
|
brw_eu_inst_set_send_desc(devinfo, send, desc.ud);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
} else {
|
2024-12-10 10:49:08 +02:00
|
|
|
|
assert(desc.file == ADDRESS);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
assert(desc.subnr == 0);
|
2024-12-06 12:50:23 -08:00
|
|
|
|
brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, 1);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
}
|
|
|
|
|
|
|
2024-08-20 11:48:54 -07:00
|
|
|
|
if (ex_desc.file == IMM) {
|
2024-12-06 12:50:23 -08:00
|
|
|
|
brw_eu_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
|
2024-11-20 08:12:52 -08:00
|
|
|
|
brw_eu_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud, gather);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
} else {
|
2024-12-10 10:49:08 +02:00
|
|
|
|
assert(ex_desc.file == ADDRESS);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
assert((ex_desc.subnr & 0x3) == 0);
|
2024-12-06 12:50:23 -08:00
|
|
|
|
brw_eu_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
|
|
|
|
|
|
brw_eu_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, phys_subnr(devinfo, ex_desc) >> 2);
|
2022-09-12 17:00:32 -07:00
|
|
|
|
|
brw: Rename shared function enums for clarity
Our name for this enum was brw_message_target, but it's better known as
shared function ID or SFID. Call it brw_sfid to make it easier to find.
Now that brw only supports Gfx9+, we don't particularly care whether
SFIDs were introduced on Gfx4, Gfx6, or Gfx7.5. Also, the LSC SFIDs
were confusingly tagged "GFX12" but aren't available on Gfx12.0; they
were introduced with Alchemist/Meteorlake.
GFX6_SFID_DATAPORT_SAMPLER_CACHE in particular was confusing. It sounds
like the SFID to use for the sampler on Gfx6+, however it has nothing to
do with the sampler at all. BRW_SFID_SAMPLER remains the sampler SFID.
On Haswell, we ran out of messages on the main data cache data port, and
so they introduced two additional ones, for more messages. The modern
Tigerlake PRMs simply call these DP_DC0, DP_DC1, and DP_DC2. I think
the "sampler" name came from some idea about reorganizing messages that
never materialized (instead, the LSC came as a much larger cleanup).
Recently we've adopted the term "HDC" for the legacy data cluster, as
opposed to "LSC" for the modern Load/Store Cache. To make clear which
SFIDs target the legacy HDC dataports, we use BRW_SFID_HDC0/1/2.
We were also citing the G45, Sandybridge, and Ivybridge PRMs for a
compiler that supports none of those platforms. Cite modern docs.
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33650>
2025-02-10 16:28:48 -08:00
|
|
|
|
if (devinfo->ver >= 20 && sfid == BRW_SFID_UGM)
|
2024-12-06 12:48:09 -08:00
|
|
|
|
brw_eu_inst_set_bits(send, 103, 99, ex_mlen / reg_unit(devinfo));
|
2018-11-15 15:17:06 -06:00
|
|
|
|
}
|
|
|
|
|
|
|
2022-10-14 17:49:00 +03:00
|
|
|
|
if (ex_bso) {
|
2023-10-02 14:21:24 +03:00
|
|
|
|
/* The send instruction ExBSO field does not exist with UGM on Gfx20+;
|
|
|
|
|
|
* it is implicitly assumed.
|
|
|
|
|
|
*
|
|
|
|
|
|
* BSpec 56890
|
|
|
|
|
|
*/
|
2025-02-10 16:28:48 -08:00
|
|
|
|
if (devinfo->ver < 20 || sfid != BRW_SFID_UGM)
|
2024-12-06 12:50:23 -08:00
|
|
|
|
brw_eu_inst_set_send_ex_bso(devinfo, send, true);
|
2024-02-29 20:51:50 +02:00
|
|
|
|
brw_eu_inst_set_send_src1_len(devinfo, send, ex_mlen / reg_unit(devinfo));
|
2022-10-14 17:49:00 +03:00
|
|
|
|
}
|
2024-12-06 12:50:23 -08:00
|
|
|
|
brw_eu_inst_set_sfid(devinfo, send, sfid);
|
|
|
|
|
|
brw_eu_inst_set_eot(devinfo, send, eot);
|
2018-11-15 15:17:06 -06:00
|
|
|
|
}
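For reference, a hedged usage sketch of the split-message helper above (argument values are illustrative; payload0, payload1, desc_imm, ex_desc_addr, and ex_mlen are assumed to have been set up by the caller):
   /* Split SEND with an immediate descriptor and an extended
    * descriptor taken from the address register; no EOT, no gather,
    * ExBSO left off. */
   brw_send_indirect_split_message(p, BRW_SFID_UGM, dst,
                                   payload0, payload1,
                                   brw_imm_ud(desc_imm),   /* desc.file == IMM */
                                   ex_desc_addr,           /* ex_desc.file == ADDRESS */
                                   ex_mlen,
                                   false /* ex_bso */,
                                   false /* eot */,
                                   false /* gather */);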
|
|
|
|
|
|
|
2016-05-14 23:53:19 -07:00
|
|
|
|
static bool
|
2021-04-05 13:19:39 -07:00
|
|
|
|
while_jumps_before_offset(const struct intel_device_info *devinfo,
|
2024-12-14 14:44:47 -08:00
|
|
|
|
brw_eu_inst *insn, int while_offset, int start_offset)
|
2016-05-14 23:53:19 -07:00
|
|
|
|
{
|
|
|
|
|
|
int scale = 16 / brw_jump_scale(devinfo);
|
2024-12-06 12:50:23 -08:00
|
|
|
|
int jip = brw_eu_inst_jip(devinfo, insn);
|
2017-01-26 13:50:42 +11:00
|
|
|
|
assert(jip < 0);
|
2016-05-14 23:53:19 -07:00
|
|
|
|
return while_offset + jip * scale <= start_offset;
|
|
|
|
|
|
}
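A worked instance of the test above may help (illustrative numbers, assuming a platform where brw_jump_scale() returns 16, i.e. JIP is a byte delta and scale is 1):
   /* A WHILE at byte offset 256 with JIP == -192 jumps back to
    * offset 64, so it closes the loop containing an instruction at
    * start_offset == 128, because 256 + (-192 * 1) <= 128. */
   int scale = 16 / 16;   /* 16 / brw_jump_scale(devinfo) */
   int while_offset = 256, jip = -192, start_offset = 128;
   bool ours = while_offset + jip * scale <= start_offset;   /* true */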
|
|
|
|
|
|
|
|
|
|
|
|
|
2010-12-01 11:46:46 -08:00
|
|
|
|
static int
|
2015-04-16 11:06:57 -07:00
|
|
|
|
brw_find_next_block_end(struct brw_codegen *p, int start_offset)
|
2010-12-01 11:46:46 -08:00
|
|
|
|
{
|
2014-05-17 12:53:56 -07:00
|
|
|
|
int offset;
|
2012-02-03 12:05:05 +01:00
|
|
|
|
void *store = p->store;
|
2021-04-05 13:19:39 -07:00
|
|
|
|
const struct intel_device_info *devinfo = p->devinfo;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
|
i965: Fix JIP to properly skip over unrelated control flow.
We've apparently always been botching JIP for sequences such as:
do
cmp.f0.0 ...
(+f0.0) break
...
if
...
else
...
endif
...
while
Normally, UIP is supposed to point to the final destination of the jump,
while in nested control flow, JIP is supposed to point to the end of the
current nesting level. It essentially bounces out of the current nested
control flow, to an instruction that has a JIP which bounces out another
level, and so on.
In the above example, when setting JIP for the BREAK, we call
brw_find_next_block_end(), which begins a search after the BREAK for the
next ENDIF, ELSE, WHILE, or HALT. It ignores the IF and finds the ELSE,
setting JIP there.
This makes no sense at all. The break is supposed to skip over the
whole if/else/endif block entirely. They have a sibling relationship,
not a nesting relationship.
This patch fixes brw_find_next_block_end() to track depth as it does
its search, and ignore anything not at depth 0. So when it sees the
IF, it ignores everything until after the ENDIF. That way, it finds
the end of the right block.
I noticed this while reading some assembly code. We believe jumping
earlier is harmless, but makes the EU walk through a bunch of disabled
instructions for no reason. I noticed that GLBenchmark Manhattan had
a shader that contained a BREAK with a bogus JIP, but didn't measure
any performance improvement (it's likely minuscule, if there is any).
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
2015-11-17 18:24:11 -08:00
|
|
|
|
int depth = 0;
|
|
|
|
|
|
|
2025-01-16 16:31:22 -05:00
|
|
|
|
for (offset = next_offset(p, store, start_offset);
|
2014-06-07 21:15:59 -07:00
|
|
|
|
offset < p->next_insn_offset;
|
2025-01-16 16:31:22 -05:00
|
|
|
|
offset = next_offset(p, store, offset)) {
|
2024-12-14 14:44:47 -08:00
|
|
|
|
brw_eu_inst *insn = store + offset;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
|
2024-12-06 12:50:23 -08:00
|
|
|
|
switch (brw_eu_inst_opcode(p->isa, insn)) {
|
2015-11-17 18:24:11 -08:00
|
|
|
|
case BRW_OPCODE_IF:
|
|
|
|
|
|
depth++;
|
|
|
|
|
|
break;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
case BRW_OPCODE_ENDIF:
|
2015-11-17 18:24:11 -08:00
|
|
|
|
if (depth == 0)
|
|
|
|
|
|
return offset;
|
|
|
|
|
|
depth--;
|
|
|
|
|
|
break;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
case BRW_OPCODE_WHILE:
|
i965: Fix JIP to skip over sibling do...while loops.
We've apparently always been botching JIP for sequences such as:
do
cmp.f0.0 ...
(+f0.0) break
...
do
...
while
...
while
Because the "do" instruction doesn't actually exist, the inner "while"
is at the same depth as the "break". brw_find_next_block_end() thus
mistook the inner "while" as the end of the loop containing the "break",
and set the "break" to point to the wrong place.
Only "while" instructions that jump before our instruction are relevant.
We need to ignore the rest, as they're sibling control flow nodes (or
children, but this was already handled by the depth == 0 check).
See also commit 1ac1581f3889d5f7e6e231c05651f44fbd80f0b6.
This prevents channel masks from being screwed up, and fixes GPU
hangs(*) in dEQP-GLES31.functional.shaders.multisample_interpolation.
interpolate_at_sample.centroid_qualified.multisample_texture_16.
The test ended up executing code with no channels enabled, and that
code contained FIND_LIVE_CHANNEL, which returned 8 (out of range for
a SIMD8 program), which then was used in indirect GRF addressing,
which randomly got a boolean value (0xFFFFFFFF), interpreted it as
a sample ID, OR'd it into an indirect send message descriptor,
which corrupted the message length, sending a pixel interpolator
message with mlen 15, which is illegal. Whew :)
(*) Technically, the test doesn't GPU hang currently, but only
because another bug prevents it from issuing pixel interpolator
messages entirely...with that fixed, it hangs.
Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
2016-05-14 23:54:48 -07:00
|
|
|
|
/* If the while doesn't jump before our instruction, it's the end
|
|
|
|
|
|
* of a sibling do...while loop. Ignore it.
|
|
|
|
|
|
*/
|
|
|
|
|
|
if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
|
|
|
|
|
|
continue;
|
2021-04-10 17:11:58 +02:00
|
|
|
|
FALLTHROUGH;
|
2016-05-14 23:54:48 -07:00
|
|
|
|
case BRW_OPCODE_ELSE:
|
2012-12-12 12:47:50 -08:00
|
|
|
|
case BRW_OPCODE_HALT:
|
2015-11-17 18:24:11 -08:00
|
|
|
|
if (depth == 0)
|
|
|
|
|
|
return offset;
|
2021-04-13 17:21:56 +02:00
|
|
|
|
break;
|
2018-01-23 19:23:20 -08:00
|
|
|
|
default:
|
|
|
|
|
|
break;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2012-12-06 10:15:08 -08:00
|
|
|
|
|
|
|
|
|
|
return 0;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
}
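Tracing the depth counter on the commit-message example shows why the scan now lands on the right instruction (a walkthrough, not runnable code; offsets elided):
   /* Scan starts just after the BREAK:
    *   if      depth 0 -> 1
    *   else    skipped, depth != 0
    *   endif   depth 1 -> 0
    *   while   jumps back before the BREAK, so it is our loop's
    *           WHILE; it falls through to the depth == 0 check and
    *           its offset is returned. */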
|
|
|
|
|
|
|
2021-03-29 15:40:04 -07:00
|
|
|
|
/* There is no DO instruction on gfx6, so to find the end of the loop
|
2010-12-01 11:46:46 -08:00
|
|
|
|
* we have to see if the loop is jumping back before our start
|
|
|
|
|
|
* instruction.
|
|
|
|
|
|
*/
|
|
|
|
|
|
static int
|
2015-04-16 11:06:57 -07:00
|
|
|
|
brw_find_loop_end(struct brw_codegen *p, int start_offset)
|
2010-12-01 11:46:46 -08:00
|
|
|
|
{
|
2021-04-05 13:19:39 -07:00
|
|
|
|
const struct intel_device_info *devinfo = p->devinfo;
|
2014-05-17 12:53:56 -07:00
|
|
|
|
int offset;
|
2012-02-03 12:05:05 +01:00
|
|
|
|
void *store = p->store;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
|
2012-02-03 12:05:05 +01:00
|
|
|
|
/* Always start after the instruction (such as a WHILE) we're trying to fix
|
|
|
|
|
|
* up.
|
|
|
|
|
|
*/
|
2025-01-16 16:31:22 -05:00
|
|
|
|
for (offset = next_offset(p, store, start_offset);
|
2014-06-07 21:15:59 -07:00
|
|
|
|
offset < p->next_insn_offset;
|
2025-01-16 16:31:22 -05:00
|
|
|
|
offset = next_offset(p, store, offset)) {
|
2024-12-14 14:44:47 -08:00
|
|
|
|
brw_eu_inst *insn = store + offset;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
|
2024-12-06 12:50:23 -08:00
|
|
|
|
if (brw_eu_inst_opcode(p->isa, insn) == BRW_OPCODE_WHILE) {
|
2016-05-14 23:53:19 -07:00
|
|
|
|
if (while_jumps_before_offset(devinfo, insn, offset, start_offset))
|
2014-05-17 12:53:56 -07:00
|
|
|
|
return offset;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2025-03-11 15:53:42 -04:00
|
|
|
|
unreachable("not reached");
|
2010-12-01 11:46:46 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* After program generation, go back and update the UIP and JIP of
|
2012-12-06 10:15:08 -08:00
|
|
|
|
* BREAK, CONT, and HALT instructions to their correct locations.
|
2010-12-01 11:46:46 -08:00
|
|
|
|
*/
|
|
|
|
|
|
void
|
2016-08-29 15:57:41 -07:00
|
|
|
|
brw_set_uip_jip(struct brw_codegen *p, int start_offset)
|
2010-12-01 11:46:46 -08:00
|
|
|
|
{
|
2021-04-05 13:19:39 -07:00
|
|
|
|
const struct intel_device_info *devinfo = p->devinfo;
|
2014-05-17 12:53:56 -07:00
|
|
|
|
int offset;
|
2015-04-14 18:00:06 -07:00
|
|
|
|
int br = brw_jump_scale(devinfo);
|
2014-06-30 08:00:25 -07:00
|
|
|
|
int scale = 16 / br;
|
2012-02-03 12:05:05 +01:00
|
|
|
|
void *store = p->store;
|
2010-12-01 11:46:46 -08:00
|
|
|
|
|
2016-08-29 15:57:41 -07:00
|
|
|
|
for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
|
2024-12-14 14:44:47 -08:00
|
|
|
|
brw_eu_inst *insn = store + offset;
|
2024-12-06 12:50:23 -08:00
|
|
|
|
assert(brw_eu_inst_cmpt_control(devinfo, insn) == 0);
|
2010-12-01 11:46:46 -08:00
|
|
|
|
|
2024-12-06 12:50:23 -08:00
|
|
|
|
switch (brw_eu_inst_opcode(p->isa, insn)) {
|
2021-11-09 16:09:23 -06:00
|
|
|
|
case BRW_OPCODE_BREAK: {
|
|
|
|
|
|
int block_end_offset = brw_find_next_block_end(p, offset);
|
2014-05-17 12:53:56 -07:00
|
|
|
|
assert(block_end_offset != 0);
|
2024-12-06 12:50:23 -08:00
|
|
|
|
brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
|
2021-03-29 15:46:12 -07:00
|
|
|
|
/* Gfx7 UIP points to WHILE; Gfx6 points just after it */
|
2024-12-06 12:50:23 -08:00
|
|
|
|
brw_eu_inst_set_uip(devinfo, insn,
|
2024-02-15 22:09:40 -08:00
|
|
|
|
(brw_find_loop_end(p, offset) - offset) / scale);
|
2010-12-01 11:46:46 -08:00
|
|
|
|
break;
|
2021-11-09 16:09:23 -06:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
case BRW_OPCODE_CONTINUE: {
|
|
|
|
|
|
int block_end_offset = brw_find_next_block_end(p, offset);
|
2014-05-17 12:53:56 -07:00
|
|
|
|
assert(block_end_offset != 0);
|
2024-12-06 12:50:23 -08:00
|
|
|
|
brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
|
|
|
|
|
|
brw_eu_inst_set_uip(devinfo, insn,
|
2014-06-04 17:08:57 -07:00
|
|
|
|
(brw_find_loop_end(p, offset) - offset) / scale);
|
2011-04-30 01:30:55 -07:00
|
|
|
|
|
2024-12-06 12:50:23 -08:00
|
|
|
|
assert(brw_eu_inst_uip(devinfo, insn) != 0);
|
|
|
|
|
|
assert(brw_eu_inst_jip(devinfo, insn) != 0);
|
2013-01-01 17:02:38 -08:00
|
|
|
|
break;
|
2021-11-09 16:09:23 -06:00
|
|
|
|
}
|
i965: Jump to the end of the next outer conditional block on ENDIFs.
From the Ivybridge PRM, Volume 4, Part 3, section 6.24 (page 172):
"The endif instruction is also used to hop out of nested conditionals by
jumping to the end of the next outer conditional block when all
channels are disabled."
Also:
"Pseudocode:
Evaluate(WrEn);
if ( WrEn == 0 ) { // all channels false
Jump(IP + JIP);
}"
First, ENDIF re-enables any channels that were disabled because they
didn't match the conditional. If any channels are active, it proceeds
to the next instruction (IP + 16). However, if they're all disabled,
there's no point in walking through all of the instructions that have no
effect---it can jump to the next instruction that might re-enable some
channels (an ELSE, ENDIF, or WHILE).
Previously, we always set JIP on ENDIF instructions to 2 (which is
measured in 8-byte units). This made it do Jump(IP + 16), which just
meant it would go to the next instruction even if all channels were off.
It turns out that walking over instructions while all the channels are
disabled like this is worse than just instruction dispatch overhead: if
there are texturing messages, it still costs a couple hundred cycles to
not-actually-read from the texture results.
This patch finds the next instruction that could re-enable channels and
sets JIP accordingly.
Reviewed-by: Eric Anholt <eric@anholt.net>
2012-12-12 02:20:05 -08:00
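Putting numbers on it (illustrative offsets, taking br == brw_jump_scale() == 16 and hence scale == 1): an ENDIF at byte offset 480 whose next ELSE/ENDIF/WHILE at the same nesting depth sits at offset 624 gets JIP == (624 - 480) / 1 == 144, while an ENDIF with no such block end found keeps the old fallback JIP == 1 * br == 16, i.e. Jump(IP + 16), the next instruction.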
|
|
|
|
|
2014-08-28 13:34:22 -07:00
|
|
|
|
case BRW_OPCODE_ENDIF: {
|
2021-11-09 16:09:23 -06:00
|
|
|
|
int block_end_offset = brw_find_next_block_end(p, offset);
|
2014-08-28 13:34:22 -07:00
|
|
|
|
int32_t jump = (block_end_offset == 0) ?
|
|
|
|
|
|
1 * br : (block_end_offset - offset) / scale;
|
2024-12-06 12:50:23 -08:00
|
|
|
|
brw_eu_inst_set_jip(devinfo, insn, jump);
|
2010-12-01 11:46:46 -08:00
|
|
|
|
break;
|
2014-08-28 13:34:22 -07:00
|
|
|
|
}
|
2012-12-12 02:20:05 -08:00
|
|
|
|
|

      case BRW_OPCODE_HALT: {
         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
          *
          *    "In case of the halt instruction not inside any conditional
          *     code block, the value of <JIP> and <UIP> should be the
          *     same. In case of the halt instruction inside conditional code
          *     block, the <UIP> should be the end of the program, and the
          *     <JIP> should be end of the most inner conditional code block."
          *
          * The uip will have already been set by whoever set up the
          * instruction.
          */
         int block_end_offset = brw_find_next_block_end(p, offset);
         if (block_end_offset == 0) {
            brw_eu_inst_set_jip(devinfo, insn, brw_eu_inst_uip(devinfo, insn));
         } else {
            brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         }
         assert(brw_eu_inst_uip(devinfo, insn) != 0);
         assert(brw_eu_inst_jip(devinfo, insn) != 0);
         break;
      }

      default:
         break;
      }
   }
}
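
/**
 * Copy the value of the channel of src selected by idx into dst, so the
 * result can be consumed as a uniform value (dst = src[idx]).  When idx is
 * not an immediate, the channel is fetched with indirect addressing.
 */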
void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);

   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_exec_size(p, BRW_EXECUTE_1);

   assert(src.file == FIXED_GRF &&
          src.address_mode == BRW_ADDRESS_DIRECT);
   assert(!src.abs && !src.negate);

   /* Gen12.5 adds the following region restriction:
    *
    *    "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float
    *     and Quad-Word data must not be used."
    *
    * We require the source and destination types to match, so stomp to an
    * unsigned integer type.
    */
   assert(src.type == dst.type);
   src.type = dst.type =
      brw_type_with_size(BRW_TYPE_UD, brw_type_size_bits(src.type));

   if ((src.vstride == 0 && src.hstride == 0) ||
       idx.file == IMM) {
      /* Trivial: the source is already uniform or the index is a constant.
       * We will typically not get here if the optimizer is doing its job,
       * but asserting would be mean.
       */
      const unsigned i = (src.vstride == 0 && src.hstride == 0) ? 0 : idx.ud;
      src = stride(suboffset(src, i), 0, 1, 0);
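
      /* Without 64-bit integer hardware, copy the value as two dwords. */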
      if (brw_type_size_bytes(src.type) > 4 && !devinfo->has_64bit_int) {
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                 subscript(src, BRW_TYPE_D, 0));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                 subscript(src, BRW_TYPE_D, 1));
      } else {
         brw_MOV(p, dst, src);
      }
   } else {
      /* From the Haswell PRM section "Register Region Restrictions":
       *
       *    "The lower bits of the AddressImmediate must not overflow to
       *     change the register address. The lower 5 bits of Address
       *     Immediate when added to lower 5 bits of address register gives
       *     the sub-register offset. The upper bits of Address Immediate
       *     when added to upper bits of address register gives the register
       *     address. Any overflow from sub-register offset is dropped."
       *
       * Fortunately, for broadcast, we never have a sub-register offset so
       * this isn't an issue.
       */
      assert(src.subnr == 0);

      const struct brw_reg addr =
         retype(brw_address_reg(0), BRW_TYPE_UD);
      unsigned offset = src.nr * REG_SIZE + src.subnr;
      /* Limit in bytes of the signed indirect addressing immediate. */
      const unsigned limit = 512;

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);

      /* Take into account the component size and horizontal stride. */
      assert(src.vstride == src.hstride + src.width);
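      /* The address register ends up holding the byte offset of the
       * selected channel: idx << log2(component size in bytes times the
       * horizontal stride).  The hstride field encodes a stride of
       * 1 << (hstride - 1), hence the "- 1" below.
       */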
      brw_SHL(p, addr, vec1(idx),
              brw_imm_ud(util_logbase2(brw_type_size_bytes(src.type)) +
                         src.hstride - 1));

      /* We can only address up to limit bytes using the indirect
       * addressing immediate; account for the difference if the source
       * register is above this limit.
       */
      if (offset >= limit) {
         brw_set_default_swsb(p, tgl_swsb_regdist(1));
         brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
         offset = offset % limit;
      }

      brw_pop_insn_state(p);

      brw_set_default_swsb(p, tgl_swsb_regdist(1));

      /* Use indirect addressing to fetch the specified component. */
      if (brw_type_size_bytes(src.type) > 4 &&
          (intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_int)) {
         /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
          *
          *    "When source or destination datatype is 64b or operation is
          *     integer DWord multiply, indirect addressing must not be
          *     used."
          *
          * We may also not support Q/UQ types.
          *
          * To work around both of these, we do two integer MOVs instead
          * of one 64-bit MOV.  Because no double value should ever cross
          * a register boundary, it's safe to use the immediate offset in
          * the indirect here to handle adding 4 bytes to the offset and
          * avoid the extra ADD to the register file.
          */
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                 retype(brw_vec1_indirect(addr.subnr, offset),
                        BRW_TYPE_D));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                 retype(brw_vec1_indirect(addr.subnr, offset + 4),
                        BRW_TYPE_D));
      } else {
         brw_MOV(p, dst,
                 retype(brw_vec1_indirect(addr.subnr, offset), src.type));
      }
   }

   brw_pop_insn_state(p);
}

/**
 * Emit the SEND message for a barrier.
 */
void
brw_barrier(struct brw_codegen *p, struct brw_reg src)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_eu_inst *inst;

   brw_push_insn_state(p);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   inst = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, inst, retype(brw_null_reg(), BRW_TYPE_UW));
   brw_set_src0(p, inst, src);
   brw_set_src1(p, inst, brw_null_reg());
   brw_set_desc(p, inst, brw_message_desc(devinfo,
                                          1 * reg_unit(devinfo), 0, false),
                false);
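
   /* Route the message to the Message Gateway shared function as a
    * barrier message.
    */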
   brw_eu_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
   brw_eu_inst_set_gateway_subfuncid(devinfo, inst,
                                     BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);

   brw_eu_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
   brw_pop_insn_state(p);
}

/**
 * Emit the wait instruction for a barrier.
 */
void
brw_WAIT(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_eu_inst *insn;

   struct brw_reg src = brw_notification_reg();

   insn = next_insn(p, BRW_OPCODE_WAIT);
   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());

   brw_eu_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}
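
/**
 * Update the floating-point control bits in cr0: clear the bits set in
 * "mask", then OR in the bits set in "mode".
 */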
void
brw_float_controls_mode(struct brw_codegen *p,
                        unsigned mode, unsigned mask)
{
   assert(p->current->mask_control == BRW_MASK_DISABLE);

   /* From the Skylake PRM, Volume 7, page 760:
    *
    *    "Implementation Restriction on Register Access: When the control
    *     register is used as an explicit source and/or destination, hardware
    *     does not ensure execution pipeline coherency. Software must set the
    *     thread control field to 'switch' for an instruction that uses
    *     control register as an explicit operand."
    *
    * On Gfx12+ this is implemented in terms of SWSB annotations instead.
    */
   brw_set_default_swsb(p, tgl_swsb_regdist(1));

   brw_eu_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
                               brw_imm_ud(~mask));
   brw_eu_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
   if (p->devinfo->ver < 12)
      brw_eu_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);

   if (mode) {
      brw_eu_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
                                    brw_imm_ud(mode));
      brw_eu_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
      if (p->devinfo->ver < 12)
         brw_eu_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
   }

   if (p->devinfo->ver >= 12)
      brw_SYNC(p, TGL_SYNC_NOP);
}
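
/**
 * Patch the 32-bit immediate of a MOV that was emitted by
 * brw_MOV_reloc_imm(), once the final value of the relocated constant
 * is known.
 */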
void
brw_update_reloc_imm(const struct brw_isa_info *isa,
                     brw_eu_inst *inst,
                     uint32_t value)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   /* Sanity check that the instruction is a MOV of an immediate. */
   assert(brw_eu_inst_opcode(isa, inst) == BRW_OPCODE_MOV);
   assert(brw_eu_inst_src0_reg_file(devinfo, inst) == IMM);

   /* If it was compacted, we can't safely rewrite. */
   assert(brw_eu_inst_cmpt_control(devinfo, inst) == 0);

   brw_eu_inst_set_imm_ud(devinfo, inst, value);
}

/* A default value for constants that will be patched at run-time.
 * We pick an arbitrary value that prevents instruction compaction.
 */
#define DEFAULT_PATCH_IMM 0x4a7cc037
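
/**
 * Emit a MOV of a placeholder immediate and record a relocation for it,
 * so that brw_update_reloc_imm() can patch in the real value later.
 */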
void
brw_MOV_reloc_imm(struct brw_codegen *p,
                  struct brw_reg dst,
                  enum brw_reg_type src_type,
                  uint32_t id,
                  uint32_t base)
{
   assert(brw_type_size_bytes(src_type) == 4);
   assert(brw_type_size_bytes(dst.type) == 4);

   brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_MOV_IMM,
                 p->next_insn_offset, base);

   brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type));
}