mesa/src/intel/compiler/brw_validate.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

622 lines
25 KiB
C++
Raw Normal View History

/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/** @file
*
* Implements a pass that validates various invariants of the IR. The current
* pass only validates that GRF's uses are sane. More can be added later.
*/
#include "brw_shader.h"
#include "brw_cfg.h"
#include "brw_eu.h"
#ifndef NDEBUG
/**
 * Report a failed validation check on stderr and terminate the process.
 *
 * Output, in order: a banner containing the shader stage abbreviation, the
 * instruction as printed by brw_print_instruction(), the "file:line: "
 * location of the failed check, the caller's printf-style message, and a
 * final newline.  The function then calls abort() and so never returns.
 *
 * @param s         Shader being validated.
 * @param inst      Instruction the failed check was applied to.
 * @param filename  Source file of the check (the VAL_ASSERT* macros pass
 *                  __FILE__).
 * @param line      Source line of the check (the VAL_ASSERT* macros pass
 *                  __LINE__).
 * @param fmt       printf-style format string consuming the variadic
 *                  arguments.
 */
static void
brw_validate_abort_with_message_fmt(const brw_shader &s,
                                    const brw_inst *inst,
                                    const char *filename,
                                    unsigned line,
                                    const char *fmt, ...)
{
   fprintf(stderr,
           "ASSERT: BRW %s validation failed!\n",
           _mesa_shader_stage_to_abbrev(s.stage));
   brw_print_instruction(s, inst, stderr);

   /* `line` is unsigned, so the matching conversion is %u, not %d. */
   fprintf(stderr, "%s:%u: ", filename, line);

   va_list args;
   va_start(args, fmt);
   vfprintf(stderr, fmt, args);
   va_end(args);

   fputc('\n', stderr);
   abort();
}
/**
 * Abort validation, quoting the text of `assertion`, when it evaluates to
 * false.
 *
 * Relies on `s` (the shader) and `inst` (the instruction being checked)
 * being in scope at the point of use.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement: `if (x) VAL_ASSERT(y); else ...` then parses as intended instead
 * of failing to compile or binding the else to the macro's internal if.
 */
#define VAL_ASSERT(assertion)                                           \
   do {                                                                 \
      if (!(assertion)) {                                               \
         brw_validate_abort_with_message_fmt(                           \
            s, inst, __FILE__, __LINE__,                                \
            "'%s' failed\n", #assertion);                               \
      }                                                                 \
   } while (0)
/**
 * Abort validation when `A COMPARATOR B` is false, printing both operand
 * expressions together with their values.
 *
 * Both operands are converted to uintptr_t before the comparison, so the
 * comparison is always unsigned and integers, enums, bools and pointers can
 * all be passed.  Each operand is evaluated exactly once.
 *
 * Relies on `s` (the shader) and `inst` (the instruction being checked)
 * being in scope at the point of use.
 *
 * The temporaries carry a macro-specific name so that an operand expression
 * that itself mentions a variable called `a` or `b` is not captured, and the
 * body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and is safe inside an unbraced if/else.
 */
#define VAL_ASSERT_CMP(A, COMPARATOR, B)                                \
   do {                                                                 \
      const uintptr_t val_assert_a_ = (uintptr_t)(A);                   \
      const uintptr_t val_assert_b_ = (uintptr_t)(B);                   \
      if (!(val_assert_a_ COMPARATOR val_assert_b_)) {                  \
         brw_validate_abort_with_message_fmt(                           \
            s, inst, __FILE__, __LINE__,                                \
            "A %s B failed\n"                                           \
            " A = %s = %" PRIuPTR "\n"                                  \
            " B = %s = %" PRIuPTR "\n",                                 \
            #COMPARATOR, #A, val_assert_a_, #B, val_assert_b_);         \
      }                                                                 \
   } while (0)

/* Convenience wrappers, one per comparison operator. */
#define VAL_ASSERT_EQ(A, B) VAL_ASSERT_CMP(A, ==, B)
#define VAL_ASSERT_NE(A, B) VAL_ASSERT_CMP(A, !=, B)
#define VAL_ASSERT_LT(A, B) VAL_ASSERT_CMP(A, <, B)
#define VAL_ASSERT_LE(A, B) VAL_ASSERT_CMP(A, <=, B)
#define VAL_ASSERT_GT(A, B) VAL_ASSERT_CMP(A, >, B)
#define VAL_ASSERT_GE(A, B) VAL_ASSERT_CMP(A, >=, B)
/** Return true when `reg` is an immediate whose type is BRW_TYPE_UD. */
static inline bool
is_ud_imm(const brw_reg &reg)
{
   if (reg.file != IMM)
      return false;

   return reg.type == BRW_TYPE_UD;
}
/** Return true when `reg` is an immediate whose type is BRW_TYPE_D. */
static inline bool
is_d_imm(const brw_reg &reg)
{
   if (reg.file != IMM)
      return false;

   return reg.type == BRW_TYPE_D;
}
static void
validate_memory_logical(const brw_shader &s, const brw_inst *inst)
{
const intel_device_info *devinfo = s.devinfo;
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_OPCODE]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_MODE]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_BINDING_TYPE]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_ALIGNMENT]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_DATA_SIZE]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_COMPONENTS]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_FLAGS]));
VAL_ASSERT(is_d_imm(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET]));
enum lsc_opcode op = (enum lsc_opcode) inst->src[MEMORY_LOGICAL_OPCODE].ud;
enum memory_flags flags = (memory_flags)inst->src[MEMORY_LOGICAL_FLAGS].ud;
bool transpose = flags & MEMORY_FLAG_TRANSPOSE;
bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS;
enum memory_logical_mode mode =
(memory_logical_mode)inst->src[MEMORY_LOGICAL_MODE].ud;
enum lsc_data_size data_size =
(enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud;
unsigned data_size_B = lsc_data_size_bytes(data_size);
if (!devinfo->has_lsc) {
VAL_ASSERT(data_size == LSC_DATA_SIZE_D8U32 ||
data_size == LSC_DATA_SIZE_D16U32 ||
data_size == LSC_DATA_SIZE_D32 ||
data_size == LSC_DATA_SIZE_D64);
if (transpose) {
const unsigned min_alignment =
mode == MEMORY_MODE_SHARED_LOCAL ? 16 : 4;
VAL_ASSERT_GE(inst->src[MEMORY_LOGICAL_ALIGNMENT].ud, min_alignment);
}
}
VAL_ASSERT(!transpose || !include_helpers);
VAL_ASSERT(!transpose || lsc_opcode_has_transpose(op));
if (inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud == LSC_ADDR_SURFTYPE_FLAT)
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_BINDING].file, BAD_FILE);
if (inst->src[MEMORY_LOGICAL_DATA1].file != BAD_FILE) {
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_COMPONENTS].ud,
inst->components_read(MEMORY_LOGICAL_DATA1));
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_DATA0].type,
inst->src[MEMORY_LOGICAL_DATA1].type);
}
if (inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE) {
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_COMPONENTS].ud,
inst->components_read(MEMORY_LOGICAL_DATA0));
VAL_ASSERT_EQ(brw_type_size_bytes(inst->src[MEMORY_LOGICAL_DATA0].type),
data_size_B);
}
if (inst->dst.file != BAD_FILE)
VAL_ASSERT_EQ(brw_type_size_bytes(inst->dst.type), data_size_B);
/** TGM messages cannot have a base offset */
if (mode == MEMORY_MODE_TYPED)
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d, 0);
/* Offset must be DWord aligned */
VAL_ASSERT_EQ((inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d % 4), 0);
switch (inst->opcode) {
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
VAL_ASSERT(op == LSC_OP_LOAD || op == LSC_OP_LOAD_CMASK ||
op == LSC_OP_LOAD_CMASK_MSRT);
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_DATA0].file, BAD_FILE);
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_DATA1].file, BAD_FILE);
break;
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
VAL_ASSERT(lsc_opcode_is_store(op));
VAL_ASSERT_NE(inst->src[MEMORY_LOGICAL_DATA0].file, BAD_FILE);
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_DATA1].file, BAD_FILE);
break;
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
VAL_ASSERT(lsc_opcode_is_atomic(op));
VAL_ASSERT_EQ((inst->src[MEMORY_LOGICAL_DATA0].file == BAD_FILE),
(lsc_op_num_data_values(op) < 1));
VAL_ASSERT_EQ((inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE),
(lsc_op_num_data_values(op) < 2));
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_COMPONENTS].ud, 1);
VAL_ASSERT(!include_helpers);
break;
default:
build: avoid redefining unreachable() which is standard in C23 In the C23 standard unreachable() is now a predefined function-like macro in <stddef.h> See https://android.googlesource.com/platform/bionic/+/HEAD/docs/c23.md#is-now-a-predefined-function_like-macro-in And this causes build errors when building for C23: ----------------------------------------------------------------------- In file included from ../src/util/log.h:30, from ../src/util/log.c:30: ../src/util/macros.h:123:9: warning: "unreachable" redefined 123 | #define unreachable(str) \ | ^~~~~~~~~~~ In file included from ../src/util/macros.h:31: /usr/lib/gcc/x86_64-linux-gnu/14/include/stddef.h:456:9: note: this is the location of the previous definition 456 | #define unreachable() (__builtin_unreachable ()) | ^~~~~~~~~~~ ----------------------------------------------------------------------- So don't redefine it with the same name, but use the name UNREACHABLE() to also signify it's a macro. Using a different name also makes sense because the behavior of the macro was extending the one of __builtin_unreachable() anyway, and it also had a different signature, accepting one argument, compared to the standard unreachable() with no arguments. This change improves the chances of building mesa with the C23 standard, which for instance is the default in recent AOSP versions. All the instances of the macro, including the definition, were updated with the following command line: git grep -l '[^_]unreachable(' -- "src/**" | sort | uniq | \ while read file; \ do \ sed -e 's/\([^_]\)unreachable(/\1UNREACHABLE(/g' -i "$file"; \ done && \ sed -e 's/#undef unreachable/#undef UNREACHABLE/g' -i src/intel/isl/isl_aux_info.c Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36437>
2025-07-23 09:17:35 +02:00
UNREACHABLE("invalid opcode");
}
}
static const char *
brw_shader_phase_to_string(enum brw_shader_phase phase)
{
switch (phase) {
case BRW_SHADER_PHASE_INITIAL: return "INITIAL";
case BRW_SHADER_PHASE_AFTER_NIR: return "AFTER_NIR";
case BRW_SHADER_PHASE_AFTER_OPT_LOOP: return "AFTER_OPT_LOOP";
case BRW_SHADER_PHASE_AFTER_EARLY_LOWERING: return "AFTER_EARLY_LOWERING";
case BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING: return "AFTER_MIDDLE_LOWERING";
case BRW_SHADER_PHASE_AFTER_LATE_LOWERING: return "AFTER_LATE_LOWERING";
case BRW_SHADER_PHASE_AFTER_REGALLOC: return "AFTER_REGALLOC";
case BRW_SHADER_PHASE_INVALID: break;
}
build: avoid redefining unreachable() which is standard in C23 In the C23 standard unreachable() is now a predefined function-like macro in <stddef.h> See https://android.googlesource.com/platform/bionic/+/HEAD/docs/c23.md#is-now-a-predefined-function_like-macro-in And this causes build errors when building for C23: ----------------------------------------------------------------------- In file included from ../src/util/log.h:30, from ../src/util/log.c:30: ../src/util/macros.h:123:9: warning: "unreachable" redefined 123 | #define unreachable(str) \ | ^~~~~~~~~~~ In file included from ../src/util/macros.h:31: /usr/lib/gcc/x86_64-linux-gnu/14/include/stddef.h:456:9: note: this is the location of the previous definition 456 | #define unreachable() (__builtin_unreachable ()) | ^~~~~~~~~~~ ----------------------------------------------------------------------- So don't redefine it with the same name, but use the name UNREACHABLE() to also signify it's a macro. Using a different name also makes sense because the behavior of the macro was extending the one of __builtin_unreachable() anyway, and it also had a different signature, accepting one argument, compared to the standard unreachable() with no arguments. This change improves the chances of building mesa with the C23 standard, which for instance is the default in recent AOSP versions. All the instances of the macro, including the definition, were updated with the following command line: git grep -l '[^_]unreachable(' -- "src/**" | sort | uniq | \ while read file; \ do \ sed -e 's/\([^_]\)unreachable(/\1UNREACHABLE(/g' -i "$file"; \ done && \ sed -e 's/#undef unreachable/#undef UNREACHABLE/g' -i src/intel/isl/isl_aux_info.c Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36437>
2025-07-23 09:17:35 +02:00
UNREACHABLE("invalid_phase");
return NULL;
}
/**
 * Abort if `inst` uses an opcode that must no longer exist in the shader's
 * current phase.
 *
 * Each opcode listed below is associated with the first phase in which it is
 * invalid; opcodes not listed are accepted in every phase.  When the shader
 * has reached that phase, the phase name and the instruction are printed to
 * stderr and the process is aborted.
 *
 * @param s     Shader being validated; `s.phase` is the current phase.
 * @param inst  Instruction to check.
 */
static void
brw_validate_instruction_phase(const brw_shader &s, brw_inst *inst)
{
   /* BRW_SHADER_PHASE_INVALID means "never becomes invalid": the assert
    * below keeps s.phase strictly under it.
    */
   enum brw_shader_phase first_invalid_phase = BRW_SHADER_PHASE_INVALID;

   switch (inst->opcode) {
   case FS_OPCODE_FB_WRITE_LOGICAL:
   case FS_OPCODE_FB_READ_LOGICAL:
   case SHADER_OPCODE_TEX_LOGICAL:
   case SHADER_OPCODE_TXD_LOGICAL:
   case SHADER_OPCODE_TXF_LOGICAL:
   case SHADER_OPCODE_TXL_LOGICAL:
   case SHADER_OPCODE_TXS_LOGICAL:
   case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
   case FS_OPCODE_TXB_LOGICAL:
   case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
   case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
   case SHADER_OPCODE_TXF_MCS_LOGICAL:
   case SHADER_OPCODE_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_LOGICAL:
   case SHADER_OPCODE_TG4_BIAS_LOGICAL:
   case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
   case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
   case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
   case SHADER_OPCODE_GET_BUFFER_SIZE:
   case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
   case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
   case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
   case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
   case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
   case RT_OPCODE_TRACE_RAY_LOGICAL:
   case SHADER_OPCODE_URB_READ_LOGICAL:
   case SHADER_OPCODE_URB_WRITE_LOGICAL:
   case SHADER_OPCODE_REDUCE:
   case SHADER_OPCODE_INCLUSIVE_SCAN:
   case SHADER_OPCODE_EXCLUSIVE_SCAN:
   case SHADER_OPCODE_VOTE_ANY:
   case SHADER_OPCODE_VOTE_ALL:
   case SHADER_OPCODE_VOTE_EQUAL:
   case SHADER_OPCODE_BALLOT:
   case SHADER_OPCODE_QUAD_SWAP:
   case SHADER_OPCODE_READ_FROM_LIVE_CHANNEL:
   case SHADER_OPCODE_READ_FROM_CHANNEL:
   case SHADER_OPCODE_LOAD_REG:
      /* Not allowed once early lowering has completed. */
      first_invalid_phase = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING;
      break;

   case SHADER_OPCODE_LOAD_PAYLOAD:
      /* Not allowed once middle lowering has completed. */
      first_invalid_phase = BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING;
      break;

   default:
      /* No phase restriction for this opcode. */
      break;
   }

   assert(s.phase < BRW_SHADER_PHASE_INVALID);

   /* Still in a phase where the opcode is permitted. */
   if (s.phase < first_invalid_phase)
      return;

   fprintf(stderr, "INVALID INSTRUCTION IN PHASE: %s\n",
           brw_shader_phase_to_string(s.phase));
   brw_print_instruction(s, inst, stderr);
   abort();
}
/**
 * Validate invariants of the shader IR, aborting on the first violation.
 *
 * Does nothing when the shader has no CFG.  Otherwise the CFG itself is
 * validated and then every instruction of every block is checked for:
 *  - instruction kind matching its opcode, and the opcode being legal in the
 *    current shader phase;
 *  - opcode-specific source/destination rules (SEND, SEND_GATHER, MOV,
 *    logical memory ops, fences, LOAD_REG);
 *  - accumulator-write, 3-source and hardware-workaround restrictions;
 *  - VGRF accesses staying inside the allocation of the register;
 *  - address-register reads matching the most recent write within the block.
 *
 * This function is only compiled when NDEBUG is not defined.
 *
 * @param s  Shader to validate.
 */
void
brw_validate(const brw_shader &s)
{
   const intel_device_info *devinfo = s.devinfo;

   if (!s.cfg)
      return;

   s.cfg->validate(_mesa_shader_stage_to_abbrev(s.stage));

   foreach_block(block, s.cfg) {
      /* Track the last used address register. Usage of the address register
       * in the IR should be limited to within a block, otherwise we would
       * be unable to schedule some instructions without spilling the address
       * register to a VGRF.
       *
       * Another pattern we stick to when using the address register in the IR
       * is that we write and read the register in pairs of instruction.
       *
       * Each entry holds the `nr` of the address register last written to
       * that slot, or 0 when the slot holds nothing that may be read.
       */
      uint32_t last_used_address_register[16] = {};

      foreach_inst_in_block (brw_inst, inst, block) {
         VAL_ASSERT_EQ(inst->kind, brw_inst_kind_for_opcode(inst->opcode));

         brw_validate_instruction_phase(s, inst);

         switch (inst->opcode) {
         case SHADER_OPCODE_SEND:
            VAL_ASSERT_EQ(inst->sources, SEND_NUM_SRCS);
            VAL_ASSERT(is_uniform(inst->src[SEND_SRC_DESC]));
            VAL_ASSERT(is_uniform(inst->src[SEND_SRC_EX_DESC]));
            VAL_ASSERT_NE(inst->src[SEND_SRC_PAYLOAD1].file, BAD_FILE);
            VAL_ASSERT(inst->as_send()->ex_mlen > 0 ||
                       inst->src[SEND_SRC_PAYLOAD2].file == BAD_FILE);
            /* Send payloads cannot be immediates nor have source modifiers */
            for (unsigned i = 0; i < 2; i++) {
               VAL_ASSERT_NE(inst->src[SEND_SRC_PAYLOAD1 + i].file, IMM);
               VAL_ASSERT(!inst->src[SEND_SRC_PAYLOAD1 + i].abs);
               VAL_ASSERT(!inst->src[SEND_SRC_PAYLOAD1 + i].negate);
            }
            break;

         case SHADER_OPCODE_SEND_GATHER:
            VAL_ASSERT(is_uniform(inst->src[0]) && is_uniform(inst->src[1]));
            VAL_ASSERT_GE(devinfo->ver, 30);
            break;

         case BRW_OPCODE_MOV:
            VAL_ASSERT_EQ(inst->sources, 1);
            break;

         case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
         case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
         case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
            validate_memory_logical(s, inst);
            break;

         case SHADER_OPCODE_MEMORY_FENCE:
         case SHADER_OPCODE_INTERLOCK:
            VAL_ASSERT_EQ(inst->exec_size, 1);
            VAL_ASSERT(inst->force_writemask_all);
            VAL_ASSERT_EQ(inst->sources, 2);
            VAL_ASSERT(is_ud_imm(inst->src[1])); /* commit enable */
            break;

         case SHADER_OPCODE_LOAD_REG: {
            VAL_ASSERT_EQ(inst->sources, 1);
            VAL_ASSERT_EQ(s.alloc.sizes[inst->dst.nr] * REG_SIZE, inst->size_written);
            VAL_ASSERT(!inst->is_partial_write());
            VAL_ASSERT_LE(inst->src[0].stride, 1);

            /* For example, if file == UNIFORM, stride will be zero and offset
             * may be non-zero.
             */
            if (inst->src[0].stride != 0) {
               VAL_ASSERT_EQ(inst->src[0].offset, 0);
            }

            /* The instruction must be the recorded def of its destination. */
            const brw_def_analysis &defs = s.def_analysis.require();
            VAL_ASSERT_EQ(inst, defs.get(inst->dst));
            break;
         }

         default:
            break;
         }

         /* On Xe2, the "write the accumulator in addition to the explicit
          * destination" bit no longer exists. Try to catch uses of this
          * feature earlier in the process.
          */
         if (devinfo->ver >= 20 && inst->writes_accumulator) {
            VAL_ASSERT(inst->dst.is_accumulator() ||
                       inst->opcode == BRW_OPCODE_ADDC ||
                       inst->opcode == BRW_OPCODE_MACH ||
                       inst->opcode == BRW_OPCODE_SUBB);
         }

         if (inst->is_3src(s.compiler)) {
            /* All three sources must be integer, or all three must be
             * float/bfloat; mixing is not allowed.
             */
            const unsigned integer_sources =
               brw_type_is_int(inst->src[0].type) +
               brw_type_is_int(inst->src[1].type) +
               brw_type_is_int(inst->src[2].type);
            const unsigned float_sources =
               brw_type_is_float_or_bfloat(inst->src[0].type) +
               brw_type_is_float_or_bfloat(inst->src[1].type) +
               brw_type_is_float_or_bfloat(inst->src[2].type);

            VAL_ASSERT((integer_sources == 3 && float_sources == 0) ||
                       (integer_sources == 0 && float_sources == 3));

            if (devinfo->ver >= 10) {
               for (unsigned i = 0; i < 3; i++) {
                  if (inst->src[i].file == IMM)
                     continue;

                  switch (inst->src[i].vstride) {
                  case BRW_VERTICAL_STRIDE_0:
                  case BRW_VERTICAL_STRIDE_4:
                  case BRW_VERTICAL_STRIDE_8:
                  case BRW_VERTICAL_STRIDE_16:
                     break;

                  case BRW_VERTICAL_STRIDE_1:
                     VAL_ASSERT_GE(devinfo->ver, 12);
                     break;

                  case BRW_VERTICAL_STRIDE_2:
                     VAL_ASSERT_LE(devinfo->ver, 11);
                     break;

                  default:
                     VAL_ASSERT(!"invalid vstride");
                     break;
                  }
               }
            } else if (s.grf_used != 0) {
               /* Only perform the pre-Gfx10 checks after register allocation
                * has occurred.
                *
                * Many passes (e.g., constant copy propagation) will
                * generate invalid 3-source instructions with the
                * expectation that later passes (e.g., combine constants) will
                * fix them.
                */
               for (unsigned i = 0; i < 3; i++) {
                  VAL_ASSERT_NE(inst->src[i].file, IMM);

                  /* A stride of 1 (the usual case) or 0, with a special
                   * "repctrl" bit, is allowed. The repctrl bit doesn't work
                   * for 64-bit datatypes, so if the source type is 64-bit
                   * then only a stride of 1 is allowed. From the Broadwell
                   * PRM, Volume 7 "3D Media GPGPU", page 944:
                   *
                   *    This is applicable to 32b datatypes and 16b datatype.
                   *    64b datatypes cannot use the replicate control.
                   */
                  const unsigned stride_in_bytes = byte_stride(inst->src[i]);
                  const unsigned size_in_bytes = brw_type_size_bytes(inst->src[i].type);
                  if (stride_in_bytes == 0) {
                     /* If the source is_scalar, then the stride will be
                      * converted to <4;4,1> in brw_lower_scalar_fp64_MAD
                      * after SIMD splitting.
                      */
                     if (!inst->src[i].is_scalar) {
                        VAL_ASSERT_LE(size_in_bytes, 4);
                     }
                  } else {
                     VAL_ASSERT_EQ(stride_in_bytes, size_in_bytes);
                  }
               }
            }
         }

         /* A VGRF destination must stay inside the register's allocation,
          * and an address-register destination must use a non-zero nr.
          */
         if (inst->dst.file == VGRF) {
            VAL_ASSERT_LE(inst->dst.offset / REG_SIZE + regs_written(inst),
                          s.alloc.sizes[inst->dst.nr]);
            if (inst->exec_size > 1) {
               VAL_ASSERT_NE(inst->dst.stride, 0);
            }
         } else if (inst->dst.is_address()) {
            VAL_ASSERT_NE(inst->dst.nr, 0);
         }

         bool read_address_reg = false;
         for (unsigned i = 0; i < inst->sources; i++) {
            if (inst->src[i].file == VGRF) {
               VAL_ASSERT_LE(inst->src[i].offset / REG_SIZE + regs_read(devinfo, inst, i),
                             s.alloc.sizes[inst->src[i].nr]);
            } else if (inst->src[i].is_address()) {
               VAL_ASSERT_NE(inst->src[i].nr, 0);
               /* Every slot read must hold the address register that was
                * last written there.
                */
               for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2) {
                  VAL_ASSERT_EQ(inst->src[i].nr,
                                last_used_address_register[inst->src[i].address_slot(hw)]);
               }
               read_address_reg = true;
            }
         }

         /* Accumulator Registers, bspec 47251:
          *
          *    "When destination is accumulator with offset 0, destination
          *    horizontal stride must be 1."
          */
         if (intel_needs_workaround(devinfo, 14014617373) &&
             inst->dst.is_accumulator() &&
             phys_subnr(devinfo, inst->dst) == 0) {
            VAL_ASSERT_EQ(inst->dst.hstride, 1);
         }

         if (inst->is_math() && intel_needs_workaround(devinfo, 22016140776)) {
            /* Wa_22016140776:
             *
             *    Scalar broadcast on HF math (packed or unpacked) must not be
             *    used.  Compiler must use a mov instruction to expand the
             *    scalar value to a vector before using in a HF (packed or
             *    unpacked) math operation.
             *
             * Since copy propagation knows about this restriction, nothing
             * should be able to generate these invalid source strides. Detect
             * potential problems sooner rather than later.
             *
             * The accumulator, 3-source and VGRF-destination checks have
             * already run above for this instruction and are deliberately
             * not repeated here.
             */
            for (unsigned i = 0; i < inst->sources; i++) {
               VAL_ASSERT(inst->src[i].is_scalar ||
                          !is_uniform(inst->src[i]) ||
                          inst->src[i].type != BRW_TYPE_HF);
            }
         }

         /* Update the last used address register. */
         if (read_address_reg) {
            /* When an instruction only reads the address register, we assume
             * the read parts are never going to be used again.
             */
            for (unsigned i = 0; i < inst->sources; i++) {
               if (!inst->src[i].is_address())
                  continue;
               for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2)
                  last_used_address_register[inst->src[i].address_slot(hw)] = 0;
            }
         }

         if (inst->dst.is_address()) {
            /* For the written part of the address register */
            for (unsigned hw = 0; hw < inst->size_written; hw += 2)
               last_used_address_register[inst->dst.address_slot(hw)] = inst->dst.nr;
         } else if (inst->uses_address_register_implicitly()) {
            /* If the instruction is making use of the address register,
             * discard the entire thing.
             */
            memset(last_used_address_register, 0,
                   sizeof(last_used_address_register));
         }
      }
   }
}
#endif