intel/brw: Remove Gfx8- code from EU validation

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27691>
This commit is contained in:
Caio Oliveira 2024-02-16 13:38:19 -08:00 committed by Marge Bot
parent f321e555b6
commit 99f173ddd2

View file

@ -266,14 +266,12 @@ invalid_values(const struct brw_isa_info *isa, const brw_inst *inst)
* No invalid encodings on Gfx10-12 (align1)
*/
} else {
if (devinfo->ver > 6) {
ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF ||
(num_sources > 0 &&
brw_inst_src0_reg_file(devinfo, inst) == MRF) ||
(num_sources > 1 &&
brw_inst_src1_reg_file(devinfo, inst) == MRF),
"invalid register file encoding");
}
ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF ||
(num_sources > 0 &&
brw_inst_src0_reg_file(devinfo, inst) == MRF) ||
(num_sources > 1 &&
brw_inst_src1_reg_file(devinfo, inst) == MRF),
"invalid register file encoding");
}
if (error_msg.str)
@ -420,24 +418,20 @@ send_restrictions(const struct brw_isa_info *isa,
ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
"send must use direct addressing");
if (devinfo->ver >= 7) {
ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE,
"send from non-GRF");
ERROR_IF(brw_inst_eot(devinfo, inst) &&
brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
"send with EOT must use g112-g127");
}
ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE,
"send from non-GRF");
ERROR_IF(brw_inst_eot(devinfo, inst) &&
brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
"send with EOT must use g112-g127");
if (devinfo->ver >= 8) {
ERROR_IF(!dst_is_null(devinfo, inst) &&
(brw_inst_dst_da_reg_nr(devinfo, inst) +
brw_inst_rlen(devinfo, inst) > 127) &&
(brw_inst_src0_da_reg_nr(devinfo, inst) +
brw_inst_mlen(devinfo, inst) >
brw_inst_dst_da_reg_nr(devinfo, inst)),
"r127 must not be used for return address when there is "
"a src and dest overlap");
}
ERROR_IF(!dst_is_null(devinfo, inst) &&
(brw_inst_dst_da_reg_nr(devinfo, inst) +
brw_inst_rlen(devinfo, inst) > 127) &&
(brw_inst_src0_da_reg_nr(devinfo, inst) +
brw_inst_mlen(devinfo, inst) >
brw_inst_dst_da_reg_nr(devinfo, inst)),
"r127 must not be used for return address when there is "
"a src and dest overlap");
}
return error_msg;
@ -530,14 +524,6 @@ execution_type(const struct brw_isa_info *isa, const brw_inst *inst)
src1_exec_type == BRW_REGISTER_TYPE_NF)
return BRW_REGISTER_TYPE_NF;
/* Mixed operand types where one is float is float on Gen < 6
* (and not allowed on later platforms)
*/
if (devinfo->ver < 6 &&
(src0_exec_type == BRW_REGISTER_TYPE_F ||
src1_exec_type == BRW_REGISTER_TYPE_F))
return BRW_REGISTER_TYPE_F;
if (src0_exec_type == BRW_REGISTER_TYPE_Q ||
src1_exec_type == BRW_REGISTER_TYPE_Q)
return BRW_REGISTER_TYPE_Q;
@ -626,9 +612,6 @@ is_mixed_float(const struct brw_isa_info *isa, const brw_inst *inst)
{
const struct intel_device_info *devinfo = isa->devinfo;
if (devinfo->ver < 8)
return false;
if (inst_is_send(isa, inst))
return false;
@ -809,14 +792,6 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
unsigned exec_type_size = brw_reg_type_to_size(exec_type);
unsigned dst_type_size = brw_reg_type_to_size(dst_type);
/* On IVB/BYT, region parameters and execution size for DF are in terms of
* 32-bit elements, so they are doubled. For evaluating the validity of an
* instruction, we halve them.
*/
if (devinfo->verx10 == 70 &&
exec_type_size == 8 && dst_type_size == 4)
dst_type_size = 8;
if (is_byte_conversion(isa, inst)) {
/* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
*
@ -913,9 +888,7 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
ERROR_IF(subreg % 4 != 0,
"Conversions between integer and half-float must be "
"aligned to a DWord on the destination");
} else if ((devinfo->platform == INTEL_PLATFORM_CHV ||
devinfo->ver >= 9) &&
dst_type == BRW_REGISTER_TYPE_HF) {
} else if (dst_type == BRW_REGISTER_TYPE_HF) {
unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
ERROR_IF(dst_stride != 2 &&
!(is_mixed_float(isa, inst) &&
@ -931,9 +904,7 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
* override the general rule for the ratio of sizes of the destination type
* and the execution type. We will add validation for those in a later patch.
*/
bool validate_dst_size_and_exec_size_ratio =
!is_mixed_float(isa, inst) ||
!(devinfo->platform == INTEL_PLATFORM_CHV || devinfo->ver >= 9);
bool validate_dst_size_and_exec_size_ratio = !is_mixed_float(isa, inst);
if (validate_dst_size_and_exec_size_ratio &&
exec_type_size > dst_type_size) {
@ -952,7 +923,7 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
* Implementation Restriction: The relaxed alignment rule for byte
* destination (#10.5) is not supported.
*/
if (devinfo->verx10 >= 45 && dst_type_is_byte) {
if (dst_type_is_byte) {
ERROR_IF(subreg % exec_type_size != 0 &&
subreg % exec_type_size != 1,
"Destination subreg must be aligned to the size of the "
@ -1000,33 +971,19 @@ general_restrictions_on_region_parameters(const struct brw_isa_info *isa,
"Destination Horizontal Stride must be 1");
if (num_sources >= 1) {
if (devinfo->verx10 >= 75) {
ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
"In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
} else {
ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
"In Align16 mode, only VertStride of 0 or 4 is allowed");
}
ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
"In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
}
if (num_sources == 2) {
if (devinfo->verx10 >= 75) {
ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
"In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
} else {
ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
"In Align16 mode, only VertStride of 0 or 4 is allowed");
}
ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
"In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
}
return error_msg;
@ -1055,14 +1012,6 @@ general_restrictions_on_region_parameters(const struct brw_isa_info *isa,
}
#undef DO_SRC
/* On IVB/BYT, region parameters and execution size for DF are in terms of
* 32-bit elements, so they are doubled. For evaluating the validity of an
* instruction, we halve them.
*/
if (devinfo->verx10 == 70 &&
element_size == 8)
element_size = 4;
/* ExecSize must be greater than or equal to Width. */
ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
"to Width");
@ -1500,74 +1449,14 @@ region_alignment_rules(const struct brw_isa_info *isa,
if (error_msg.str)
return error_msg;
/* On IVB/BYT, region parameters and execution size for DF are in terms of
* 32-bit elements, so they are doubled. For evaluating the validity of an
* instruction, we halve them.
*/
if (devinfo->verx10 == 70 &&
element_size == 8)
element_size = 4;
align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
exec_size == 1 ? 0 : exec_size * stride,
exec_size == 1 ? 1 : exec_size,
exec_size == 1 ? 0 : stride);
unsigned dst_regs = registers_read(dst_access_mask);
unsigned src0_regs = registers_read(src0_access_mask);
unsigned src1_regs = registers_read(src1_access_mask);
/* The SNB, IVB, HSW, BDW, and CHV PRMs say:
*
* When an instruction has a source region spanning two registers and a
* destination region contained in one register, the number of elements
* must be the same between two sources and one of the following must be
* true:
*
* 1. The destination region is entirely contained in the lower OWord
* of a register.
* 2. The destination region is entirely contained in the upper OWord
* of a register.
* 3. The destination elements are evenly split between the two OWords
* of a register.
*/
if (devinfo->ver <= 8) {
if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
unsigned upper_oword_writes = 0, lower_oword_writes = 0;
for (unsigned i = 0; i < exec_size; i++) {
if (dst_access_mask[i] > 0x0000FFFF) {
upper_oword_writes++;
} else {
assert(dst_access_mask[i] != 0);
lower_oword_writes++;
}
}
ERROR_IF(lower_oword_writes != 0 &&
upper_oword_writes != 0 &&
upper_oword_writes != lower_oword_writes,
"Writes must be to only one OWord or "
"evenly split between OWords");
}
}
/* The IVB and HSW PRMs say:
*
* When an instruction has a source region that spans two registers and
* the destination spans two registers, the destination elements must be
* evenly split between the two registers [...]
*
* The SNB PRM contains similar wording (but written in a much more
* confusing manner).
*
* The BDW PRM says:
*
* When destination spans two registers, the source may be one or two
* registers. The destination elements must be evenly split between the
* two registers.
*
* The SKL PRM says:
/* The SKL PRM says:
*
* When destination of MATH instruction spans two registers, the
* destination elements must be evenly split between the two registers.
@ -1575,15 +1464,7 @@ region_alignment_rules(const struct brw_isa_info *isa,
* It is not known whether this restriction applies to KBL other Gens after
* SKL.
*/
if (devinfo->ver <= 8 ||
brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {
/* Nothing explicitly states that on Gen < 8 elements must be evenly
* split between two destination registers in the two exceptional
* source-region-spans-one-register cases, but since Broadwell requires
* evenly split writes regardless of source region, we assume that it was
* an oversight and require it.
*/
if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {
if (dst_regs == 2) {
unsigned upper_reg_writes = 0, lower_reg_writes = 0;
@ -1602,143 +1483,6 @@ region_alignment_rules(const struct brw_isa_info *isa,
}
}
/* The IVB and HSW PRMs say:
*
* When an instruction has a source region that spans two registers and
* the destination spans two registers, the destination elements must be
* evenly split between the two registers and each destination register
* must be entirely derived from one source register.
*
* Note: In such cases, the regioning parameters must ensure that the
* offset from the two source registers is the same.
*
* The SNB PRM contains similar wording (but written in a much more
* confusing manner).
*
* There are effectively three rules stated here:
*
* For an instruction with a source and a destination spanning two
* registers,
*
* (1) destination elements must be evenly split between the two
* registers
* (2) all destination elements in a register must be derived
* from one source register
* (3) the offset (i.e. the starting location in each of the two
* registers spanned by a region) must be the same in the two
* registers spanned by a region
*
* It is impossible to violate rule (1) without violating (2) or (3), so we
* do not attempt to validate it.
*/
if (devinfo->ver <= 7 && dst_regs == 2) {
for (unsigned i = 0; i < num_sources; i++) {
#define DO_SRC(n) \
if (src ## n ## _regs <= 1) \
continue; \
\
for (unsigned i = 0; i < exec_size; i++) { \
if ((dst_access_mask[i] > 0xFFFFFFFF) != \
(src ## n ## _access_mask[i] > 0xFFFFFFFF)) { \
ERROR("Each destination register must be entirely derived " \
"from one source register"); \
break; \
} \
} \
\
unsigned offset_0 = \
brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \
unsigned offset_1 = offset_0; \
\
for (unsigned i = 0; i < exec_size; i++) { \
if (src ## n ## _access_mask[i] > 0xFFFFFFFF) { \
offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32; \
break; \
} \
} \
\
ERROR_IF(num_sources == 2 && offset_0 != offset_1, \
"The offset from the two source registers " \
"must be the same")
if (i == 0) {
DO_SRC(0);
} else {
DO_SRC(1);
}
#undef DO_SRC
}
}
/* The IVB and HSW PRMs say:
*
* When destination spans two registers, the source MUST span two
* registers. The exception to the above rule:
* 1. When source is scalar, the source registers are not
* incremented.
* 2. When source is packed integer Word and destination is packed
* integer DWord, the source register is not incremented by the
* source sub register is incremented.
*
* The SNB PRM does not contain this rule, but the internal documentation
* indicates that it applies to SNB as well. We assume that the rule applies
* to Gen <= 5 although their PRMs do not state it.
*
* While the documentation explicitly says in exception (2) that the
* destination must be an integer DWord, the hardware allows at least a
* float destination type as well. We emit such instructions from
*
* fs_visitor::emit_interpolation_setup_gfx6
* fs_visitor::emit_fragcoord_interpolation
*
* and have for years with no ill effects.
*
* Additionally the simulator source code indicates that the real condition
* is that the size of the destination type is 4 bytes.
*
* HSW PRMs also add a note to the second exception:
* "When lower 8 channels are disabled, the sub register of source1
* operand is not incremented. If the lower 8 channels are expected
* to be disabled, say by predication, the instruction must be split
* into pair of simd8 operations."
*
* We can't reliably know if the channels won't be disabled due to,
* for example, IMASK. So, play it safe and disallow packed-word exception
* for src1.
*/
if (devinfo->ver <= 7 && dst_regs == 2) {
enum brw_reg_type dst_type = inst_dst_type(isa, inst);
bool dst_is_packed_dword =
is_packed(exec_size * stride, exec_size, stride) &&
brw_reg_type_to_size(dst_type) == 4;
for (unsigned i = 0; i < num_sources; i++) {
#define DO_SRC(n) \
unsigned vstride, width, hstride; \
vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \
width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \
hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \
bool src ## n ## _is_packed_word = \
n != 1 && is_packed(vstride, width, hstride) && \
(brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W || \
brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW); \
\
ERROR_IF(src ## n ## _regs == 1 && \
!src ## n ## _has_scalar_region(devinfo, inst) && \
!(dst_is_packed_dword && src ## n ## _is_packed_word), \
"When the destination spans two registers, the source must " \
"span two registers\n" ERROR_INDENT "(exceptions for scalar " \
"sources, and packed-word to packed-dword expansion for src0)")
if (i == 0) {
DO_SRC(0);
} else {
DO_SRC(1);
}
#undef DO_SRC
}
}
return error_msg;
}
@ -1835,7 +1579,6 @@ special_requirements_for_handling_double_precision_data_types(
unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst);
bool is_integer_dword_multiply =
devinfo->ver >= 8 &&
brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL &&
(brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) &&
@ -1892,7 +1635,7 @@ special_requirements_for_handling_double_precision_data_types(
*/
if (is_double_precision &&
brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
(devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo))) {
intel_device_info_is_9lp(devinfo)) {
ERROR_IF(!is_scalar_region &&
(src_stride % 8 != 0 ||
dst_stride % 8 != 0 ||
@ -1917,7 +1660,7 @@ special_requirements_for_handling_double_precision_data_types(
* We assume that the restriction applies to GLK as well.
*/
if (is_double_precision &&
(devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo))) {
intel_device_info_is_9lp(devinfo)) {
ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
"Indirect addressing is not allowed when the execution type "
@ -1934,8 +1677,7 @@ special_requirements_for_handling_double_precision_data_types(
* We assume that the restriction does not apply to the null register.
*/
if (is_double_precision &&
(devinfo->platform == INTEL_PLATFORM_CHV ||
intel_device_info_is_9lp(devinfo))) {
intel_device_info_is_9lp(devinfo)) {
ERROR_IF(brw_inst_opcode(isa, inst) == BRW_OPCODE_MAC ||
brw_inst_acc_wr_control(devinfo, inst) ||
(BRW_ARCHITECTURE_REGISTER_FILE == file &&
@ -2007,7 +1749,7 @@ special_requirements_for_handling_double_precision_data_types(
*
* We assume that the restriction applies to all Gfx8+ parts.
*/
if (is_double_precision && devinfo->ver >= 8) {
if (is_double_precision) {
enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
enum brw_reg_type src1_type =
num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type;
@ -2030,7 +1772,7 @@ special_requirements_for_handling_double_precision_data_types(
* We assume that the restriction applies to GLK as well.
*/
if (is_double_precision &&
(devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo))) {
intel_device_info_is_9lp(devinfo)) {
ERROR_IF(brw_inst_no_dd_check(devinfo, inst) ||
brw_inst_no_dd_clear(devinfo, inst),
"DepCtrl is not allowed when the execution type is 64-bit");
@ -2071,56 +1813,14 @@ instruction_restrictions(const struct brw_isa_info *isa,
if (brw_inst_opcode(isa, inst) == BRW_OPCODE_CMP ||
brw_inst_opcode(isa, inst) == BRW_OPCODE_CMPN) {
if (devinfo->ver <= 7) {
/* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit
* ISA) says:
*
* Accumulator cannot be destination, implicit or explicit. The
* destination must be a general register or the null register.
*
* Page 77 of the Haswell PRM Volume 2b contains the same text. The
* 965G PRMs contain similar text.
*
* Page 864 (page 880 of the PDF) of the Broadwell PRM Volume 7 says:
*
* For the cmp and cmpn instructions, remove the accumulator
* restrictions.
*/
ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
brw_inst_dst_da_reg_nr(devinfo, inst) != BRW_ARF_NULL,
"Accumulator cannot be destination, implicit or explicit.");
}
/* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA)
* says:
*
* If the destination is the null register, the {Switch} instruction
* option must be used.
*
* Page 77 of the Haswell PRM Volume 2b contains the same text.
*/
if (devinfo->ver == 7) {
ERROR_IF(dst_is_null(devinfo, inst) &&
brw_inst_thread_control(devinfo, inst) != BRW_THREAD_SWITCH,
"If the destination is the null register, the {Switch} "
"instruction option must be used.");
}
ERROR_IF(brw_inst_cond_modifier(devinfo, inst) == BRW_CONDITIONAL_NONE,
"CMP (or CMPN) must have a condition.");
}
if (brw_inst_opcode(isa, inst) == BRW_OPCODE_SEL) {
if (devinfo->ver < 6) {
ERROR_IF(brw_inst_cond_modifier(devinfo, inst) != BRW_CONDITIONAL_NONE,
"SEL must not have a condition modifier");
ERROR_IF(brw_inst_pred_control(devinfo, inst) == BRW_PREDICATE_NONE,
"SEL must be predicated");
} else {
ERROR_IF((brw_inst_cond_modifier(devinfo, inst) != BRW_CONDITIONAL_NONE) ==
(brw_inst_pred_control(devinfo, inst) != BRW_PREDICATE_NONE),
"SEL must either be predicated or have a condition modifiers");
}
ERROR_IF((brw_inst_cond_modifier(devinfo, inst) != BRW_CONDITIONAL_NONE) ==
(brw_inst_pred_control(devinfo, inst) != BRW_PREDICATE_NONE),
"SEL must either be predicated or have a condition modifiers");
}
if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL) {
@ -2128,118 +1828,31 @@ instruction_restrictions(const struct brw_isa_info *isa,
const enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
const enum brw_reg_type dst_type = inst_dst_type(isa, inst);
if (devinfo->ver == 6) {
/* Page 223 of the Sandybridge PRM volume 4 part 2 says:
*
* [DevSNB]: When multiple (sic) a DW and a W, the W has to be on
* src0, and the DW has to be on src1.
*
* This text appears only in the Sandybridge PRMw.
*/
ERROR_IF(brw_reg_type_is_integer(src0_type) &&
type_sz(src0_type) == 4 && type_sz(src1_type) < 4,
"When multiplying a DW and any lower precision integer, the "
"DW operand must be src1.");
} else if (devinfo->ver >= 7) {
/* Page 966 (page 982 of the PDF) of Broadwell PRM volume 2a says:
*
* When multiplying a DW and any lower precision integer, the DW
* operand must on src0.
*
* Ivy Bridge, Haswell, Skylake, and Ice Lake PRMs contain the same
* text.
*/
ERROR_IF(brw_reg_type_is_integer(src1_type) &&
type_sz(src0_type) < 4 && type_sz(src1_type) == 4,
"When multiplying a DW and any lower precision integer, the "
"DW operand must be src0.");
}
/* Page 966 (page 982 of the PDF) of Broadwell PRM volume 2a says:
*
* When multiplying a DW and any lower precision integer, the DW
* operand must on src0.
*
* Ivy Bridge, Haswell, Skylake, and Ice Lake PRMs contain the same
* text.
*/
ERROR_IF(brw_reg_type_is_integer(src1_type) &&
type_sz(src0_type) < 4 && type_sz(src1_type) == 4,
"When multiplying a DW and any lower precision integer, the "
"DW operand must be src0.");
if (devinfo->ver <= 7) {
/* Section 14.2.28 of Intel 965 Express Chipset PRM volume 4 says:
*
* Source operands cannot be an accumulator register.
*
* Iron Lake, Sandybridge, and Ivy Bridge PRMs have the same text.
* Haswell does not. Given that later PRMs have different
* restrictions on accumulator sources (see below), it seems most
* likely that Haswell shares the Ivy Bridge restriction.
*/
ERROR_IF(src0_is_acc(devinfo, inst) || src1_is_acc(devinfo, inst),
"Source operands cannot be an accumulator register.");
} else {
/* Page 971 (page 987 of the PDF), section "Accumulator
* Restrictions," of the Broadwell PRM volume 7 says:
*
* Integer source operands cannot be accumulators.
*
* The Skylake and Ice Lake PRMs contain the same text.
*/
ERROR_IF((src0_is_acc(devinfo, inst) &&
brw_reg_type_is_integer(src0_type)) ||
(src1_is_acc(devinfo, inst) &&
brw_reg_type_is_integer(src1_type)),
"Integer source operands cannot be accumulators.");
}
if (devinfo->ver <= 6) {
/* Page 223 of the Sandybridge PRM volume 4 part 2 says:
*
* Dword integer source is not allowed for this instruction in
* float execution mode. In other words, if one source is of type
* float (:f, :vf), the other source cannot be of type dword
* integer (:ud or :d).
*
* G965 and Iron Lake PRMs have similar text. Later GPUs do not
* allow mixed source types at all, but that restriction should be
* handled elsewhere.
*/
ERROR_IF(execution_type(isa, inst) == BRW_REGISTER_TYPE_F &&
(src0_type == BRW_REGISTER_TYPE_UD ||
src0_type == BRW_REGISTER_TYPE_D ||
src1_type == BRW_REGISTER_TYPE_UD ||
src1_type == BRW_REGISTER_TYPE_D),
"Dword integer source is not allowed for this instruction in"
"float execution mode.");
}
if (devinfo->ver <= 7) {
/* Page 118 of the Haswell PRM volume 2b says:
*
* When operating on integers with at least one of the source
* being a DWord type (signed or unsigned), the destination cannot
* be floating-point (implementation note: the data converter only
* looks at the low 34 bits of the result).
*
* G965, Iron Lake, Sandybridge, and Ivy Bridge have similar text.
* Later GPUs do not allow mixed source and destination types at all,
* but that restriction should be handled elsewhere.
*/
ERROR_IF(dst_type == BRW_REGISTER_TYPE_F &&
(src0_type == BRW_REGISTER_TYPE_UD ||
src0_type == BRW_REGISTER_TYPE_D ||
src1_type == BRW_REGISTER_TYPE_UD ||
src1_type == BRW_REGISTER_TYPE_D),
"Float destination type not allowed with DWord source type.");
}
if (devinfo->ver == 8) {
/* Page 966 (page 982 of the PDF) of the Broadwell PRM volume 2a
* says:
*
* When multiplying DW x DW, the dst cannot be accumulator.
*
* This text also appears in the Cherry Trail / Braswell PRM, but it
* does not appear in any other PRM.
*/
ERROR_IF((src0_type == BRW_REGISTER_TYPE_UD ||
src0_type == BRW_REGISTER_TYPE_D) &&
(src1_type == BRW_REGISTER_TYPE_UD ||
src1_type == BRW_REGISTER_TYPE_D) &&
brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
brw_inst_dst_da_reg_nr(devinfo, inst) != BRW_ARF_NULL,
"When multiplying DW x DW, the dst cannot be accumulator.");
}
/* Page 971 (page 987 of the PDF), section "Accumulator
* Restrictions," of the Broadwell PRM volume 7 says:
*
* Integer source operands cannot be accumulators.
*
* The Skylake and Ice Lake PRMs contain the same text.
*/
ERROR_IF((src0_is_acc(devinfo, inst) &&
brw_reg_type_is_integer(src0_type)) ||
(src1_is_acc(devinfo, inst) &&
brw_reg_type_is_integer(src1_type)),
"Integer source operands cannot be accumulators.");
/* Page 935 (page 951 of the PDF) of the Ice Lake PRM volume 2a says:
*
@ -2353,34 +1966,32 @@ instruction_restrictions(const struct brw_isa_info *isa,
brw_inst_opcode(isa, inst) == BRW_OPCODE_AND ||
brw_inst_opcode(isa, inst) == BRW_OPCODE_XOR ||
brw_inst_opcode(isa, inst) == BRW_OPCODE_NOT) {
if (devinfo->ver >= 8) {
/* While the behavior of the negate source modifier is defined as
* logical not, the behavior of abs source modifier is not
* defined. Disallow it to be safe.
*/
ERROR_IF(brw_inst_src0_abs(devinfo, inst),
"Behavior of abs source modifier in logic ops is undefined.");
ERROR_IF(brw_inst_opcode(isa, inst) != BRW_OPCODE_NOT &&
brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
brw_inst_src1_abs(devinfo, inst),
"Behavior of abs source modifier in logic ops is undefined.");
/* While the behavior of the negate source modifier is defined as
* logical not, the behavior of abs source modifier is not
* defined. Disallow it to be safe.
*/
ERROR_IF(brw_inst_src0_abs(devinfo, inst),
"Behavior of abs source modifier in logic ops is undefined.");
ERROR_IF(brw_inst_opcode(isa, inst) != BRW_OPCODE_NOT &&
brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
brw_inst_src1_abs(devinfo, inst),
"Behavior of abs source modifier in logic ops is undefined.");
/* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
*
* Source modifier is not allowed if source is an accumulator.
*
* The same text also appears for OR, NOT, and XOR instructions.
*/
ERROR_IF((brw_inst_src0_abs(devinfo, inst) ||
brw_inst_src0_negate(devinfo, inst)) &&
src0_is_acc(devinfo, inst),
"Source modifier is not allowed if source is an accumulator.");
ERROR_IF(brw_num_sources_from_inst(isa, inst) > 1 &&
(brw_inst_src1_abs(devinfo, inst) ||
brw_inst_src1_negate(devinfo, inst)) &&
src1_is_acc(devinfo, inst),
"Source modifier is not allowed if source is an accumulator.");
}
/* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
*
* Source modifier is not allowed if source is an accumulator.
*
* The same text also appears for OR, NOT, and XOR instructions.
*/
ERROR_IF((brw_inst_src0_abs(devinfo, inst) ||
brw_inst_src0_negate(devinfo, inst)) &&
src0_is_acc(devinfo, inst),
"Source modifier is not allowed if source is an accumulator.");
ERROR_IF(brw_num_sources_from_inst(isa, inst) > 1 &&
(brw_inst_src1_abs(devinfo, inst) ||
brw_inst_src1_negate(devinfo, inst)) &&
src1_is_acc(devinfo, inst),
"Source modifier is not allowed if source is an accumulator.");
/* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
*
@ -2456,9 +2067,7 @@ instruction_restrictions(const struct brw_isa_info *isa,
else
dst_type = brw_inst_3src_a16_dst_type(devinfo, inst);
if (devinfo->ver < 8) {
ERROR_IF(devinfo->ver < 8, "CSEL not supported before Gfx8");
} else if (devinfo->ver <= 9) {
if (devinfo->ver == 9) {
ERROR_IF(dst_type != BRW_REGISTER_TYPE_F,
"CSEL destination type must be F");
} else {
@ -2680,69 +2289,23 @@ send_descriptor_restrictions(const struct brw_isa_info *isa,
}
if (brw_inst_sfid(devinfo, inst) == BRW_SFID_URB && devinfo->ver < 20) {
/* Gfx4 doesn't have a "header present" bit in the SEND message. */
ERROR_IF(devinfo->ver > 4 && !brw_inst_header_present(devinfo, inst),
ERROR_IF(!brw_inst_header_present(devinfo, inst),
"Header must be present for all URB messages.");
switch (brw_inst_urb_opcode(devinfo, inst)) {
case BRW_URB_OPCODE_WRITE_HWORD:
break;
/* case FF_SYNC: */
case BRW_URB_OPCODE_WRITE_OWORD:
/* Gfx5 / Gfx6 FF_SYNC message and Gfx7+ URB_WRITE_OWORD have the
* same opcode value.
*/
if (devinfo->ver == 5 || devinfo->ver == 6) {
ERROR_IF(brw_inst_urb_global_offset(devinfo, inst) != 0,
"FF_SYNC global offset must be zero.");
ERROR_IF(brw_inst_urb_swizzle_control(devinfo, inst) != 0,
"FF_SYNC swizzle control must be zero.");
ERROR_IF(brw_inst_urb_used(devinfo, inst) != 0,
"FF_SYNC used must be zero.");
ERROR_IF(brw_inst_urb_complete(devinfo, inst) != 0,
"FF_SYNC complete must be zero.");
/* Volume 4 part 2 of the Sandybridge PRM (page 28) says:
*
* A message response (writeback) length of 1 GRF will be
* indicated on the send instruction if the thread requires
* response data and/or synchronization.
*/
ERROR_IF((unsigned)brw_inst_rlen(devinfo, inst) > 1,
"FF_SYNC read length must be 0 or 1.");
} else {
ERROR_IF(devinfo->ver < 7,
"URB OWORD write messages only valid on gfx >= 7");
}
break;
case BRW_URB_OPCODE_READ_HWORD:
case BRW_URB_OPCODE_READ_OWORD:
ERROR_IF(devinfo->ver < 7,
"URB read messages only valid on gfx >= 7");
break;
case GFX7_URB_OPCODE_ATOMIC_MOV:
case BRW_URB_OPCODE_WRITE_HWORD:
case BRW_URB_OPCODE_WRITE_OWORD:
case GFX7_URB_OPCODE_ATOMIC_INC:
ERROR_IF(devinfo->ver < 7,
"URB atomic move and increment messages only valid on gfx >= 7");
break;
case GFX7_URB_OPCODE_ATOMIC_MOV:
case GFX8_URB_OPCODE_ATOMIC_ADD:
/* The Haswell PRM lists this opcode as valid on page 317. */
ERROR_IF(devinfo->verx10 < 75,
"URB atomic add message only valid on gfx >= 7.5");
case GFX8_URB_OPCODE_SIMD8_WRITE:
break;
case GFX8_URB_OPCODE_SIMD8_READ:
ERROR_IF(brw_inst_rlen(devinfo, inst) == 0,
"URB SIMD8 read message must read some data.");
FALLTHROUGH;
case GFX8_URB_OPCODE_SIMD8_WRITE:
ERROR_IF(devinfo->ver < 8,
"URB SIMD8 messages only valid on gfx >= 8");
break;
case GFX125_URB_OPCODE_FENCE: