mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-22 15:18:09 +02:00
intel/brw: Remove Gfx8- code from EU validation
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27691>
This commit is contained in:
parent
f321e555b6
commit
99f173ddd2
1 changed files with 97 additions and 534 deletions
|
|
@ -266,14 +266,12 @@ invalid_values(const struct brw_isa_info *isa, const brw_inst *inst)
|
|||
* No invalid encodings on Gfx10-12 (align1)
|
||||
*/
|
||||
} else {
|
||||
if (devinfo->ver > 6) {
|
||||
ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF ||
|
||||
(num_sources > 0 &&
|
||||
brw_inst_src0_reg_file(devinfo, inst) == MRF) ||
|
||||
(num_sources > 1 &&
|
||||
brw_inst_src1_reg_file(devinfo, inst) == MRF),
|
||||
"invalid register file encoding");
|
||||
}
|
||||
ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF ||
|
||||
(num_sources > 0 &&
|
||||
brw_inst_src0_reg_file(devinfo, inst) == MRF) ||
|
||||
(num_sources > 1 &&
|
||||
brw_inst_src1_reg_file(devinfo, inst) == MRF),
|
||||
"invalid register file encoding");
|
||||
}
|
||||
|
||||
if (error_msg.str)
|
||||
|
|
@ -420,24 +418,20 @@ send_restrictions(const struct brw_isa_info *isa,
|
|||
ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
|
||||
"send must use direct addressing");
|
||||
|
||||
if (devinfo->ver >= 7) {
|
||||
ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE,
|
||||
"send from non-GRF");
|
||||
ERROR_IF(brw_inst_eot(devinfo, inst) &&
|
||||
brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
|
||||
"send with EOT must use g112-g127");
|
||||
}
|
||||
ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE,
|
||||
"send from non-GRF");
|
||||
ERROR_IF(brw_inst_eot(devinfo, inst) &&
|
||||
brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
|
||||
"send with EOT must use g112-g127");
|
||||
|
||||
if (devinfo->ver >= 8) {
|
||||
ERROR_IF(!dst_is_null(devinfo, inst) &&
|
||||
(brw_inst_dst_da_reg_nr(devinfo, inst) +
|
||||
brw_inst_rlen(devinfo, inst) > 127) &&
|
||||
(brw_inst_src0_da_reg_nr(devinfo, inst) +
|
||||
brw_inst_mlen(devinfo, inst) >
|
||||
brw_inst_dst_da_reg_nr(devinfo, inst)),
|
||||
"r127 must not be used for return address when there is "
|
||||
"a src and dest overlap");
|
||||
}
|
||||
ERROR_IF(!dst_is_null(devinfo, inst) &&
|
||||
(brw_inst_dst_da_reg_nr(devinfo, inst) +
|
||||
brw_inst_rlen(devinfo, inst) > 127) &&
|
||||
(brw_inst_src0_da_reg_nr(devinfo, inst) +
|
||||
brw_inst_mlen(devinfo, inst) >
|
||||
brw_inst_dst_da_reg_nr(devinfo, inst)),
|
||||
"r127 must not be used for return address when there is "
|
||||
"a src and dest overlap");
|
||||
}
|
||||
|
||||
return error_msg;
|
||||
|
|
@ -530,14 +524,6 @@ execution_type(const struct brw_isa_info *isa, const brw_inst *inst)
|
|||
src1_exec_type == BRW_REGISTER_TYPE_NF)
|
||||
return BRW_REGISTER_TYPE_NF;
|
||||
|
||||
/* Mixed operand types where one is float is float on Gen < 6
|
||||
* (and not allowed on later platforms)
|
||||
*/
|
||||
if (devinfo->ver < 6 &&
|
||||
(src0_exec_type == BRW_REGISTER_TYPE_F ||
|
||||
src1_exec_type == BRW_REGISTER_TYPE_F))
|
||||
return BRW_REGISTER_TYPE_F;
|
||||
|
||||
if (src0_exec_type == BRW_REGISTER_TYPE_Q ||
|
||||
src1_exec_type == BRW_REGISTER_TYPE_Q)
|
||||
return BRW_REGISTER_TYPE_Q;
|
||||
|
|
@ -626,9 +612,6 @@ is_mixed_float(const struct brw_isa_info *isa, const brw_inst *inst)
|
|||
{
|
||||
const struct intel_device_info *devinfo = isa->devinfo;
|
||||
|
||||
if (devinfo->ver < 8)
|
||||
return false;
|
||||
|
||||
if (inst_is_send(isa, inst))
|
||||
return false;
|
||||
|
||||
|
|
@ -809,14 +792,6 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
|
|||
unsigned exec_type_size = brw_reg_type_to_size(exec_type);
|
||||
unsigned dst_type_size = brw_reg_type_to_size(dst_type);
|
||||
|
||||
/* On IVB/BYT, region parameters and execution size for DF are in terms of
|
||||
* 32-bit elements, so they are doubled. For evaluating the validity of an
|
||||
* instruction, we halve them.
|
||||
*/
|
||||
if (devinfo->verx10 == 70 &&
|
||||
exec_type_size == 8 && dst_type_size == 4)
|
||||
dst_type_size = 8;
|
||||
|
||||
if (is_byte_conversion(isa, inst)) {
|
||||
/* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
|
||||
*
|
||||
|
|
@ -913,9 +888,7 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
|
|||
ERROR_IF(subreg % 4 != 0,
|
||||
"Conversions between integer and half-float must be "
|
||||
"aligned to a DWord on the destination");
|
||||
} else if ((devinfo->platform == INTEL_PLATFORM_CHV ||
|
||||
devinfo->ver >= 9) &&
|
||||
dst_type == BRW_REGISTER_TYPE_HF) {
|
||||
} else if (dst_type == BRW_REGISTER_TYPE_HF) {
|
||||
unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
|
||||
ERROR_IF(dst_stride != 2 &&
|
||||
!(is_mixed_float(isa, inst) &&
|
||||
|
|
@ -931,9 +904,7 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
|
|||
* override the general rule for the ratio of sizes of the destination type
|
||||
* and the execution type. We will add validation for those in a later patch.
|
||||
*/
|
||||
bool validate_dst_size_and_exec_size_ratio =
|
||||
!is_mixed_float(isa, inst) ||
|
||||
!(devinfo->platform == INTEL_PLATFORM_CHV || devinfo->ver >= 9);
|
||||
bool validate_dst_size_and_exec_size_ratio = !is_mixed_float(isa, inst);
|
||||
|
||||
if (validate_dst_size_and_exec_size_ratio &&
|
||||
exec_type_size > dst_type_size) {
|
||||
|
|
@ -952,7 +923,7 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
|
|||
* Implementation Restriction: The relaxed alignment rule for byte
|
||||
* destination (#10.5) is not supported.
|
||||
*/
|
||||
if (devinfo->verx10 >= 45 && dst_type_is_byte) {
|
||||
if (dst_type_is_byte) {
|
||||
ERROR_IF(subreg % exec_type_size != 0 &&
|
||||
subreg % exec_type_size != 1,
|
||||
"Destination subreg must be aligned to the size of the "
|
||||
|
|
@ -1000,33 +971,19 @@ general_restrictions_on_region_parameters(const struct brw_isa_info *isa,
|
|||
"Destination Horizontal Stride must be 1");
|
||||
|
||||
if (num_sources >= 1) {
|
||||
if (devinfo->verx10 >= 75) {
|
||||
ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
|
||||
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
|
||||
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
|
||||
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
|
||||
"In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
|
||||
} else {
|
||||
ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
|
||||
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
|
||||
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
|
||||
"In Align16 mode, only VertStride of 0 or 4 is allowed");
|
||||
}
|
||||
ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
|
||||
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
|
||||
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
|
||||
brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
|
||||
"In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
|
||||
}
|
||||
|
||||
if (num_sources == 2) {
|
||||
if (devinfo->verx10 >= 75) {
|
||||
ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
|
||||
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
|
||||
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
|
||||
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
|
||||
"In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
|
||||
} else {
|
||||
ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
|
||||
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
|
||||
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
|
||||
"In Align16 mode, only VertStride of 0 or 4 is allowed");
|
||||
}
|
||||
ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
|
||||
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
|
||||
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
|
||||
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
|
||||
"In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
|
||||
}
|
||||
|
||||
return error_msg;
|
||||
|
|
@ -1055,14 +1012,6 @@ general_restrictions_on_region_parameters(const struct brw_isa_info *isa,
|
|||
}
|
||||
#undef DO_SRC
|
||||
|
||||
/* On IVB/BYT, region parameters and execution size for DF are in terms of
|
||||
* 32-bit elements, so they are doubled. For evaluating the validity of an
|
||||
* instruction, we halve them.
|
||||
*/
|
||||
if (devinfo->verx10 == 70 &&
|
||||
element_size == 8)
|
||||
element_size = 4;
|
||||
|
||||
/* ExecSize must be greater than or equal to Width. */
|
||||
ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
|
||||
"to Width");
|
||||
|
|
@ -1500,74 +1449,14 @@ region_alignment_rules(const struct brw_isa_info *isa,
|
|||
if (error_msg.str)
|
||||
return error_msg;
|
||||
|
||||
/* On IVB/BYT, region parameters and execution size for DF are in terms of
|
||||
* 32-bit elements, so they are doubled. For evaluating the validity of an
|
||||
* instruction, we halve them.
|
||||
*/
|
||||
if (devinfo->verx10 == 70 &&
|
||||
element_size == 8)
|
||||
element_size = 4;
|
||||
|
||||
align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
|
||||
exec_size == 1 ? 0 : exec_size * stride,
|
||||
exec_size == 1 ? 1 : exec_size,
|
||||
exec_size == 1 ? 0 : stride);
|
||||
|
||||
unsigned dst_regs = registers_read(dst_access_mask);
|
||||
unsigned src0_regs = registers_read(src0_access_mask);
|
||||
unsigned src1_regs = registers_read(src1_access_mask);
|
||||
|
||||
/* The SNB, IVB, HSW, BDW, and CHV PRMs say:
|
||||
*
|
||||
* When an instruction has a source region spanning two registers and a
|
||||
* destination region contained in one register, the number of elements
|
||||
* must be the same between two sources and one of the following must be
|
||||
* true:
|
||||
*
|
||||
* 1. The destination region is entirely contained in the lower OWord
|
||||
* of a register.
|
||||
* 2. The destination region is entirely contained in the upper OWord
|
||||
* of a register.
|
||||
* 3. The destination elements are evenly split between the two OWords
|
||||
* of a register.
|
||||
*/
|
||||
if (devinfo->ver <= 8) {
|
||||
if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
|
||||
unsigned upper_oword_writes = 0, lower_oword_writes = 0;
|
||||
|
||||
for (unsigned i = 0; i < exec_size; i++) {
|
||||
if (dst_access_mask[i] > 0x0000FFFF) {
|
||||
upper_oword_writes++;
|
||||
} else {
|
||||
assert(dst_access_mask[i] != 0);
|
||||
lower_oword_writes++;
|
||||
}
|
||||
}
|
||||
|
||||
ERROR_IF(lower_oword_writes != 0 &&
|
||||
upper_oword_writes != 0 &&
|
||||
upper_oword_writes != lower_oword_writes,
|
||||
"Writes must be to only one OWord or "
|
||||
"evenly split between OWords");
|
||||
}
|
||||
}
|
||||
|
||||
/* The IVB and HSW PRMs say:
|
||||
*
|
||||
* When an instruction has a source region that spans two registers and
|
||||
* the destination spans two registers, the destination elements must be
|
||||
* evenly split between the two registers [...]
|
||||
*
|
||||
* The SNB PRM contains similar wording (but written in a much more
|
||||
* confusing manner).
|
||||
*
|
||||
* The BDW PRM says:
|
||||
*
|
||||
* When destination spans two registers, the source may be one or two
|
||||
* registers. The destination elements must be evenly split between the
|
||||
* two registers.
|
||||
*
|
||||
* The SKL PRM says:
|
||||
/* The SKL PRM says:
|
||||
*
|
||||
* When destination of MATH instruction spans two registers, the
|
||||
* destination elements must be evenly split between the two registers.
|
||||
|
|
@ -1575,15 +1464,7 @@ region_alignment_rules(const struct brw_isa_info *isa,
|
|||
* It is not known whether this restriction applies to KBL or other Gens after
|
||||
* SKL.
|
||||
*/
|
||||
if (devinfo->ver <= 8 ||
|
||||
brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {
|
||||
|
||||
/* Nothing explicitly states that on Gen < 8 elements must be evenly
|
||||
* split between two destination registers in the two exceptional
|
||||
* source-region-spans-one-register cases, but since Broadwell requires
|
||||
* evenly split writes regardless of source region, we assume that it was
|
||||
* an oversight and require it.
|
||||
*/
|
||||
if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {
|
||||
if (dst_regs == 2) {
|
||||
unsigned upper_reg_writes = 0, lower_reg_writes = 0;
|
||||
|
||||
|
|
@ -1602,143 +1483,6 @@ region_alignment_rules(const struct brw_isa_info *isa,
|
|||
}
|
||||
}
|
||||
|
||||
/* The IVB and HSW PRMs say:
|
||||
*
|
||||
* When an instruction has a source region that spans two registers and
|
||||
* the destination spans two registers, the destination elements must be
|
||||
* evenly split between the two registers and each destination register
|
||||
* must be entirely derived from one source register.
|
||||
*
|
||||
* Note: In such cases, the regioning parameters must ensure that the
|
||||
* offset from the two source registers is the same.
|
||||
*
|
||||
* The SNB PRM contains similar wording (but written in a much more
|
||||
* confusing manner).
|
||||
*
|
||||
* There are effectively three rules stated here:
|
||||
*
|
||||
* For an instruction with a source and a destination spanning two
|
||||
* registers,
|
||||
*
|
||||
* (1) destination elements must be evenly split between the two
|
||||
* registers
|
||||
* (2) all destination elements in a register must be derived
|
||||
* from one source register
|
||||
* (3) the offset (i.e. the starting location in each of the two
|
||||
* registers spanned by a region) must be the same in the two
|
||||
* registers spanned by a region
|
||||
*
|
||||
* It is impossible to violate rule (1) without violating (2) or (3), so we
|
||||
* do not attempt to validate it.
|
||||
*/
|
||||
if (devinfo->ver <= 7 && dst_regs == 2) {
|
||||
for (unsigned i = 0; i < num_sources; i++) {
|
||||
#define DO_SRC(n) \
|
||||
if (src ## n ## _regs <= 1) \
|
||||
continue; \
|
||||
\
|
||||
for (unsigned i = 0; i < exec_size; i++) { \
|
||||
if ((dst_access_mask[i] > 0xFFFFFFFF) != \
|
||||
(src ## n ## _access_mask[i] > 0xFFFFFFFF)) { \
|
||||
ERROR("Each destination register must be entirely derived " \
|
||||
"from one source register"); \
|
||||
break; \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
unsigned offset_0 = \
|
||||
brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \
|
||||
unsigned offset_1 = offset_0; \
|
||||
\
|
||||
for (unsigned i = 0; i < exec_size; i++) { \
|
||||
if (src ## n ## _access_mask[i] > 0xFFFFFFFF) { \
|
||||
offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32; \
|
||||
break; \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
ERROR_IF(num_sources == 2 && offset_0 != offset_1, \
|
||||
"The offset from the two source registers " \
|
||||
"must be the same")
|
||||
|
||||
if (i == 0) {
|
||||
DO_SRC(0);
|
||||
} else {
|
||||
DO_SRC(1);
|
||||
}
|
||||
#undef DO_SRC
|
||||
}
|
||||
}
|
||||
|
||||
/* The IVB and HSW PRMs say:
|
||||
*
|
||||
* When destination spans two registers, the source MUST span two
|
||||
* registers. The exception to the above rule:
|
||||
* 1. When source is scalar, the source registers are not
|
||||
* incremented.
|
||||
* 2. When source is packed integer Word and destination is packed
|
||||
* integer DWord, the source register is not incremented but the
|
||||
* source sub register is incremented.
|
||||
*
|
||||
* The SNB PRM does not contain this rule, but the internal documentation
|
||||
* indicates that it applies to SNB as well. We assume that the rule applies
|
||||
* to Gen <= 5 although their PRMs do not state it.
|
||||
*
|
||||
* While the documentation explicitly says in exception (2) that the
|
||||
* destination must be an integer DWord, the hardware allows at least a
|
||||
* float destination type as well. We emit such instructions from
|
||||
*
|
||||
* fs_visitor::emit_interpolation_setup_gfx6
|
||||
* fs_visitor::emit_fragcoord_interpolation
|
||||
*
|
||||
* and have for years with no ill effects.
|
||||
*
|
||||
* Additionally the simulator source code indicates that the real condition
|
||||
* is that the size of the destination type is 4 bytes.
|
||||
*
|
||||
* HSW PRMs also add a note to the second exception:
|
||||
* "When lower 8 channels are disabled, the sub register of source1
|
||||
* operand is not incremented. If the lower 8 channels are expected
|
||||
* to be disabled, say by predication, the instruction must be split
|
||||
* into pair of simd8 operations."
|
||||
*
|
||||
* We can't reliably know if the channels won't be disabled due to,
|
||||
* for example, IMASK. So, play it safe and disallow packed-word exception
|
||||
* for src1.
|
||||
*/
|
||||
if (devinfo->ver <= 7 && dst_regs == 2) {
|
||||
enum brw_reg_type dst_type = inst_dst_type(isa, inst);
|
||||
bool dst_is_packed_dword =
|
||||
is_packed(exec_size * stride, exec_size, stride) &&
|
||||
brw_reg_type_to_size(dst_type) == 4;
|
||||
|
||||
for (unsigned i = 0; i < num_sources; i++) {
|
||||
#define DO_SRC(n) \
|
||||
unsigned vstride, width, hstride; \
|
||||
vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \
|
||||
width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \
|
||||
hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \
|
||||
bool src ## n ## _is_packed_word = \
|
||||
n != 1 && is_packed(vstride, width, hstride) && \
|
||||
(brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W || \
|
||||
brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW); \
|
||||
\
|
||||
ERROR_IF(src ## n ## _regs == 1 && \
|
||||
!src ## n ## _has_scalar_region(devinfo, inst) && \
|
||||
!(dst_is_packed_dword && src ## n ## _is_packed_word), \
|
||||
"When the destination spans two registers, the source must " \
|
||||
"span two registers\n" ERROR_INDENT "(exceptions for scalar " \
|
||||
"sources, and packed-word to packed-dword expansion for src0)")
|
||||
|
||||
if (i == 0) {
|
||||
DO_SRC(0);
|
||||
} else {
|
||||
DO_SRC(1);
|
||||
}
|
||||
#undef DO_SRC
|
||||
}
|
||||
}
|
||||
|
||||
return error_msg;
|
||||
}
|
||||
|
||||
|
|
@ -1835,7 +1579,6 @@ special_requirements_for_handling_double_precision_data_types(
|
|||
unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst);
|
||||
|
||||
bool is_integer_dword_multiply =
|
||||
devinfo->ver >= 8 &&
|
||||
brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL &&
|
||||
(brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
|
||||
brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) &&
|
||||
|
|
@ -1892,7 +1635,7 @@ special_requirements_for_handling_double_precision_data_types(
|
|||
*/
|
||||
if (is_double_precision &&
|
||||
brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
|
||||
(devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo))) {
|
||||
intel_device_info_is_9lp(devinfo)) {
|
||||
ERROR_IF(!is_scalar_region &&
|
||||
(src_stride % 8 != 0 ||
|
||||
dst_stride % 8 != 0 ||
|
||||
|
|
@ -1917,7 +1660,7 @@ special_requirements_for_handling_double_precision_data_types(
|
|||
* We assume that the restriction applies to GLK as well.
|
||||
*/
|
||||
if (is_double_precision &&
|
||||
(devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo))) {
|
||||
intel_device_info_is_9lp(devinfo)) {
|
||||
ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
|
||||
BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
|
||||
"Indirect addressing is not allowed when the execution type "
|
||||
|
|
@ -1934,8 +1677,7 @@ special_requirements_for_handling_double_precision_data_types(
|
|||
* We assume that the restriction does not apply to the null register.
|
||||
*/
|
||||
if (is_double_precision &&
|
||||
(devinfo->platform == INTEL_PLATFORM_CHV ||
|
||||
intel_device_info_is_9lp(devinfo))) {
|
||||
intel_device_info_is_9lp(devinfo)) {
|
||||
ERROR_IF(brw_inst_opcode(isa, inst) == BRW_OPCODE_MAC ||
|
||||
brw_inst_acc_wr_control(devinfo, inst) ||
|
||||
(BRW_ARCHITECTURE_REGISTER_FILE == file &&
|
||||
|
|
@ -2007,7 +1749,7 @@ special_requirements_for_handling_double_precision_data_types(
|
|||
*
|
||||
* We assume that the restriction applies to all Gfx8+ parts.
|
||||
*/
|
||||
if (is_double_precision && devinfo->ver >= 8) {
|
||||
if (is_double_precision) {
|
||||
enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
|
||||
enum brw_reg_type src1_type =
|
||||
num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type;
|
||||
|
|
@ -2030,7 +1772,7 @@ special_requirements_for_handling_double_precision_data_types(
|
|||
* We assume that the restriction applies to GLK as well.
|
||||
*/
|
||||
if (is_double_precision &&
|
||||
(devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo))) {
|
||||
intel_device_info_is_9lp(devinfo)) {
|
||||
ERROR_IF(brw_inst_no_dd_check(devinfo, inst) ||
|
||||
brw_inst_no_dd_clear(devinfo, inst),
|
||||
"DepCtrl is not allowed when the execution type is 64-bit");
|
||||
|
|
@ -2071,56 +1813,14 @@ instruction_restrictions(const struct brw_isa_info *isa,
|
|||
|
||||
if (brw_inst_opcode(isa, inst) == BRW_OPCODE_CMP ||
|
||||
brw_inst_opcode(isa, inst) == BRW_OPCODE_CMPN) {
|
||||
if (devinfo->ver <= 7) {
|
||||
/* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit
|
||||
* ISA) says:
|
||||
*
|
||||
* Accumulator cannot be destination, implicit or explicit. The
|
||||
* destination must be a general register or the null register.
|
||||
*
|
||||
* Page 77 of the Haswell PRM Volume 2b contains the same text. The
|
||||
* 965G PRMs contain similar text.
|
||||
*
|
||||
* Page 864 (page 880 of the PDF) of the Broadwell PRM Volume 7 says:
|
||||
*
|
||||
* For the cmp and cmpn instructions, remove the accumulator
|
||||
* restrictions.
|
||||
*/
|
||||
ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
|
||||
brw_inst_dst_da_reg_nr(devinfo, inst) != BRW_ARF_NULL,
|
||||
"Accumulator cannot be destination, implicit or explicit.");
|
||||
}
|
||||
|
||||
/* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA)
|
||||
* says:
|
||||
*
|
||||
* If the destination is the null register, the {Switch} instruction
|
||||
* option must be used.
|
||||
*
|
||||
* Page 77 of the Haswell PRM Volume 2b contains the same text.
|
||||
*/
|
||||
if (devinfo->ver == 7) {
|
||||
ERROR_IF(dst_is_null(devinfo, inst) &&
|
||||
brw_inst_thread_control(devinfo, inst) != BRW_THREAD_SWITCH,
|
||||
"If the destination is the null register, the {Switch} "
|
||||
"instruction option must be used.");
|
||||
}
|
||||
|
||||
ERROR_IF(brw_inst_cond_modifier(devinfo, inst) == BRW_CONDITIONAL_NONE,
|
||||
"CMP (or CMPN) must have a condition.");
|
||||
}
|
||||
|
||||
if (brw_inst_opcode(isa, inst) == BRW_OPCODE_SEL) {
|
||||
if (devinfo->ver < 6) {
|
||||
ERROR_IF(brw_inst_cond_modifier(devinfo, inst) != BRW_CONDITIONAL_NONE,
|
||||
"SEL must not have a condition modifier");
|
||||
ERROR_IF(brw_inst_pred_control(devinfo, inst) == BRW_PREDICATE_NONE,
|
||||
"SEL must be predicated");
|
||||
} else {
|
||||
ERROR_IF((brw_inst_cond_modifier(devinfo, inst) != BRW_CONDITIONAL_NONE) ==
|
||||
(brw_inst_pred_control(devinfo, inst) != BRW_PREDICATE_NONE),
|
||||
"SEL must either be predicated or have a condition modifiers");
|
||||
}
|
||||
ERROR_IF((brw_inst_cond_modifier(devinfo, inst) != BRW_CONDITIONAL_NONE) ==
|
||||
(brw_inst_pred_control(devinfo, inst) != BRW_PREDICATE_NONE),
|
||||
"SEL must either be predicated or have a condition modifiers");
|
||||
}
|
||||
|
||||
if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL) {
|
||||
|
|
@ -2128,118 +1828,31 @@ instruction_restrictions(const struct brw_isa_info *isa,
|
|||
const enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
|
||||
const enum brw_reg_type dst_type = inst_dst_type(isa, inst);
|
||||
|
||||
if (devinfo->ver == 6) {
|
||||
/* Page 223 of the Sandybridge PRM volume 4 part 2 says:
|
||||
*
|
||||
* [DevSNB]: When multiple (sic) a DW and a W, the W has to be on
|
||||
* src0, and the DW has to be on src1.
|
||||
*
|
||||
* This text appears only in the Sandybridge PRM.
|
||||
*/
|
||||
ERROR_IF(brw_reg_type_is_integer(src0_type) &&
|
||||
type_sz(src0_type) == 4 && type_sz(src1_type) < 4,
|
||||
"When multiplying a DW and any lower precision integer, the "
|
||||
"DW operand must be src1.");
|
||||
} else if (devinfo->ver >= 7) {
|
||||
/* Page 966 (page 982 of the PDF) of Broadwell PRM volume 2a says:
|
||||
*
|
||||
* When multiplying a DW and any lower precision integer, the DW
|
||||
* operand must on src0.
|
||||
*
|
||||
* Ivy Bridge, Haswell, Skylake, and Ice Lake PRMs contain the same
|
||||
* text.
|
||||
*/
|
||||
ERROR_IF(brw_reg_type_is_integer(src1_type) &&
|
||||
type_sz(src0_type) < 4 && type_sz(src1_type) == 4,
|
||||
"When multiplying a DW and any lower precision integer, the "
|
||||
"DW operand must be src0.");
|
||||
}
|
||||
/* Page 966 (page 982 of the PDF) of Broadwell PRM volume 2a says:
|
||||
*
|
||||
* When multiplying a DW and any lower precision integer, the DW
|
||||
* operand must on src0.
|
||||
*
|
||||
* Ivy Bridge, Haswell, Skylake, and Ice Lake PRMs contain the same
|
||||
* text.
|
||||
*/
|
||||
ERROR_IF(brw_reg_type_is_integer(src1_type) &&
|
||||
type_sz(src0_type) < 4 && type_sz(src1_type) == 4,
|
||||
"When multiplying a DW and any lower precision integer, the "
|
||||
"DW operand must be src0.");
|
||||
|
||||
if (devinfo->ver <= 7) {
|
||||
/* Section 14.2.28 of Intel 965 Express Chipset PRM volume 4 says:
|
||||
*
|
||||
* Source operands cannot be an accumulator register.
|
||||
*
|
||||
* Iron Lake, Sandybridge, and Ivy Bridge PRMs have the same text.
|
||||
* Haswell does not. Given that later PRMs have different
|
||||
* restrictions on accumulator sources (see below), it seems most
|
||||
* likely that Haswell shares the Ivy Bridge restriction.
|
||||
*/
|
||||
ERROR_IF(src0_is_acc(devinfo, inst) || src1_is_acc(devinfo, inst),
|
||||
"Source operands cannot be an accumulator register.");
|
||||
} else {
|
||||
/* Page 971 (page 987 of the PDF), section "Accumulator
|
||||
* Restrictions," of the Broadwell PRM volume 7 says:
|
||||
*
|
||||
* Integer source operands cannot be accumulators.
|
||||
*
|
||||
* The Skylake and Ice Lake PRMs contain the same text.
|
||||
*/
|
||||
ERROR_IF((src0_is_acc(devinfo, inst) &&
|
||||
brw_reg_type_is_integer(src0_type)) ||
|
||||
(src1_is_acc(devinfo, inst) &&
|
||||
brw_reg_type_is_integer(src1_type)),
|
||||
"Integer source operands cannot be accumulators.");
|
||||
}
|
||||
|
||||
if (devinfo->ver <= 6) {
|
||||
/* Page 223 of the Sandybridge PRM volume 4 part 2 says:
|
||||
*
|
||||
* Dword integer source is not allowed for this instruction in
|
||||
* float execution mode. In other words, if one source is of type
|
||||
* float (:f, :vf), the other source cannot be of type dword
|
||||
* integer (:ud or :d).
|
||||
*
|
||||
* G965 and Iron Lake PRMs have similar text. Later GPUs do not
|
||||
* allow mixed source types at all, but that restriction should be
|
||||
* handled elsewhere.
|
||||
*/
|
||||
ERROR_IF(execution_type(isa, inst) == BRW_REGISTER_TYPE_F &&
|
||||
(src0_type == BRW_REGISTER_TYPE_UD ||
|
||||
src0_type == BRW_REGISTER_TYPE_D ||
|
||||
src1_type == BRW_REGISTER_TYPE_UD ||
|
||||
src1_type == BRW_REGISTER_TYPE_D),
|
||||
"Dword integer source is not allowed for this instruction in"
|
||||
"float execution mode.");
|
||||
}
|
||||
|
||||
if (devinfo->ver <= 7) {
|
||||
/* Page 118 of the Haswell PRM volume 2b says:
|
||||
*
|
||||
* When operating on integers with at least one of the source
|
||||
* being a DWord type (signed or unsigned), the destination cannot
|
||||
* be floating-point (implementation note: the data converter only
|
||||
* looks at the low 34 bits of the result).
|
||||
*
|
||||
* G965, Iron Lake, Sandybridge, and Ivy Bridge have similar text.
|
||||
* Later GPUs do not allow mixed source and destination types at all,
|
||||
* but that restriction should be handled elsewhere.
|
||||
*/
|
||||
ERROR_IF(dst_type == BRW_REGISTER_TYPE_F &&
|
||||
(src0_type == BRW_REGISTER_TYPE_UD ||
|
||||
src0_type == BRW_REGISTER_TYPE_D ||
|
||||
src1_type == BRW_REGISTER_TYPE_UD ||
|
||||
src1_type == BRW_REGISTER_TYPE_D),
|
||||
"Float destination type not allowed with DWord source type.");
|
||||
}
|
||||
|
||||
if (devinfo->ver == 8) {
|
||||
/* Page 966 (page 982 of the PDF) of the Broadwell PRM volume 2a
|
||||
* says:
|
||||
*
|
||||
* When multiplying DW x DW, the dst cannot be accumulator.
|
||||
*
|
||||
* This text also appears in the Cherry Trail / Braswell PRM, but it
|
||||
* does not appear in any other PRM.
|
||||
*/
|
||||
ERROR_IF((src0_type == BRW_REGISTER_TYPE_UD ||
|
||||
src0_type == BRW_REGISTER_TYPE_D) &&
|
||||
(src1_type == BRW_REGISTER_TYPE_UD ||
|
||||
src1_type == BRW_REGISTER_TYPE_D) &&
|
||||
brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
|
||||
brw_inst_dst_da_reg_nr(devinfo, inst) != BRW_ARF_NULL,
|
||||
"When multiplying DW x DW, the dst cannot be accumulator.");
|
||||
}
|
||||
/* Page 971 (page 987 of the PDF), section "Accumulator
|
||||
* Restrictions," of the Broadwell PRM volume 7 says:
|
||||
*
|
||||
* Integer source operands cannot be accumulators.
|
||||
*
|
||||
* The Skylake and Ice Lake PRMs contain the same text.
|
||||
*/
|
||||
ERROR_IF((src0_is_acc(devinfo, inst) &&
|
||||
brw_reg_type_is_integer(src0_type)) ||
|
||||
(src1_is_acc(devinfo, inst) &&
|
||||
brw_reg_type_is_integer(src1_type)),
|
||||
"Integer source operands cannot be accumulators.");
|
||||
|
||||
/* Page 935 (page 951 of the PDF) of the Ice Lake PRM volume 2a says:
|
||||
*
|
||||
|
|
@ -2353,34 +1966,32 @@ instruction_restrictions(const struct brw_isa_info *isa,
|
|||
brw_inst_opcode(isa, inst) == BRW_OPCODE_AND ||
|
||||
brw_inst_opcode(isa, inst) == BRW_OPCODE_XOR ||
|
||||
brw_inst_opcode(isa, inst) == BRW_OPCODE_NOT) {
|
||||
if (devinfo->ver >= 8) {
|
||||
/* While the behavior of the negate source modifier is defined as
|
||||
* logical not, the behavior of abs source modifier is not
|
||||
* defined. Disallow it to be safe.
|
||||
*/
|
||||
ERROR_IF(brw_inst_src0_abs(devinfo, inst),
|
||||
"Behavior of abs source modifier in logic ops is undefined.");
|
||||
ERROR_IF(brw_inst_opcode(isa, inst) != BRW_OPCODE_NOT &&
|
||||
brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
|
||||
brw_inst_src1_abs(devinfo, inst),
|
||||
"Behavior of abs source modifier in logic ops is undefined.");
|
||||
/* While the behavior of the negate source modifier is defined as
|
||||
* logical not, the behavior of abs source modifier is not
|
||||
* defined. Disallow it to be safe.
|
||||
*/
|
||||
ERROR_IF(brw_inst_src0_abs(devinfo, inst),
|
||||
"Behavior of abs source modifier in logic ops is undefined.");
|
||||
ERROR_IF(brw_inst_opcode(isa, inst) != BRW_OPCODE_NOT &&
|
||||
brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
|
||||
brw_inst_src1_abs(devinfo, inst),
|
||||
"Behavior of abs source modifier in logic ops is undefined.");
|
||||
|
||||
/* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
|
||||
*
|
||||
* Source modifier is not allowed if source is an accumulator.
|
||||
*
|
||||
* The same text also appears for OR, NOT, and XOR instructions.
|
||||
*/
|
||||
ERROR_IF((brw_inst_src0_abs(devinfo, inst) ||
|
||||
brw_inst_src0_negate(devinfo, inst)) &&
|
||||
src0_is_acc(devinfo, inst),
|
||||
"Source modifier is not allowed if source is an accumulator.");
|
||||
ERROR_IF(brw_num_sources_from_inst(isa, inst) > 1 &&
|
||||
(brw_inst_src1_abs(devinfo, inst) ||
|
||||
brw_inst_src1_negate(devinfo, inst)) &&
|
||||
src1_is_acc(devinfo, inst),
|
||||
"Source modifier is not allowed if source is an accumulator.");
|
||||
}
|
||||
/* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
|
||||
*
|
||||
* Source modifier is not allowed if source is an accumulator.
|
||||
*
|
||||
* The same text also appears for OR, NOT, and XOR instructions.
|
||||
*/
|
||||
ERROR_IF((brw_inst_src0_abs(devinfo, inst) ||
|
||||
brw_inst_src0_negate(devinfo, inst)) &&
|
||||
src0_is_acc(devinfo, inst),
|
||||
"Source modifier is not allowed if source is an accumulator.");
|
||||
ERROR_IF(brw_num_sources_from_inst(isa, inst) > 1 &&
|
||||
(brw_inst_src1_abs(devinfo, inst) ||
|
||||
brw_inst_src1_negate(devinfo, inst)) &&
|
||||
src1_is_acc(devinfo, inst),
|
||||
"Source modifier is not allowed if source is an accumulator.");
|
||||
|
||||
/* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
|
||||
*
|
||||
|
|
@ -2456,9 +2067,7 @@ instruction_restrictions(const struct brw_isa_info *isa,
|
|||
else
|
||||
dst_type = brw_inst_3src_a16_dst_type(devinfo, inst);
|
||||
|
||||
if (devinfo->ver < 8) {
|
||||
ERROR_IF(devinfo->ver < 8, "CSEL not supported before Gfx8");
|
||||
} else if (devinfo->ver <= 9) {
|
||||
if (devinfo->ver == 9) {
|
||||
ERROR_IF(dst_type != BRW_REGISTER_TYPE_F,
|
||||
"CSEL destination type must be F");
|
||||
} else {
|
||||
|
|
@ -2680,69 +2289,23 @@ send_descriptor_restrictions(const struct brw_isa_info *isa,
|
|||
}
|
||||
|
||||
if (brw_inst_sfid(devinfo, inst) == BRW_SFID_URB && devinfo->ver < 20) {
|
||||
/* Gfx4 doesn't have a "header present" bit in the SEND message. */
|
||||
ERROR_IF(devinfo->ver > 4 && !brw_inst_header_present(devinfo, inst),
|
||||
ERROR_IF(!brw_inst_header_present(devinfo, inst),
|
||||
"Header must be present for all URB messages.");
|
||||
|
||||
switch (brw_inst_urb_opcode(devinfo, inst)) {
|
||||
case BRW_URB_OPCODE_WRITE_HWORD:
|
||||
break;
|
||||
|
||||
/* case FF_SYNC: */
|
||||
case BRW_URB_OPCODE_WRITE_OWORD:
|
||||
/* Gfx5 / Gfx6 FF_SYNC message and Gfx7+ URB_WRITE_OWORD have the
|
||||
* same opcode value.
|
||||
*/
|
||||
if (devinfo->ver == 5 || devinfo->ver == 6) {
|
||||
ERROR_IF(brw_inst_urb_global_offset(devinfo, inst) != 0,
|
||||
"FF_SYNC global offset must be zero.");
|
||||
ERROR_IF(brw_inst_urb_swizzle_control(devinfo, inst) != 0,
|
||||
"FF_SYNC swizzle control must be zero.");
|
||||
ERROR_IF(brw_inst_urb_used(devinfo, inst) != 0,
|
||||
"FF_SYNC used must be zero.");
|
||||
ERROR_IF(brw_inst_urb_complete(devinfo, inst) != 0,
|
||||
"FF_SYNC complete must be zero.");
|
||||
|
||||
/* Volume 4 part 2 of the Sandybridge PRM (page 28) says:
|
||||
*
|
||||
* A message response (writeback) length of 1 GRF will be
|
||||
* indicated on the ‘send’ instruction if the thread requires
|
||||
* response data and/or synchronization.
|
||||
*/
|
||||
ERROR_IF((unsigned)brw_inst_rlen(devinfo, inst) > 1,
|
||||
"FF_SYNC read length must be 0 or 1.");
|
||||
} else {
|
||||
ERROR_IF(devinfo->ver < 7,
|
||||
"URB OWORD write messages only valid on gfx >= 7");
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_URB_OPCODE_READ_HWORD:
|
||||
case BRW_URB_OPCODE_READ_OWORD:
|
||||
ERROR_IF(devinfo->ver < 7,
|
||||
"URB read messages only valid on gfx >= 7");
|
||||
break;
|
||||
|
||||
case GFX7_URB_OPCODE_ATOMIC_MOV:
|
||||
case BRW_URB_OPCODE_WRITE_HWORD:
|
||||
case BRW_URB_OPCODE_WRITE_OWORD:
|
||||
case GFX7_URB_OPCODE_ATOMIC_INC:
|
||||
ERROR_IF(devinfo->ver < 7,
|
||||
"URB atomic move and increment messages only valid on gfx >= 7");
|
||||
break;
|
||||
|
||||
case GFX7_URB_OPCODE_ATOMIC_MOV:
|
||||
case GFX8_URB_OPCODE_ATOMIC_ADD:
|
||||
/* The Haswell PRM lists this opcode as valid on page 317. */
|
||||
ERROR_IF(devinfo->verx10 < 75,
|
||||
"URB atomic add message only valid on gfx >= 7.5");
|
||||
case GFX8_URB_OPCODE_SIMD8_WRITE:
|
||||
break;
|
||||
|
||||
case GFX8_URB_OPCODE_SIMD8_READ:
|
||||
ERROR_IF(brw_inst_rlen(devinfo, inst) == 0,
|
||||
"URB SIMD8 read message must read some data.");
|
||||
FALLTHROUGH;
|
||||
|
||||
case GFX8_URB_OPCODE_SIMD8_WRITE:
|
||||
ERROR_IF(devinfo->ver < 8,
|
||||
"URB SIMD8 messages only valid on gfx >= 8");
|
||||
break;
|
||||
|
||||
case GFX125_URB_OPCODE_FENCE:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue