/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file
 *
 * Implements a pass that validates various invariants of the IR.  The
 * current pass only validates that GRF uses are sane.  More can be added
 * later.
 */

#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_eu.h"

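/* Validation assertions.  On failure, each of these prints the shader stage
 * and the offending instruction to stderr, then aborts.
 */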
#define fsv_assert(assertion)                                                \
   {                                                                         \
      if (!(assertion)) {                                                    \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",           \
                 _mesa_shader_stage_to_abbrev(s.stage));                     \
         brw_print_instruction(s, inst, stderr);                             \
         fprintf(stderr, "%s:%d: '%s' failed\n", __FILE__, __LINE__, #assertion); \
         abort();                                                            \
      }                                                                      \
   }

#define fsv_assert_eq(A, B)                                                  \
   {                                                                         \
      unsigned a = (A);                                                      \
      unsigned b = (B);                                                      \
      if (a != b) {                                                          \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",           \
                 _mesa_shader_stage_to_abbrev(s.stage));                     \
         brw_print_instruction(s, inst, stderr);                             \
         fprintf(stderr, "%s:%d: A == B failed\n", __FILE__, __LINE__);      \
         fprintf(stderr, "  A = %s = %u\n", #A, a);                          \
         fprintf(stderr, "  B = %s = %u\n", #B, b);                          \
         abort();                                                            \
      }                                                                      \
   }

#define fsv_assert_ne(A, B)                                                  \
   {                                                                         \
      unsigned a = (A);                                                      \
      unsigned b = (B);                                                      \
      if (a == b) {                                                          \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",           \
                 _mesa_shader_stage_to_abbrev(s.stage));                     \
         brw_print_instruction(s, inst, stderr);                             \
         fprintf(stderr, "%s:%d: A != B failed\n", __FILE__, __LINE__);      \
         fprintf(stderr, "  A = %s = %u\n", #A, a);                          \
         fprintf(stderr, "  B = %s = %u\n", #B, b);                          \
         abort();                                                            \
      }                                                                      \
   }

#define fsv_assert_lte(A, B)                                                 \
   {                                                                         \
      unsigned a = (A);                                                      \
      unsigned b = (B);                                                      \
      if (a > b) {                                                           \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",           \
                 _mesa_shader_stage_to_abbrev(s.stage));                     \
         brw_print_instruction(s, inst, stderr);                             \
         fprintf(stderr, "%s:%d: A <= B failed\n", __FILE__, __LINE__);      \
         fprintf(stderr, "  A = %s = %u\n", #A, a);                          \
         fprintf(stderr, "  B = %s = %u\n", #B, b);                          \
         abort();                                                            \
      }                                                                      \
   }

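/* Everything below is compiled only when NDEBUG is not defined, i.e. in
 * builds with asserts enabled.
 */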
#ifndef NDEBUG

static inline bool
is_ud_imm(const brw_reg &reg)
{
   return reg.file == IMM && reg.type == BRW_TYPE_UD;
}

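/* Check the invariants of a MEMORY_{LOAD,STORE,ATOMIC}_LOGICAL instruction:
 * the control sources must all be UD immediates, the data size must agree
 * with the data and destination register types, and the data sources
 * present must match the opcode.
 */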
static void
validate_memory_logical(const fs_visitor &s, const fs_inst *inst)
{
   const intel_device_info *devinfo = s.devinfo;

   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_OPCODE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_MODE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_BINDING_TYPE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_ALIGNMENT]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_DATA_SIZE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_COMPONENTS]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_FLAGS]));

   enum lsc_opcode op = (enum lsc_opcode) inst->src[MEMORY_LOGICAL_OPCODE].ud;
   enum memory_flags flags = (memory_flags)inst->src[MEMORY_LOGICAL_FLAGS].ud;
   bool transpose = flags & MEMORY_FLAG_TRANSPOSE;
   bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS;
   enum memory_logical_mode mode =
      (memory_logical_mode)inst->src[MEMORY_LOGICAL_MODE].ud;

   enum lsc_data_size data_size =
      (enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud;
   unsigned data_size_B = lsc_data_size_bytes(data_size);

   if (!devinfo->has_lsc) {
      fsv_assert(data_size == LSC_DATA_SIZE_D8U32 ||
                 data_size == LSC_DATA_SIZE_D16U32 ||
                 data_size == LSC_DATA_SIZE_D32 ||
                 data_size == LSC_DATA_SIZE_D64);

      if (transpose) {
         const unsigned min_alignment =
            mode == MEMORY_MODE_SHARED_LOCAL ? 16 : 4;
         fsv_assert(inst->src[MEMORY_LOGICAL_ALIGNMENT].ud >= min_alignment);
      }
   }

   fsv_assert(!transpose || !include_helpers);
   fsv_assert(!transpose || lsc_opcode_has_transpose(op));

   if (inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud == LSC_ADDR_SURFTYPE_FLAT)
      fsv_assert(inst->src[MEMORY_LOGICAL_BINDING].file == BAD_FILE);

   if (inst->src[MEMORY_LOGICAL_DATA1].file != BAD_FILE) {
      fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud ==
                 inst->components_read(MEMORY_LOGICAL_DATA1));

      fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].type ==
                 inst->src[MEMORY_LOGICAL_DATA1].type);
   }

   if (inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE) {
      fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud ==
                 inst->components_read(MEMORY_LOGICAL_DATA0));

      fsv_assert(brw_type_size_bytes(inst->src[MEMORY_LOGICAL_DATA0].type) ==
                 data_size_B);
   }

   if (inst->dst.file != BAD_FILE)
      fsv_assert(brw_type_size_bytes(inst->dst.type) == data_size_B);

   switch (inst->opcode) {
   case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
      fsv_assert(op == LSC_OP_LOAD || op == LSC_OP_LOAD_CMASK);
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].file == BAD_FILE);
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE);
      break;

   case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
      fsv_assert(lsc_opcode_is_store(op));
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE);
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE);
      break;

   case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
      fsv_assert(lsc_opcode_is_atomic(op));
      fsv_assert((inst->src[MEMORY_LOGICAL_DATA0].file == BAD_FILE)
                 == (lsc_op_num_data_values(op) < 1));
      fsv_assert((inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE)
                 == (lsc_op_num_data_values(op) < 2));
      fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud == 1);
      fsv_assert(!include_helpers);
      break;

   default:
      unreachable("invalid opcode");
   }
}

static const char *
brw_shader_phase_to_string(enum brw_shader_phase phase)
{
   switch (phase) {
   case BRW_SHADER_PHASE_INITIAL:                return "INITIAL";
   case BRW_SHADER_PHASE_AFTER_NIR:              return "AFTER_NIR";
   case BRW_SHADER_PHASE_AFTER_OPT_LOOP:         return "AFTER_OPT_LOOP";
   case BRW_SHADER_PHASE_AFTER_EARLY_LOWERING:   return "AFTER_EARLY_LOWERING";
   case BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING:  return "AFTER_MIDDLE_LOWERING";
   case BRW_SHADER_PHASE_AFTER_LATE_LOWERING:    return "AFTER_LATE_LOWERING";
   case BRW_SHADER_PHASE_AFTER_REGALLOC:         return "AFTER_REGALLOC";
   case BRW_SHADER_PHASE_INVALID:                break;
   }
   unreachable("invalid_phase");
   return NULL;
}

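/* Logical/virtual opcodes are lowered away at specific points in the
 * compile.  Flag any instruction that is still present after the phase that
 * should have eliminated it.
 */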
static void
brw_validate_instruction_phase(const fs_visitor &s, fs_inst *inst)
{
   enum brw_shader_phase invalid_from = BRW_SHADER_PHASE_INVALID;

   switch (inst->opcode) {
   case FS_OPCODE_FB_WRITE_LOGICAL:
   case FS_OPCODE_FB_READ_LOGICAL:
   case SHADER_OPCODE_TEX_LOGICAL:
   case SHADER_OPCODE_TXD_LOGICAL:
   case SHADER_OPCODE_TXF_LOGICAL:
   case SHADER_OPCODE_TXL_LOGICAL:
   case SHADER_OPCODE_TXS_LOGICAL:
   case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
   case FS_OPCODE_TXB_LOGICAL:
   case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
   case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
   case SHADER_OPCODE_TXF_MCS_LOGICAL:
   case SHADER_OPCODE_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_LOGICAL:
   case SHADER_OPCODE_TG4_BIAS_LOGICAL:
   case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
   case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
   case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
   case SHADER_OPCODE_GET_BUFFER_SIZE:
   case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
   case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
   case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
   case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
   case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
   case RT_OPCODE_TRACE_RAY_LOGICAL:
   case SHADER_OPCODE_URB_READ_LOGICAL:
   case SHADER_OPCODE_URB_WRITE_LOGICAL:
   case SHADER_OPCODE_REDUCE:
   case SHADER_OPCODE_INCLUSIVE_SCAN:
   case SHADER_OPCODE_EXCLUSIVE_SCAN:
   case SHADER_OPCODE_VOTE_ANY:
   case SHADER_OPCODE_VOTE_ALL:
   case SHADER_OPCODE_VOTE_EQUAL:
   case SHADER_OPCODE_BALLOT:
   case SHADER_OPCODE_QUAD_SWAP:
   case SHADER_OPCODE_READ_FROM_LIVE_CHANNEL:
   case SHADER_OPCODE_READ_FROM_CHANNEL:
      invalid_from = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING;
      break;

   case SHADER_OPCODE_LOAD_PAYLOAD:
      invalid_from = BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING;
      break;

   default:
      /* Nothing to do. */
      break;
   }

   assert(s.phase < BRW_SHADER_PHASE_INVALID);
   if (s.phase >= invalid_from) {
      fprintf(stderr, "INVALID INSTRUCTION IN PHASE: %s\n",
              brw_shader_phase_to_string(s.phase));
      brw_print_instruction(s, inst, stderr);
      abort();
   }
}

void
brw_fs_validate(const fs_visitor &s)
{
   const intel_device_info *devinfo = s.devinfo;

   if (s.phase <= BRW_SHADER_PHASE_AFTER_NIR)
      return;

   s.cfg->validate(_mesa_shader_stage_to_abbrev(s.stage));

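   /* Walk every instruction in the CFG and check per-instruction
    * invariants.
    */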
   foreach_block(block, s.cfg) {
      foreach_inst_in_block (fs_inst, inst, block) {
         brw_validate_instruction_phase(s, inst);

         switch (inst->opcode) {
         case SHADER_OPCODE_SEND:
            fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1]));
            break;

         case BRW_OPCODE_MOV:
            fsv_assert(inst->sources == 1);
            break;

         case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
         case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
         case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
            validate_memory_logical(s, inst);
            break;

         default:
            break;
         }

         /* On Xe2, the "write the accumulator in addition to the explicit
          * destination" bit no longer exists.  Try to catch uses of this
          * feature earlier in the process.
          */
         if (devinfo->ver >= 20 && inst->writes_accumulator) {
            fsv_assert(inst->dst.is_accumulator() ||
                       inst->opcode == BRW_OPCODE_ADDC ||
                       inst->opcode == BRW_OPCODE_MACH ||
                       inst->opcode == BRW_OPCODE_SUBB);
         }

         if (inst->is_3src(s.compiler)) {
            const unsigned integer_sources =
               brw_type_is_int(inst->src[0].type) +
               brw_type_is_int(inst->src[1].type) +
               brw_type_is_int(inst->src[2].type);
            const unsigned float_sources =
               brw_type_is_float(inst->src[0].type) +
               brw_type_is_float(inst->src[1].type) +
               brw_type_is_float(inst->src[2].type);

            fsv_assert((integer_sources == 3 && float_sources == 0) ||
                       (integer_sources == 0 && float_sources == 3));

            if (devinfo->ver >= 10) {
               for (unsigned i = 0; i < 3; i++) {
                  if (inst->src[i].file == IMM)
                     continue;

                  switch (inst->src[i].vstride) {
                  case BRW_VERTICAL_STRIDE_0:
                  case BRW_VERTICAL_STRIDE_4:
                  case BRW_VERTICAL_STRIDE_8:
                  case BRW_VERTICAL_STRIDE_16:
                     break;

                  case BRW_VERTICAL_STRIDE_1:
                     fsv_assert_lte(12, devinfo->ver);
                     break;

                  case BRW_VERTICAL_STRIDE_2:
                     fsv_assert_lte(devinfo->ver, 11);
                     break;

                  default:
                     fsv_assert(!"invalid vstride");
                     break;
                  }
               }
            } else if (s.grf_used != 0) {
               /* Only perform the pre-Gfx10 checks after register allocation
                * has occurred.
                *
                * Many passes (e.g., constant copy propagation) will generate
                * invalid 3-source instructions with the expectation that
                * later passes (e.g., combine constants) will fix them.
                */
               for (unsigned i = 0; i < 3; i++) {
                  fsv_assert_ne(inst->src[i].file, IMM);

                  /* A stride of 1 (the usual case) or 0, with a special
                   * "repctrl" bit, is allowed.  The repctrl bit doesn't work
                   * for 64-bit datatypes, so if the source type is 64-bit
                   * then only a stride of 1 is allowed.  From the Broadwell
                   * PRM, Volume 7 "3D Media GPGPU", page 944:
                   *
                   *    This is applicable to 32b datatypes and 16b datatype.
                   *    64b datatypes cannot use the replicate control.
                   */
                  const unsigned stride_in_bytes = byte_stride(inst->src[i]);
                  const unsigned size_in_bytes = brw_type_size_bytes(inst->src[i].type);
                  if (stride_in_bytes == 0) {
                     /* If the source is_scalar, then the stride will be
                      * converted to <4;4,1> in brw_lower_scalar_fp64_MAD
                      * after SIMD splitting.
                      */
                     if (!inst->src[i].is_scalar)
                        fsv_assert_lte(size_in_bytes, 4);
                  } else {
                     fsv_assert_eq(stride_in_bytes, size_in_bytes);
                  }
               }
            }
         }

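         /* Destination and source VGRF accesses must stay within the
          * register's allocated size.
          */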
         if (inst->dst.file == VGRF) {
            fsv_assert_lte(inst->dst.offset / REG_SIZE + regs_written(inst),
                           s.alloc.sizes[inst->dst.nr]);

            if (inst->exec_size > 1)
               fsv_assert_ne(inst->dst.stride, 0);
         }

         for (unsigned i = 0; i < inst->sources; i++) {
            if (inst->src[i].file == VGRF) {
               fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(devinfo, inst, i),
                              s.alloc.sizes[inst->src[i].nr]);
            }
         }

         /* Accumulator Registers, bspec 47251:
          *
          *    "When destination is accumulator with offset 0, destination
          *     horizontal stride must be 1."
          */
         if (intel_needs_workaround(devinfo, 14014617373) &&
             inst->dst.is_accumulator() &&
             phys_subnr(devinfo, inst->dst) == 0) {
            fsv_assert_eq(inst->dst.hstride, 1);
         }

         if (inst->is_math() && intel_needs_workaround(devinfo, 22016140776)) {
            /* Wa_22016140776:
             *
             *    Scalar broadcast on HF math (packed or unpacked) must not
             *    be used.  Compiler must use a mov instruction to expand the
             *    scalar value to a vector before using in a HF (packed or
             *    unpacked) math operation.
             *
             * Since copy propagation knows about this restriction, nothing
             * should be able to generate these invalid source strides.
             * Detect potential problems sooner rather than later.
             */
            for (unsigned i = 0; i < inst->sources; i++) {
               fsv_assert(inst->src[i].is_scalar ||
                          !is_uniform(inst->src[i]) ||
                          inst->src[i].type != BRW_TYPE_HF);
            }
         }
      }
   }
}
#endif