intel/elk: Remove remaining Gfx9+ code

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27629>
Author: Caio Oliveira, 2024-02-13 13:20:08 -08:00 (committed by Marge Bot)
parent ea12b38602
commit fd3a815a5b
9 changed files with 77 additions and 365 deletions


@@ -86,7 +86,7 @@ elk_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
* destination type can be Quadword and source type Doubleword for Gfx8 and
* Gfx9. So, lower 64 bit multiply instruction on rest of the platforms.
*/
if (devinfo->ver < 8 || devinfo->ver > 9)
if (devinfo->ver < 8)
int64_options |= nir_lower_imul_2x32_64;
/* We want the GLSL compiler to emit code that uses condition codes */
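
A note on the int64 lowering selected above: on platforms older than Gfx8, where the Quadword-destination/Doubleword-source MUL described in the comment is unavailable, nir_lower_imul_2x32_64 asks NIR to lower the 32x32 -> 64 multiply ops before they reach the backend. A minimal sketch of the operation's semantics only (illustrative C, not the actual NIR lowering code):

   #include <stdint.h>

   /* umul_2x32_64 semantics: two 32-bit sources, one full 64-bit product.
    * (The signed imul variant sign-extends the sources instead.) */
   static inline uint64_t
   umul_2x32_64_value(uint32_t a, uint32_t b)
   {
      const uint32_t lo = a * b;                                /* low dword  */
      const uint32_t hi = (uint32_t)(((uint64_t)a * b) >> 32);  /* high dword */
      return ((uint64_t)hi << 32) | lo;
   }
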
@@ -107,8 +107,7 @@ elk_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
nir_options->lower_ffma16 = devinfo->ver < 6;
nir_options->lower_ffma32 = devinfo->ver < 6;
nir_options->lower_ffma64 = devinfo->ver < 6;
nir_options->lower_flrp32 = devinfo->ver < 6 || devinfo->ver >= 11;
nir_options->lower_fpow = devinfo->ver >= 12;
nir_options->lower_flrp32 = devinfo->ver < 6;
nir_options->has_bfe = devinfo->ver >= 7;
nir_options->has_bfm = devinfo->ver >= 7;
@@ -127,9 +126,8 @@ elk_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
elk_nir_no_indirect_mask(compiler, i);
nir_options->force_indirect_unrolling_sampler = devinfo->ver < 7;
if (devinfo->ver < 12)
nir_options->divergence_analysis_options |=
nir_divergence_single_prim_per_subgroup;
nir_options->divergence_analysis_options |=
nir_divergence_single_prim_per_subgroup;
compiler->nir_options[i] = nir_options;
}


@@ -1799,7 +1799,7 @@ elk_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo,
* to do a full test run with elk_fs_test_dispatch_packing() hooked up to
* the NIR front-end before changing this assertion.
*/
assert(devinfo->ver <= 12);
assert(devinfo->ver <= 8);
switch (stage) {
case MESA_SHADER_FRAGMENT: {
@@ -1813,8 +1813,7 @@ elk_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo,
*/
const struct elk_wm_prog_data *wm_prog_data =
(const struct elk_wm_prog_data *)prog_data;
return devinfo->verx10 < 125 &&
!wm_prog_data->persample_dispatch &&
return !wm_prog_data->persample_dispatch &&
wm_prog_data->uses_vmask &&
max_polygons < 2;
}


@@ -271,7 +271,7 @@ ALU2(SUBB)
static inline unsigned
reg_unit(const struct intel_device_info *devinfo)
{
return devinfo->ver >= 20 ? 2 : 1;
return 1;
}
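
With the Xe2 branch gone, reg_unit() is a constant 1: one logical GRF corresponds to one native 32B register on every platform elk still targets. Callers that scale register-granular sizes by it therefore reduce to plain REG_SIZE. A hedged usage sketch, assuming devinfo and the usual REG_SIZE definition (32 bytes) from the elk headers are in scope:

   /* One logical register unit is now always a single 32-byte GRF. */
   const unsigned reg_size_bytes = reg_unit(devinfo) * REG_SIZE;   /* 1 * 32 */
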
@@ -387,20 +387,6 @@ elk_sampler_desc(const struct intel_device_info *devinfo,
const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
SET_BITS(sampler, 11, 8));
/* From GFX20 Bspec: Shared Functions - Message Descriptor -
* Sampling Engine:
*
* Message Type[5] 31 This bit represents the upper bit of message type
* 6-bit encoding (c.f. [16:12]). This bit is set
* for messages with programmable offsets.
*/
if (devinfo->ver >= 20)
return desc | SET_BITS(msg_type & 0x1F, 16, 12) |
SET_BITS(simd_mode & 0x3, 18, 17) |
SET_BITS(simd_mode >> 2, 29, 29) |
SET_BITS(return_format, 30, 30) |
SET_BITS(msg_type >> 5, 31, 31);
/* From the CHV Bspec: Shared Functions - Message Descriptor -
* Sampling Engine:
*
@@ -443,9 +429,7 @@ elk_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
static inline unsigned
elk_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
if (devinfo->ver >= 20)
return GET_BITS(desc, 31, 31) << 5 | GET_BITS(desc, 16, 12);
else if (devinfo->ver >= 7)
if (devinfo->ver >= 7)
return GET_BITS(desc, 16, 12);
else if (devinfo->verx10 >= 45)
return GET_BITS(desc, 15, 12);
@@ -1066,7 +1050,7 @@ elk_fb_write_desc(const struct intel_device_info *devinfo,
GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
ELK_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
assert(devinfo->ver >= 10 || !coarse_write);
assert(!coarse_write);
if (devinfo->ver >= 6) {
return elk_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
@@ -1121,14 +1105,6 @@ elk_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
return GET_BITS(desc, 15, 15);
}
static inline bool
elk_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
uint32_t desc)
{
assert(devinfo->ver >= 10);
return GET_BITS(desc, 18, 18);
}
static inline bool
elk_lsc_opcode_has_cmask(enum elk_lsc_opcode opcode)
{
@@ -1570,18 +1546,6 @@ elk_mdc_sm2_exec_size(uint32_t sm2)
return 8 << sm2;
}
static inline uint32_t
elk_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
unsigned exec_size, unsigned msg_type)
{
assert(devinfo->has_ray_tracing);
assert(devinfo->ver < 20 || exec_size == 16);
return SET_BITS(0, 19, 19) | /* No header */
SET_BITS(msg_type, 17, 14) |
SET_BITS(elk_mdc_sm2(exec_size), 8, 8);
}
static inline uint32_t
elk_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
uint32_t desc)
@@ -1612,7 +1576,7 @@ elk_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
const bool simd_mode = exec_size == 16;
const bool slot_group = group >= 16;
assert(devinfo->ver >= 10 || !coarse_pixel_rate);
assert(!coarse_pixel_rate);
return (SET_BITS(slot_group, 11, 11) |
SET_BITS(msg_type, 13, 12) |
SET_BITS(!!noperspective, 14, 14) |


@@ -435,28 +435,7 @@ elk_fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const
if (is_send_from_grf())
return false;
/* From Wa_1604601757:
*
* "When multiplying a DW and any lower precision integer, source modifier
* is not supported."
*/
if (devinfo->ver >= 12 && (opcode == ELK_OPCODE_MUL ||
opcode == ELK_OPCODE_MAD)) {
const elk_reg_type exec_type = get_exec_type(this);
const unsigned min_type_sz = opcode == ELK_OPCODE_MAD ?
MIN2(type_sz(src[1].type), type_sz(src[2].type)) :
MIN2(type_sz(src[0].type), type_sz(src[1].type));
if (elk_reg_type_is_integer(exec_type) &&
type_sz(exec_type) >= 4 &&
type_sz(exec_type) != min_type_sz)
return false;
}
if (!elk_backend_instruction::can_do_source_mods())
return false;
return true;
return elk_backend_instruction::can_do_source_mods();
}
bool
@@ -939,24 +918,20 @@ namespace {
unsigned
predicate_width(const intel_device_info *devinfo, elk_predicate predicate)
{
if (devinfo->ver >= 20) {
return 1;
} else {
switch (predicate) {
case ELK_PREDICATE_NONE: return 1;
case ELK_PREDICATE_NORMAL: return 1;
case ELK_PREDICATE_ALIGN1_ANY2H: return 2;
case ELK_PREDICATE_ALIGN1_ALL2H: return 2;
case ELK_PREDICATE_ALIGN1_ANY4H: return 4;
case ELK_PREDICATE_ALIGN1_ALL4H: return 4;
case ELK_PREDICATE_ALIGN1_ANY8H: return 8;
case ELK_PREDICATE_ALIGN1_ALL8H: return 8;
case ELK_PREDICATE_ALIGN1_ANY16H: return 16;
case ELK_PREDICATE_ALIGN1_ALL16H: return 16;
case ELK_PREDICATE_ALIGN1_ANY32H: return 32;
case ELK_PREDICATE_ALIGN1_ALL32H: return 32;
default: unreachable("Unsupported predicate");
}
switch (predicate) {
case ELK_PREDICATE_NONE: return 1;
case ELK_PREDICATE_NORMAL: return 1;
case ELK_PREDICATE_ALIGN1_ANY2H: return 2;
case ELK_PREDICATE_ALIGN1_ALL2H: return 2;
case ELK_PREDICATE_ALIGN1_ANY4H: return 4;
case ELK_PREDICATE_ALIGN1_ALL4H: return 4;
case ELK_PREDICATE_ALIGN1_ANY8H: return 8;
case ELK_PREDICATE_ALIGN1_ALL8H: return 8;
case ELK_PREDICATE_ALIGN1_ANY16H: return 16;
case ELK_PREDICATE_ALIGN1_ALL16H: return 16;
case ELK_PREDICATE_ALIGN1_ANY32H: return 32;
case ELK_PREDICATE_ALIGN1_ALL32H: return 32;
default: unreachable("Unsupported predicate");
}
}
@@ -996,8 +971,8 @@ namespace {
unsigned
elk_fs_inst::flags_read(const intel_device_info *devinfo) const
{
if (devinfo->ver < 20 && (predicate == ELK_PREDICATE_ALIGN1_ANYV ||
predicate == ELK_PREDICATE_ALIGN1_ALLV)) {
if (predicate == ELK_PREDICATE_ALIGN1_ANYV ||
predicate == ELK_PREDICATE_ALIGN1_ALLV) {
/* The vertical predication modes combine corresponding bits from
* f0.0 and f1.0 on Gfx7+, and f0.0 and f0.1 on older hardware.
*/
@@ -1275,17 +1250,6 @@ elk_fs_visitor::assign_curb_setup()
prog_data->curb_read_length = uniform_push_length + ubo_push_length;
uint64_t used = 0;
bool is_compute = gl_shader_stage_is_compute(stage);
if (is_compute && elk_cs_prog_data(prog_data)->uses_inline_data) {
/* With COMPUTE_WALKER, we can push up to one register worth of data via
* the inline data parameter in the COMPUTE_WALKER command itself.
*
* TODO: Support inline data and push at the same time.
*/
assert(devinfo->verx10 >= 125);
assert(uniform_push_length <= reg_unit(devinfo));
}
/* Map the offsets in the UNIFORM file to fixed HW regs. */
foreach_block_and_inst(block, elk_fs_inst, inst, cfg) {
@@ -1602,78 +1566,22 @@ elk_fs_visitor::assign_urb_setup()
* representation described above into an offset and a
* grf, which contains the plane parameters for the first
* polygon processed by the thread.
*
* Earlier platforms and per-primitive block pack 2 logical
* input components per 32B register.
*/
if (devinfo->ver >= 20 && !per_prim) {
/* Gfx20+ is able to pack 5 logical input components
* per 64B register for vertex setup data.
*/
const unsigned grf = base + idx / 5 * 2 * max_polygons;
assert(inst->src[i].offset / param_width < 12);
const unsigned delta = idx % 5 * 12 +
inst->src[i].offset / (param_width * chan_sz) * chan_sz +
inst->src[i].offset % chan_sz;
reg = byte_offset(retype(elk_vec8_grf(grf, 0), inst->src[i].type),
delta);
} else {
/* Earlier platforms and per-primitive block pack 2 logical
* input components per 32B register.
*/
const unsigned grf = base + idx / 2 * max_polygons;
assert(inst->src[i].offset / param_width < REG_SIZE / 2);
const unsigned delta = (idx % 2) * (REG_SIZE / 2) +
inst->src[i].offset / (param_width * chan_sz) * chan_sz +
inst->src[i].offset % chan_sz;
reg = byte_offset(retype(elk_vec8_grf(grf, 0), inst->src[i].type),
delta);
}
const unsigned grf = base + idx / 2 * max_polygons;
assert(inst->src[i].offset / param_width < REG_SIZE / 2);
const unsigned delta = (idx % 2) * (REG_SIZE / 2) +
inst->src[i].offset / (param_width * chan_sz) * chan_sz +
inst->src[i].offset % chan_sz;
reg = byte_offset(retype(elk_vec8_grf(grf, 0), inst->src[i].type),
delta);
if (max_polygons > 1) {
assert(devinfo->ver >= 12);
/* Misaligned channel strides that would lead to
* cross-channel access in the representation above are
* disallowed.
*/
assert(inst->src[i].stride * type_sz(inst->src[i].type) == chan_sz);
/* Number of channels processing the same polygon. */
const unsigned poly_width = dispatch_width / max_polygons;
assert(dispatch_width % max_polygons == 0);
/* Accessing a subset of channels of a parameter vector
* starting from "chan" is necessary to handle
* SIMD-lowered instructions though.
*/
const unsigned chan = inst->src[i].offset %
(param_width * chan_sz) / chan_sz;
assert(chan < dispatch_width);
assert(chan % poly_width == 0);
const unsigned reg_size = reg_unit(devinfo) * REG_SIZE;
reg = byte_offset(reg, chan / poly_width * reg_size);
if (inst->exec_size > poly_width) {
/* Accessing the parameters for multiple polygons.
* Corresponding parameters for different polygons
* are stored a GRF apart on the thread payload, so
* use that as vertical stride.
*/
const unsigned vstride = reg_size / type_sz(inst->src[i].type);
assert(vstride <= 32);
assert(chan % poly_width == 0);
reg = stride(reg, vstride, poly_width, 0);
} else {
/* Accessing one parameter for a single polygon --
* Translate to a scalar region.
*/
assert(chan % poly_width + inst->exec_size <= poly_width);
reg = stride(reg, 0, 1, 0);
}
} else {
const unsigned width = inst->src[i].stride == 0 ?
1 : MIN2(inst->exec_size, 8);
reg = stride(reg, width * inst->src[i].stride,
width, inst->src[i].stride);
}
const unsigned width = inst->src[i].stride == 0 ?
1 : MIN2(inst->exec_size, 8);
reg = stride(reg, width * inst->src[i].stride,
width, inst->src[i].stride);
reg.abs = inst->src[i].abs;
reg.negate = inst->src[i].negate;
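
For reference, the surviving setup path always packs two logical input components per 32B register, as the hoisted comment above states. A worked example under the retained formulas (hypothetical values; REG_SIZE is the usual 32 bytes):

   /* idx = 3, max_polygons = 1:
    *   grf   = base + idx / 2 * max_polygons = base + 1
    *   delta = (idx % 2) * (REG_SIZE / 2)    = 16 bytes into that GRF,
    *           plus the source's sub-dword offset terms from above. */
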
@@ -2078,9 +1986,6 @@ elk_get_subgroup_id_param_index(const intel_device_info *devinfo,
if (prog_data->nr_params == 0)
return -1;
if (devinfo->verx10 >= 125)
return -1;
/* The local thread id is always the last parameter in the list */
uint32_t last_param = prog_data->param[prog_data->nr_params - 1];
if (last_param == ELK_PARAM_BUILTIN_SUBGROUP_ID)
@@ -3787,20 +3692,7 @@ elk_fs_visitor::lower_mul_dword_inst(elk_fs_inst *inst, elk_bblock_t *block)
bool do_addition = true;
if (devinfo->ver >= 7) {
/* From Wa_1604601757:
*
* "When multiplying a DW and any lower precision integer, source modifier
* is not supported."
*
* An unsupported negate modifier on src[1] would ordinarily be
* lowered by the subsequent lower_regioning pass. In this case that
* pass would spawn another dword multiply. Instead, lower the
* modifier first.
*/
const bool source_mods_unsupported = (devinfo->ver >= 12);
if (inst->src[1].abs || (inst->src[1].negate &&
source_mods_unsupported))
if (inst->src[1].abs)
lower_src_modifiers(this, block, inst, 1);
if (inst->src[1].file == IMM) {
@@ -4027,8 +3919,7 @@ elk_fs_visitor::lower_integer_multiplication()
} else if (!inst->dst.is_accumulator() &&
(inst->dst.type == ELK_REGISTER_TYPE_D ||
inst->dst.type == ELK_REGISTER_TYPE_UD) &&
(!devinfo->has_integer_dword_mul ||
devinfo->verx10 >= 125)) {
!devinfo->has_integer_dword_mul) {
lower_mul_dword_inst(inst, block);
inst->remove(block);
progress = true;
@@ -4192,7 +4083,6 @@ elk_sample_mask_reg(const fs_builder &bld)
return elk_flag_subreg(sample_mask_flag_subreg(s) + bld.group() / 16);
} else {
assert(s.devinfo->ver >= 6 && bld.dispatch_width() <= 16);
assert(s.devinfo->ver < 20);
return retype(elk_vec1_grf((bld.group() >= 16 ? 2 : 1), 7),
ELK_REGISTER_TYPE_UW);
}
@@ -4258,7 +4148,6 @@ elk_emit_predicate_on_sample_mask(const fs_builder &bld, elk_fs_inst *inst)
assert(inst->predicate == ELK_PREDICATE_NORMAL);
assert(!inst->predicate_inverse);
assert(inst->flag_subreg == 0);
assert(s.devinfo->ver < 20);
/* Combine the sample mask with the existing predicate by using a
* vertical predication mode.
*/
@@ -4458,7 +4347,7 @@ get_fpu_lowered_simd_width(const elk_fs_visitor *shader,
* "Ternary instruction with condition modifiers must not use SIMD32."
*/
if (inst->conditional_mod && (devinfo->ver < 8 ||
(inst->elk_is_3src(compiler) && devinfo->ver < 12)))
inst->elk_is_3src(compiler)))
max_width = MIN2(max_width, 16);
/* From the IVB PRMs (applies to other devices that don't have the
@@ -4521,7 +4410,7 @@ get_fpu_lowered_simd_width(const elk_fs_visitor *shader,
* instructions do not support HF types and conversions from/to F are
* required.
*/
if (is_mixed_float_with_fp32_dst(inst) && devinfo->ver < 20)
if (is_mixed_float_with_fp32_dst(inst))
max_width = MIN2(max_width, 8);
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
@@ -4530,7 +4419,7 @@ get_fpu_lowered_simd_width(const elk_fs_visitor *shader,
* "No SIMD16 in mixed mode when destination is packed f16 for both
* Align1 and Align16."
*/
if (is_mixed_float_with_packed_fp16_dst(inst) && devinfo->ver < 20)
if (is_mixed_float_with_packed_fp16_dst(inst))
max_width = MIN2(max_width, 8);
/* Only power-of-two execution sizes are representable in the instruction
@@ -4566,7 +4455,7 @@ get_sampler_lowered_simd_width(const struct intel_device_info *devinfo,
*/
if (inst->opcode != ELK_SHADER_OPCODE_TEX &&
inst->components_read(TEX_LOGICAL_SRC_MIN_LOD))
return devinfo->ver < 20 ? 8 : 16;
return 8;
/* Calculate the number of coordinate components that have to be present
* assuming that additional arguments follow the texel coordinates in the
@@ -4581,14 +4470,6 @@ get_sampler_lowered_simd_width(const struct intel_device_info *devinfo,
inst->opcode != ELK_SHADER_OPCODE_TXF_CMS_LOGICAL) ? 4 :
3;
/* On Gfx9+ the LOD argument is for free if we're able to use the LZ
* variant of the TXL or TXF message.
*/
const bool implicit_lod = devinfo->ver >= 9 &&
(inst->opcode == ELK_SHADER_OPCODE_TXL ||
inst->opcode == ELK_SHADER_OPCODE_TXF) &&
inst->src[TEX_LOGICAL_SRC_LOD].is_zero();
/* Calculate the total number of argument components that need to be passed
* to the sampler unit.
*/
@@ -4596,7 +4477,7 @@ get_sampler_lowered_simd_width(const struct intel_device_info *devinfo,
MAX2(inst->components_read(TEX_LOGICAL_SRC_COORDINATE),
req_coord_components) +
inst->components_read(TEX_LOGICAL_SRC_SHADOW_C) +
(implicit_lod ? 0 : inst->components_read(TEX_LOGICAL_SRC_LOD)) +
inst->components_read(TEX_LOGICAL_SRC_LOD) +
inst->components_read(TEX_LOGICAL_SRC_LOD2) +
inst->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) +
(inst->opcode == ELK_SHADER_OPCODE_TG4_OFFSET_LOGICAL ?
@@ -4781,8 +4662,7 @@ get_lowered_simd_width(const elk_fs_visitor *shader, const elk_fs_inst *inst)
/* MULH is lowered to the MUL/MACH sequence using the accumulator, which
* is 8-wide on Gfx7+.
*/
return (devinfo->ver >= 20 ? 16 :
devinfo->ver >= 7 ? 8 :
return (devinfo->ver >= 7 ? 8 :
get_fpu_lowered_simd_width(shader, inst));
case ELK_FS_OPCODE_FB_WRITE_LOGICAL:
@@ -4817,7 +4697,7 @@ get_lowered_simd_width(const elk_fs_visitor *shader, const elk_fs_inst *inst)
/* TXD is unsupported in SIMD16 mode previous to Xe2. SIMD32 is still
* unsupported on Xe2.
*/
return devinfo->ver < 20 ? 8 : 16;
return 8;
case ELK_SHADER_OPCODE_TXL_LOGICAL:
case ELK_FS_OPCODE_TXB_LOGICAL:
@@ -4870,13 +4750,13 @@ get_lowered_simd_width(const elk_fs_visitor *shader, const elk_fs_inst *inst)
case ELK_SHADER_OPCODE_URB_READ_LOGICAL:
case ELK_SHADER_OPCODE_URB_WRITE_LOGICAL:
return MIN2(devinfo->ver < 20 ? 8 : 16, inst->exec_size);
return MIN2(8, inst->exec_size);
case ELK_SHADER_OPCODE_QUAD_SWIZZLE: {
const unsigned swiz = inst->src[1].ud;
return (is_uniform(inst->src[0]) ?
get_fpu_lowered_simd_width(shader, inst) :
devinfo->ver < 11 && type_sz(inst->src[0].type) == 4 ? 8 :
type_sz(inst->src[0].type) == 4 ? 8 :
swiz == ELK_SWIZZLE_XYXY || swiz == ELK_SWIZZLE_ZWZW ? 4 :
get_fpu_lowered_simd_width(shader, inst));
}
@@ -5249,7 +5129,7 @@ bool
elk_fs_visitor::lower_barycentrics()
{
const bool has_interleaved_layout = devinfo->has_pln ||
(devinfo->ver >= 7 && devinfo->ver < 20);
devinfo->ver >= 7;
bool progress = false;
if (stage != MESA_SHADER_FRAGMENT || !has_interleaved_layout)
@@ -6125,18 +6005,9 @@ elk_fs_visitor::set_tcs_invocation_id()
struct elk_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
const fs_builder bld = fs_builder(this).at_end();
const unsigned instance_id_mask =
(devinfo->verx10 >= 125) ? INTEL_MASK(7, 0) :
(devinfo->ver >= 11) ? INTEL_MASK(22, 16) :
INTEL_MASK(23, 17);
const unsigned instance_id_shift =
(devinfo->verx10 >= 125) ? 0 : (devinfo->ver >= 11) ? 16 : 17;
const unsigned instance_id_mask = INTEL_MASK(23, 17);
const unsigned instance_id_shift = 17;
/* Get instance number from g0.2 bits:
* * 7:0 on DG2+
* * 22:16 on gfx11+
* * 23:17 otherwise
*/
elk_fs_reg t = bld.vgrf(ELK_REGISTER_TYPE_UD);
bld.AND(t, elk_fs_reg(retype(elk_vec1_grf(0, 2), ELK_REGISTER_TYPE_UD)),
elk_imm_ud(instance_id_mask));
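
After this simplification the TCS instance id always comes from bits 23:17 of g0.2, so the AND above together with the shift by instance_id_shift that presumably follows it amounts, in scalar terms, to (sketch only; INTEL_MASK(23, 17) is the usual bitmask macro):

   /* invocation_id = (r0.2 & INTEL_MASK(23, 17)) >> 17; */
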
@@ -7341,8 +7212,6 @@ namespace elk {
{
if (!regs[0])
return elk_fs_reg();
else if (bld.shader->devinfo->ver >= 20)
return fetch_payload_reg(bld, regs, ELK_REGISTER_TYPE_F, 2);
const elk_fs_reg tmp = bld.vgrf(ELK_REGISTER_TYPE_F, 2);
const elk::fs_builder hbld = bld.exec_all().group(8, 0);


@@ -781,7 +781,7 @@ namespace elk {
LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
const src_reg &a) const
{
if (shader->devinfo->ver >= 6 && shader->devinfo->ver <= 10) {
if (shader->devinfo->ver >= 6) {
/* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
* we need to reorder the operands.
*/


@@ -1760,8 +1760,7 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
break;
case ELK_OPCODE_CSEL:
assert(devinfo->ver >= 8);
if (devinfo->ver < 10)
elk_set_default_access_mode(p, ELK_ALIGN_16);
elk_set_default_access_mode(p, ELK_ALIGN_16);
elk_CSEL(p, dst, src[0], src[1], src[2]);
break;
case ELK_OPCODE_BFREV:


@@ -247,8 +247,7 @@ elk_fs_visitor::emit_interpolation_setup_gfx6()
* on gfx20+. gi_reg is the 32B section of the GRF that
* contains the subspan coordinates.
*/
const struct elk_reg gi_reg = devinfo->ver >= 20 ? xe2_vec1_grf(i, 8) :
elk_vec1_grf(i + 1, 0);
const struct elk_reg gi_reg = elk_vec1_grf(i + 1, 0);
const struct elk_reg gi_uw = retype(gi_reg, ELK_REGISTER_TYPE_UW);
if (devinfo->ver >= 8 || dispatch_width == 8) {
@@ -575,29 +574,6 @@ elk_fs_visitor::emit_fb_writes()
this->outputs[0].file != BAD_FILE);
assert(!prog_data->dual_src_blend || key->nr_color_regions == 1);
/* Following condition implements Wa_14017468336:
*
* "If dual source blend is enabled do not enable SIMD32 dispatch" and
* "For a thread dispatched as SIMD32, must not issue SIMD8 message with Last
* Render Target Select set."
*/
if (devinfo->ver >= 11 && devinfo->ver <= 12 &&
prog_data->dual_src_blend) {
/* The dual-source RT write messages fail to release the thread
* dependency on ICL and TGL with SIMD32 dispatch, leading to hangs.
*
* XXX - Emit an extra single-source NULL RT-write marked LastRT in
* order to release the thread dependency without disabling
* SIMD32.
*
* The dual-source RT write messages may lead to hangs with SIMD16
* dispatch on ICL due some unknown reasons, see
* https://gitlab.freedesktop.org/mesa/mesa/-/issues/2183
*/
limit_dispatch_width(8, "Dual source blending unsupported "
"in SIMD16 and SIMD32 modes.\n");
}
do_emit_fb_writes(key->nr_color_regions, replicate_alpha);
}
@@ -801,11 +777,7 @@ elk_fs_visitor::emit_urb_writes(const elk_fs_reg &gs_vertex_count)
elk_fs_inst *inst = abld.emit(ELK_SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
srcs, ARRAY_SIZE(srcs));
/* For ICL Wa_1805992985 one needs additional write in the end. */
if (devinfo->ver == 11 && stage == MESA_SHADER_TESS_EVAL)
inst->eot = false;
else
inst->eot = slot == last_slot && stage != MESA_SHADER_GEOMETRY;
inst->eot = slot == last_slot && stage != MESA_SHADER_GEOMETRY;
inst->offset = urb_offset;
urb_offset = starting_urb_offset + slot + 1;
@@ -851,57 +823,6 @@ elk_fs_visitor::emit_urb_writes(const elk_fs_reg &gs_vertex_count)
inst->offset = 1;
return;
}
/* ICL Wa_1805992985:
*
* ICLLP GPU hangs on one of tessellation vkcts tests with DS not done. The
* send cycle, which is a urb write with an eot must be 4 phases long and
* all 8 lanes must valid.
*/
if (devinfo->ver == 11 && stage == MESA_SHADER_TESS_EVAL) {
assert(dispatch_width == 8);
elk_fs_reg uniform_urb_handle = elk_fs_reg(VGRF, alloc.allocate(1), ELK_REGISTER_TYPE_UD);
elk_fs_reg uniform_mask = elk_fs_reg(VGRF, alloc.allocate(1), ELK_REGISTER_TYPE_UD);
elk_fs_reg payload = elk_fs_reg(VGRF, alloc.allocate(4), ELK_REGISTER_TYPE_UD);
/* Workaround requires all 8 channels (lanes) to be valid. This is
* understood to mean they all need to be alive. First trick is to find
* a live channel and copy its urb handle for all the other channels to
* make sure all handles are valid.
*/
bld.exec_all().MOV(uniform_urb_handle, bld.emit_uniformize(urb_handle));
/* Second trick is to use masked URB write where one can tell the HW to
* actually write data only for selected channels even though all are
* active.
* Third trick is to take advantage of the must-be-zero (MBZ) area in
* the very beginning of the URB.
*
* One masks data to be written only for the first channel and uses
* offset zero explicitly to land data to the MBZ area avoiding trashing
* any other part of the URB.
*
* Since the WA says that the write needs to be 4 phases long one uses
* 4 slots data. All are explicitly zeros in order to to keep the MBZ
* area written as zeros.
*/
bld.exec_all().MOV(uniform_mask, elk_imm_ud(0x10000u));
bld.exec_all().MOV(offset(payload, bld, 0), elk_imm_ud(0u));
bld.exec_all().MOV(offset(payload, bld, 1), elk_imm_ud(0u));
bld.exec_all().MOV(offset(payload, bld, 2), elk_imm_ud(0u));
bld.exec_all().MOV(offset(payload, bld, 3), elk_imm_ud(0u));
elk_fs_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = uniform_mask;
srcs[URB_LOGICAL_SRC_DATA] = payload;
srcs[URB_LOGICAL_SRC_COMPONENTS] = elk_imm_ud(4);
elk_fs_inst *inst = bld.exec_all().emit(ELK_SHADER_OPCODE_URB_WRITE_LOGICAL,
reg_undef, srcs, ARRAY_SIZE(srcs));
inst->eot = true;
inst->offset = 0;
}
}
void
@@ -1002,7 +923,7 @@ elk_fs_visitor::elk_fs_visitor(const struct elk_compiler *compiler,
live_analysis(this), regpressure_analysis(this),
performance_analysis(this),
needs_register_pressure(needs_register_pressure),
dispatch_width(compiler->devinfo->ver >= 20 ? 16 : 8),
dispatch_width(8),
max_polygons(0),
api_subgroup_size(elk_nir_api_subgroup_size(shader, dispatch_width))
{


@@ -639,8 +639,8 @@ F(rt_message_type, /* 4+ */ MD(10), MD( 8))
* Thread Spawn message function control bits:
* @{
*/
FC(ts_resource_select, /* 4+ */ MD( 4), MD( 4), devinfo->ver < 11)
FC(ts_request_type, /* 4+ */ MD( 1), MD( 1), devinfo->ver < 11)
F(ts_resource_select, /* 4+ */ MD( 4), MD( 4))
F(ts_request_type, /* 4+ */ MD( 1), MD( 1))
F(ts_opcode, /* 4+ */ MD( 0), MD( 0))
/** @} */
@@ -677,13 +677,8 @@ static inline uint64_t
elk_inst_imm_uq(const struct intel_device_info *devinfo,
const elk_inst *insn)
{
if (devinfo->ver >= 12) {
return elk_inst_bits(insn, 95, 64) << 32 |
elk_inst_bits(insn, 127, 96);
} else {
assert(devinfo->ver >= 8);
return elk_inst_bits(insn, 127, 64);
}
assert(devinfo->ver >= 8);
return elk_inst_bits(insn, 127, 64);
}
static inline float
@@ -749,12 +744,7 @@ elk_inst_set_imm_df(const struct intel_device_info *devinfo,
(void) devinfo;
dt.d = value;
if (devinfo->ver >= 12) {
elk_inst_set_bits(insn, 95, 64, dt.u >> 32);
elk_inst_set_bits(insn, 127, 96, dt.u & 0xFFFFFFFF);
} else {
elk_inst_set_bits(insn, 127, 64, dt.u);
}
elk_inst_set_bits(insn, 127, 64, dt.u);
}
static inline void
@@ -762,12 +752,7 @@ elk_inst_set_imm_uq(const struct intel_device_info *devinfo,
elk_inst *insn, uint64_t value)
{
(void) devinfo;
if (devinfo->ver >= 12) {
elk_inst_set_bits(insn, 95, 64, value >> 32);
elk_inst_set_bits(insn, 127, 96, value & 0xFFFFFFFF);
} else {
elk_inst_set_bits(insn, 127, 64, value);
}
elk_inst_set_bits(insn, 127, 64, value);
}
/** @} */
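
With the Gfx12 split encoding dropped, 64-bit immediates (Q/UQ and DF alike) always occupy instruction bits 127:64 on Gfx8, so the setter/getter pair round-trips directly. A small sanity sketch, assuming an elk_inst *insn and a Gfx8 devinfo are in scope:

   elk_inst_set_imm_uq(devinfo, insn, 0x0123456789abcdefull);
   assert(elk_inst_imm_uq(devinfo, insn) == 0x0123456789abcdefull);
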
@@ -802,25 +787,14 @@ REG_TYPE(src1)
/* The AddrImm fields are split into two discontiguous sections on Gfx8+ */
#define ELK_IA1_ADDR_IMM(reg, g4_high, g4_low, g8_nine, g8_high, g8_low, \
g12_high, g12_low, g20_high, g20_low, g20_zero) \
#define ELK_IA1_ADDR_IMM(reg, g4_high, g4_low, g8_nine, g8_high, g8_low) \
static inline void \
elk_inst_set_##reg##_ia1_addr_imm(const struct \
intel_device_info *devinfo, \
elk_inst *inst, \
unsigned value) \
{ \
if (devinfo->ver >= 20) { \
assert((value & ~0x7ff) == 0); \
elk_inst_set_bits(inst, g20_high, g20_low, value >> 1); \
if (g20_zero == -1) \
assert((value & 1) == 0); \
else \
elk_inst_set_bits(inst, g20_zero, g20_zero, value & 1); \
} else if (devinfo->ver >= 12) { \
assert((value & ~0x3ff) == 0); \
elk_inst_set_bits(inst, g12_high, g12_low, value); \
} else if (devinfo->ver >= 8) { \
if (devinfo->ver >= 8) { \
assert((value & ~0x3ff) == 0); \
elk_inst_set_bits(inst, g8_high, g8_low, value & 0x1ff); \
elk_inst_set_bits(inst, g8_nine, g8_nine, value >> 9); \
@@ -833,13 +807,7 @@ static inline unsigned \
elk_inst_##reg##_ia1_addr_imm(const struct intel_device_info *devinfo, \
const elk_inst *inst) \
{ \
if (devinfo->ver >= 20) { \
return elk_inst_bits(inst, g20_high, g20_low) << 1 | \
(g20_zero == -1 ? 0 : \
elk_inst_bits(inst, g20_zero, g20_zero)); \
} else if (devinfo->ver >= 12) { \
return elk_inst_bits(inst, g12_high, g12_low); \
} else if (devinfo->ver >= 8) { \
if (devinfo->ver >= 8) { \
return elk_inst_bits(inst, g8_high, g8_low) | \
(elk_inst_bits(inst, g8_nine, g8_nine) << 9); \
} else { \
@@ -847,11 +815,11 @@ elk_inst_##reg##_ia1_addr_imm(const struct intel_device_info *devinfo, \
} \
}
/* AddrImm for Align1 Indirect Addressing */
/* -Gen 4- ----Gfx8---- -Gfx12- ---Gfx20--- */
ELK_IA1_ADDR_IMM(src1, 105, 96, 121, 104, 96, 107, 98, 107, 98, -1)
ELK_IA1_ADDR_IMM(src0, 73, 64, 95, 72, 64, 75, 66, 75, 66, 87)
ELK_IA1_ADDR_IMM(dst, 57, 48, 47, 56, 48, 59, 50, 59, 50, 33)
/* AddrImm for Align1 Indirect Addressing */
/* -Gen 4- ----Gfx8---- */
ELK_IA1_ADDR_IMM(src1, 105, 96, 121, 104, 96)
ELK_IA1_ADDR_IMM(src0, 73, 64, 95, 72, 64)
ELK_IA1_ADDR_IMM(dst, 57, 48, 47, 56, 48)
#define ELK_IA16_ADDR_IMM(reg, g4_high, g4_low, g8_nine, g8_high, g8_low) \
static inline void \
@@ -859,7 +827,6 @@ elk_inst_set_##reg##_ia16_addr_imm(const struct \
intel_device_info *devinfo, \
elk_inst *inst, unsigned value) \
{ \
assert(devinfo->ver < 12); \
assert((value & ~0x3ff) == 0); \
if (devinfo->ver >= 8) { \
assert(GET_BITS(value, 3, 0) == 0); \
@@ -873,7 +840,6 @@ elk_inst_##reg##_ia16_addr_imm(const struct intel_device_info *devinfo, \
elk_inst_##reg##_ia16_addr_imm(const struct intel_device_info *devinfo, \
const elk_inst *inst) \
{ \
assert(devinfo->ver < 12); \
if (devinfo->ver >= 8) { \
return (elk_inst_bits(inst, g8_high, g8_low) << 4) | \
(elk_inst_bits(inst, g8_nine, g8_nine) << 9); \
@@ -1049,12 +1015,8 @@ static inline unsigned
elk_compact_inst_imm(const struct intel_device_info *devinfo,
const elk_compact_inst *inst)
{
if (devinfo->ver >= 12) {
return elk_compact_inst_bits(inst, 63, 52);
} else {
return (elk_compact_inst_bits(inst, 39, 35) << 8) |
(elk_compact_inst_bits(inst, 63, 56));
}
return (elk_compact_inst_bits(inst, 39, 35) << 8) |
(elk_compact_inst_bits(inst, 63, 56));
}
/**


@@ -528,7 +528,7 @@ namespace elk {
/* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
* we need to reorder the operands.
*/
assert(shader->devinfo->ver >= 6 && shader->devinfo->ver <= 9);
assert(shader->devinfo->ver >= 6);
return emit(ELK_OPCODE_LRP, dst, a, y, x);
}