mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 09:28:07 +02:00
intel/elk: Remove remaining Gfx9+ code
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27629>
This commit is contained in:
parent
ea12b38602
commit
fd3a815a5b
9 changed files with 77 additions and 365 deletions
|
|
@ -86,7 +86,7 @@ elk_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
|
|||
* destination type can be Quadword and source type Doubleword for Gfx8 and
|
||||
* Gfx9. So, lower 64 bit multiply instruction on rest of the platforms.
|
||||
*/
|
||||
if (devinfo->ver < 8 || devinfo->ver > 9)
|
||||
if (devinfo->ver < 8)
|
||||
int64_options |= nir_lower_imul_2x32_64;
|
||||
|
||||
/* We want the GLSL compiler to emit code that uses condition codes */
|
||||
|
|
@ -107,8 +107,7 @@ elk_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
|
|||
nir_options->lower_ffma16 = devinfo->ver < 6;
|
||||
nir_options->lower_ffma32 = devinfo->ver < 6;
|
||||
nir_options->lower_ffma64 = devinfo->ver < 6;
|
||||
nir_options->lower_flrp32 = devinfo->ver < 6 || devinfo->ver >= 11;
|
||||
nir_options->lower_fpow = devinfo->ver >= 12;
|
||||
nir_options->lower_flrp32 = devinfo->ver < 6;
|
||||
|
||||
nir_options->has_bfe = devinfo->ver >= 7;
|
||||
nir_options->has_bfm = devinfo->ver >= 7;
|
||||
|
|
@ -127,9 +126,8 @@ elk_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
|
|||
elk_nir_no_indirect_mask(compiler, i);
|
||||
nir_options->force_indirect_unrolling_sampler = devinfo->ver < 7;
|
||||
|
||||
if (devinfo->ver < 12)
|
||||
nir_options->divergence_analysis_options |=
|
||||
nir_divergence_single_prim_per_subgroup;
|
||||
nir_options->divergence_analysis_options |=
|
||||
nir_divergence_single_prim_per_subgroup;
|
||||
|
||||
compiler->nir_options[i] = nir_options;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1799,7 +1799,7 @@ elk_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo,
|
|||
* to do a full test run with elk_fs_test_dispatch_packing() hooked up to
|
||||
* the NIR front-end before changing this assertion.
|
||||
*/
|
||||
assert(devinfo->ver <= 12);
|
||||
assert(devinfo->ver <= 8);
|
||||
|
||||
switch (stage) {
|
||||
case MESA_SHADER_FRAGMENT: {
|
||||
|
|
@ -1813,8 +1813,7 @@ elk_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo,
|
|||
*/
|
||||
const struct elk_wm_prog_data *wm_prog_data =
|
||||
(const struct elk_wm_prog_data *)prog_data;
|
||||
return devinfo->verx10 < 125 &&
|
||||
!wm_prog_data->persample_dispatch &&
|
||||
return !wm_prog_data->persample_dispatch &&
|
||||
wm_prog_data->uses_vmask &&
|
||||
max_polygons < 2;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -271,7 +271,7 @@ ALU2(SUBB)
|
|||
static inline unsigned
|
||||
reg_unit(const struct intel_device_info *devinfo)
|
||||
{
|
||||
return devinfo->ver >= 20 ? 2 : 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -387,20 +387,6 @@ elk_sampler_desc(const struct intel_device_info *devinfo,
|
|||
const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
|
||||
SET_BITS(sampler, 11, 8));
|
||||
|
||||
/* From GFX20 Bspec: Shared Functions - Message Descriptor -
|
||||
* Sampling Engine:
|
||||
*
|
||||
* Message Type[5] 31 This bit represents the upper bit of message type
|
||||
* 6-bit encoding (c.f. [16:12]). This bit is set
|
||||
* for messages with programmable offsets.
|
||||
*/
|
||||
if (devinfo->ver >= 20)
|
||||
return desc | SET_BITS(msg_type & 0x1F, 16, 12) |
|
||||
SET_BITS(simd_mode & 0x3, 18, 17) |
|
||||
SET_BITS(simd_mode >> 2, 29, 29) |
|
||||
SET_BITS(return_format, 30, 30) |
|
||||
SET_BITS(msg_type >> 5, 31, 31);
|
||||
|
||||
/* From the CHV Bspec: Shared Functions - Message Descriptor -
|
||||
* Sampling Engine:
|
||||
*
|
||||
|
|
@ -443,9 +429,7 @@ elk_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
|
|||
static inline unsigned
|
||||
elk_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
|
||||
{
|
||||
if (devinfo->ver >= 20)
|
||||
return GET_BITS(desc, 31, 31) << 5 | GET_BITS(desc, 16, 12);
|
||||
else if (devinfo->ver >= 7)
|
||||
if (devinfo->ver >= 7)
|
||||
return GET_BITS(desc, 16, 12);
|
||||
else if (devinfo->verx10 >= 45)
|
||||
return GET_BITS(desc, 15, 12);
|
||||
|
|
@ -1066,7 +1050,7 @@ elk_fb_write_desc(const struct intel_device_info *devinfo,
|
|||
GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
|
||||
ELK_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
|
||||
|
||||
assert(devinfo->ver >= 10 || !coarse_write);
|
||||
assert(!coarse_write);
|
||||
|
||||
if (devinfo->ver >= 6) {
|
||||
return elk_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
|
||||
|
|
@ -1121,14 +1105,6 @@ elk_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
|
|||
return GET_BITS(desc, 15, 15);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
elk_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
|
||||
uint32_t desc)
|
||||
{
|
||||
assert(devinfo->ver >= 10);
|
||||
return GET_BITS(desc, 18, 18);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
elk_lsc_opcode_has_cmask(enum elk_lsc_opcode opcode)
|
||||
{
|
||||
|
|
@ -1570,18 +1546,6 @@ elk_mdc_sm2_exec_size(uint32_t sm2)
|
|||
return 8 << sm2;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
elk_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
|
||||
unsigned exec_size, unsigned msg_type)
|
||||
{
|
||||
assert(devinfo->has_ray_tracing);
|
||||
assert(devinfo->ver < 20 || exec_size == 16);
|
||||
|
||||
return SET_BITS(0, 19, 19) | /* No header */
|
||||
SET_BITS(msg_type, 17, 14) |
|
||||
SET_BITS(elk_mdc_sm2(exec_size), 8, 8);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
elk_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
|
||||
uint32_t desc)
|
||||
|
|
@ -1612,7 +1576,7 @@ elk_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
|
|||
const bool simd_mode = exec_size == 16;
|
||||
const bool slot_group = group >= 16;
|
||||
|
||||
assert(devinfo->ver >= 10 || !coarse_pixel_rate);
|
||||
assert(!coarse_pixel_rate);
|
||||
return (SET_BITS(slot_group, 11, 11) |
|
||||
SET_BITS(msg_type, 13, 12) |
|
||||
SET_BITS(!!noperspective, 14, 14) |
|
||||
|
|
|
|||
|
|
@ -435,28 +435,7 @@ elk_fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const
|
|||
if (is_send_from_grf())
|
||||
return false;
|
||||
|
||||
/* From Wa_1604601757:
|
||||
*
|
||||
* "When multiplying a DW and any lower precision integer, source modifier
|
||||
* is not supported."
|
||||
*/
|
||||
if (devinfo->ver >= 12 && (opcode == ELK_OPCODE_MUL ||
|
||||
opcode == ELK_OPCODE_MAD)) {
|
||||
const elk_reg_type exec_type = get_exec_type(this);
|
||||
const unsigned min_type_sz = opcode == ELK_OPCODE_MAD ?
|
||||
MIN2(type_sz(src[1].type), type_sz(src[2].type)) :
|
||||
MIN2(type_sz(src[0].type), type_sz(src[1].type));
|
||||
|
||||
if (elk_reg_type_is_integer(exec_type) &&
|
||||
type_sz(exec_type) >= 4 &&
|
||||
type_sz(exec_type) != min_type_sz)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!elk_backend_instruction::can_do_source_mods())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return elk_backend_instruction::can_do_source_mods();
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -939,24 +918,20 @@ namespace {
|
|||
unsigned
|
||||
predicate_width(const intel_device_info *devinfo, elk_predicate predicate)
|
||||
{
|
||||
if (devinfo->ver >= 20) {
|
||||
return 1;
|
||||
} else {
|
||||
switch (predicate) {
|
||||
case ELK_PREDICATE_NONE: return 1;
|
||||
case ELK_PREDICATE_NORMAL: return 1;
|
||||
case ELK_PREDICATE_ALIGN1_ANY2H: return 2;
|
||||
case ELK_PREDICATE_ALIGN1_ALL2H: return 2;
|
||||
case ELK_PREDICATE_ALIGN1_ANY4H: return 4;
|
||||
case ELK_PREDICATE_ALIGN1_ALL4H: return 4;
|
||||
case ELK_PREDICATE_ALIGN1_ANY8H: return 8;
|
||||
case ELK_PREDICATE_ALIGN1_ALL8H: return 8;
|
||||
case ELK_PREDICATE_ALIGN1_ANY16H: return 16;
|
||||
case ELK_PREDICATE_ALIGN1_ALL16H: return 16;
|
||||
case ELK_PREDICATE_ALIGN1_ANY32H: return 32;
|
||||
case ELK_PREDICATE_ALIGN1_ALL32H: return 32;
|
||||
default: unreachable("Unsupported predicate");
|
||||
}
|
||||
switch (predicate) {
|
||||
case ELK_PREDICATE_NONE: return 1;
|
||||
case ELK_PREDICATE_NORMAL: return 1;
|
||||
case ELK_PREDICATE_ALIGN1_ANY2H: return 2;
|
||||
case ELK_PREDICATE_ALIGN1_ALL2H: return 2;
|
||||
case ELK_PREDICATE_ALIGN1_ANY4H: return 4;
|
||||
case ELK_PREDICATE_ALIGN1_ALL4H: return 4;
|
||||
case ELK_PREDICATE_ALIGN1_ANY8H: return 8;
|
||||
case ELK_PREDICATE_ALIGN1_ALL8H: return 8;
|
||||
case ELK_PREDICATE_ALIGN1_ANY16H: return 16;
|
||||
case ELK_PREDICATE_ALIGN1_ALL16H: return 16;
|
||||
case ELK_PREDICATE_ALIGN1_ANY32H: return 32;
|
||||
case ELK_PREDICATE_ALIGN1_ALL32H: return 32;
|
||||
default: unreachable("Unsupported predicate");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -996,8 +971,8 @@ namespace {
|
|||
unsigned
|
||||
elk_fs_inst::flags_read(const intel_device_info *devinfo) const
|
||||
{
|
||||
if (devinfo->ver < 20 && (predicate == ELK_PREDICATE_ALIGN1_ANYV ||
|
||||
predicate == ELK_PREDICATE_ALIGN1_ALLV)) {
|
||||
if (predicate == ELK_PREDICATE_ALIGN1_ANYV ||
|
||||
predicate == ELK_PREDICATE_ALIGN1_ALLV) {
|
||||
/* The vertical predication modes combine corresponding bits from
|
||||
* f0.0 and f1.0 on Gfx7+, and f0.0 and f0.1 on older hardware.
|
||||
*/
|
||||
|
|
@ -1275,17 +1250,6 @@ elk_fs_visitor::assign_curb_setup()
|
|||
prog_data->curb_read_length = uniform_push_length + ubo_push_length;
|
||||
|
||||
uint64_t used = 0;
|
||||
bool is_compute = gl_shader_stage_is_compute(stage);
|
||||
|
||||
if (is_compute && elk_cs_prog_data(prog_data)->uses_inline_data) {
|
||||
/* With COMPUTE_WALKER, we can push up to one register worth of data via
|
||||
* the inline data parameter in the COMPUTE_WALKER command itself.
|
||||
*
|
||||
* TODO: Support inline data and push at the same time.
|
||||
*/
|
||||
assert(devinfo->verx10 >= 125);
|
||||
assert(uniform_push_length <= reg_unit(devinfo));
|
||||
}
|
||||
|
||||
/* Map the offsets in the UNIFORM file to fixed HW regs. */
|
||||
foreach_block_and_inst(block, elk_fs_inst, inst, cfg) {
|
||||
|
|
@ -1602,78 +1566,22 @@ elk_fs_visitor::assign_urb_setup()
|
|||
* representation described above into an offset and a
|
||||
* grf, which contains the plane parameters for the first
|
||||
* polygon processed by the thread.
|
||||
*
|
||||
* Earlier platforms and per-primitive block pack 2 logical
|
||||
* input components per 32B register.
|
||||
*/
|
||||
if (devinfo->ver >= 20 && !per_prim) {
|
||||
/* Gfx20+ is able to pack 5 logical input components
|
||||
* per 64B register for vertex setup data.
|
||||
*/
|
||||
const unsigned grf = base + idx / 5 * 2 * max_polygons;
|
||||
assert(inst->src[i].offset / param_width < 12);
|
||||
const unsigned delta = idx % 5 * 12 +
|
||||
inst->src[i].offset / (param_width * chan_sz) * chan_sz +
|
||||
inst->src[i].offset % chan_sz;
|
||||
reg = byte_offset(retype(elk_vec8_grf(grf, 0), inst->src[i].type),
|
||||
delta);
|
||||
} else {
|
||||
/* Earlier platforms and per-primitive block pack 2 logical
|
||||
* input components per 32B register.
|
||||
*/
|
||||
const unsigned grf = base + idx / 2 * max_polygons;
|
||||
assert(inst->src[i].offset / param_width < REG_SIZE / 2);
|
||||
const unsigned delta = (idx % 2) * (REG_SIZE / 2) +
|
||||
inst->src[i].offset / (param_width * chan_sz) * chan_sz +
|
||||
inst->src[i].offset % chan_sz;
|
||||
reg = byte_offset(retype(elk_vec8_grf(grf, 0), inst->src[i].type),
|
||||
delta);
|
||||
}
|
||||
const unsigned grf = base + idx / 2 * max_polygons;
|
||||
assert(inst->src[i].offset / param_width < REG_SIZE / 2);
|
||||
const unsigned delta = (idx % 2) * (REG_SIZE / 2) +
|
||||
inst->src[i].offset / (param_width * chan_sz) * chan_sz +
|
||||
inst->src[i].offset % chan_sz;
|
||||
reg = byte_offset(retype(elk_vec8_grf(grf, 0), inst->src[i].type),
|
||||
delta);
|
||||
|
||||
if (max_polygons > 1) {
|
||||
assert(devinfo->ver >= 12);
|
||||
/* Misaligned channel strides that would lead to
|
||||
* cross-channel access in the representation above are
|
||||
* disallowed.
|
||||
*/
|
||||
assert(inst->src[i].stride * type_sz(inst->src[i].type) == chan_sz);
|
||||
|
||||
/* Number of channels processing the same polygon. */
|
||||
const unsigned poly_width = dispatch_width / max_polygons;
|
||||
assert(dispatch_width % max_polygons == 0);
|
||||
|
||||
/* Accessing a subset of channels of a parameter vector
|
||||
* starting from "chan" is necessary to handle
|
||||
* SIMD-lowered instructions though.
|
||||
*/
|
||||
const unsigned chan = inst->src[i].offset %
|
||||
(param_width * chan_sz) / chan_sz;
|
||||
assert(chan < dispatch_width);
|
||||
assert(chan % poly_width == 0);
|
||||
const unsigned reg_size = reg_unit(devinfo) * REG_SIZE;
|
||||
reg = byte_offset(reg, chan / poly_width * reg_size);
|
||||
|
||||
if (inst->exec_size > poly_width) {
|
||||
/* Accessing the parameters for multiple polygons.
|
||||
* Corresponding parameters for different polygons
|
||||
* are stored a GRF apart on the thread payload, so
|
||||
* use that as vertical stride.
|
||||
*/
|
||||
const unsigned vstride = reg_size / type_sz(inst->src[i].type);
|
||||
assert(vstride <= 32);
|
||||
assert(chan % poly_width == 0);
|
||||
reg = stride(reg, vstride, poly_width, 0);
|
||||
} else {
|
||||
/* Accessing one parameter for a single polygon --
|
||||
* Translate to a scalar region.
|
||||
*/
|
||||
assert(chan % poly_width + inst->exec_size <= poly_width);
|
||||
reg = stride(reg, 0, 1, 0);
|
||||
}
|
||||
|
||||
} else {
|
||||
const unsigned width = inst->src[i].stride == 0 ?
|
||||
1 : MIN2(inst->exec_size, 8);
|
||||
reg = stride(reg, width * inst->src[i].stride,
|
||||
width, inst->src[i].stride);
|
||||
}
|
||||
const unsigned width = inst->src[i].stride == 0 ?
|
||||
1 : MIN2(inst->exec_size, 8);
|
||||
reg = stride(reg, width * inst->src[i].stride,
|
||||
width, inst->src[i].stride);
|
||||
|
||||
reg.abs = inst->src[i].abs;
|
||||
reg.negate = inst->src[i].negate;
|
||||
|
|
@ -2078,9 +1986,6 @@ elk_get_subgroup_id_param_index(const intel_device_info *devinfo,
|
|||
if (prog_data->nr_params == 0)
|
||||
return -1;
|
||||
|
||||
if (devinfo->verx10 >= 125)
|
||||
return -1;
|
||||
|
||||
/* The local thread id is always the last parameter in the list */
|
||||
uint32_t last_param = prog_data->param[prog_data->nr_params - 1];
|
||||
if (last_param == ELK_PARAM_BUILTIN_SUBGROUP_ID)
|
||||
|
|
@ -3787,20 +3692,7 @@ elk_fs_visitor::lower_mul_dword_inst(elk_fs_inst *inst, elk_bblock_t *block)
|
|||
|
||||
bool do_addition = true;
|
||||
if (devinfo->ver >= 7) {
|
||||
/* From Wa_1604601757:
|
||||
*
|
||||
* "When multiplying a DW and any lower precision integer, source modifier
|
||||
* is not supported."
|
||||
*
|
||||
* An unsupported negate modifier on src[1] would ordinarily be
|
||||
* lowered by the subsequent lower_regioning pass. In this case that
|
||||
* pass would spawn another dword multiply. Instead, lower the
|
||||
* modifier first.
|
||||
*/
|
||||
const bool source_mods_unsupported = (devinfo->ver >= 12);
|
||||
|
||||
if (inst->src[1].abs || (inst->src[1].negate &&
|
||||
source_mods_unsupported))
|
||||
if (inst->src[1].abs)
|
||||
lower_src_modifiers(this, block, inst, 1);
|
||||
|
||||
if (inst->src[1].file == IMM) {
|
||||
|
|
@ -4027,8 +3919,7 @@ elk_fs_visitor::lower_integer_multiplication()
|
|||
} else if (!inst->dst.is_accumulator() &&
|
||||
(inst->dst.type == ELK_REGISTER_TYPE_D ||
|
||||
inst->dst.type == ELK_REGISTER_TYPE_UD) &&
|
||||
(!devinfo->has_integer_dword_mul ||
|
||||
devinfo->verx10 >= 125)) {
|
||||
!devinfo->has_integer_dword_mul) {
|
||||
lower_mul_dword_inst(inst, block);
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
|
|
@ -4192,7 +4083,6 @@ elk_sample_mask_reg(const fs_builder &bld)
|
|||
return elk_flag_subreg(sample_mask_flag_subreg(s) + bld.group() / 16);
|
||||
} else {
|
||||
assert(s.devinfo->ver >= 6 && bld.dispatch_width() <= 16);
|
||||
assert(s.devinfo->ver < 20);
|
||||
return retype(elk_vec1_grf((bld.group() >= 16 ? 2 : 1), 7),
|
||||
ELK_REGISTER_TYPE_UW);
|
||||
}
|
||||
|
|
@ -4258,7 +4148,6 @@ elk_emit_predicate_on_sample_mask(const fs_builder &bld, elk_fs_inst *inst)
|
|||
assert(inst->predicate == ELK_PREDICATE_NORMAL);
|
||||
assert(!inst->predicate_inverse);
|
||||
assert(inst->flag_subreg == 0);
|
||||
assert(s.devinfo->ver < 20);
|
||||
/* Combine the sample mask with the existing predicate by using a
|
||||
* vertical predication mode.
|
||||
*/
|
||||
|
|
@ -4458,7 +4347,7 @@ get_fpu_lowered_simd_width(const elk_fs_visitor *shader,
|
|||
* "Ternary instruction with condition modifiers must not use SIMD32."
|
||||
*/
|
||||
if (inst->conditional_mod && (devinfo->ver < 8 ||
|
||||
(inst->elk_is_3src(compiler) && devinfo->ver < 12)))
|
||||
inst->elk_is_3src(compiler)))
|
||||
max_width = MIN2(max_width, 16);
|
||||
|
||||
/* From the IVB PRMs (applies to other devices that don't have the
|
||||
|
|
@ -4521,7 +4410,7 @@ get_fpu_lowered_simd_width(const elk_fs_visitor *shader,
|
|||
* instructions do not support HF types and conversions from/to F are
|
||||
* required.
|
||||
*/
|
||||
if (is_mixed_float_with_fp32_dst(inst) && devinfo->ver < 20)
|
||||
if (is_mixed_float_with_fp32_dst(inst))
|
||||
max_width = MIN2(max_width, 8);
|
||||
|
||||
/* From the SKL PRM, Special Restrictions for Handling Mixed Mode
|
||||
|
|
@ -4530,7 +4419,7 @@ get_fpu_lowered_simd_width(const elk_fs_visitor *shader,
|
|||
* "No SIMD16 in mixed mode when destination is packed f16 for both
|
||||
* Align1 and Align16."
|
||||
*/
|
||||
if (is_mixed_float_with_packed_fp16_dst(inst) && devinfo->ver < 20)
|
||||
if (is_mixed_float_with_packed_fp16_dst(inst))
|
||||
max_width = MIN2(max_width, 8);
|
||||
|
||||
/* Only power-of-two execution sizes are representable in the instruction
|
||||
|
|
@ -4566,7 +4455,7 @@ get_sampler_lowered_simd_width(const struct intel_device_info *devinfo,
|
|||
*/
|
||||
if (inst->opcode != ELK_SHADER_OPCODE_TEX &&
|
||||
inst->components_read(TEX_LOGICAL_SRC_MIN_LOD))
|
||||
return devinfo->ver < 20 ? 8 : 16;
|
||||
return 8;
|
||||
|
||||
/* Calculate the number of coordinate components that have to be present
|
||||
* assuming that additional arguments follow the texel coordinates in the
|
||||
|
|
@ -4581,14 +4470,6 @@ get_sampler_lowered_simd_width(const struct intel_device_info *devinfo,
|
|||
inst->opcode != ELK_SHADER_OPCODE_TXF_CMS_LOGICAL) ? 4 :
|
||||
3;
|
||||
|
||||
/* On Gfx9+ the LOD argument is for free if we're able to use the LZ
|
||||
* variant of the TXL or TXF message.
|
||||
*/
|
||||
const bool implicit_lod = devinfo->ver >= 9 &&
|
||||
(inst->opcode == ELK_SHADER_OPCODE_TXL ||
|
||||
inst->opcode == ELK_SHADER_OPCODE_TXF) &&
|
||||
inst->src[TEX_LOGICAL_SRC_LOD].is_zero();
|
||||
|
||||
/* Calculate the total number of argument components that need to be passed
|
||||
* to the sampler unit.
|
||||
*/
|
||||
|
|
@ -4596,7 +4477,7 @@ get_sampler_lowered_simd_width(const struct intel_device_info *devinfo,
|
|||
MAX2(inst->components_read(TEX_LOGICAL_SRC_COORDINATE),
|
||||
req_coord_components) +
|
||||
inst->components_read(TEX_LOGICAL_SRC_SHADOW_C) +
|
||||
(implicit_lod ? 0 : inst->components_read(TEX_LOGICAL_SRC_LOD)) +
|
||||
inst->components_read(TEX_LOGICAL_SRC_LOD) +
|
||||
inst->components_read(TEX_LOGICAL_SRC_LOD2) +
|
||||
inst->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) +
|
||||
(inst->opcode == ELK_SHADER_OPCODE_TG4_OFFSET_LOGICAL ?
|
||||
|
|
@ -4781,8 +4662,7 @@ get_lowered_simd_width(const elk_fs_visitor *shader, const elk_fs_inst *inst)
|
|||
/* MULH is lowered to the MUL/MACH sequence using the accumulator, which
|
||||
* is 8-wide on Gfx7+.
|
||||
*/
|
||||
return (devinfo->ver >= 20 ? 16 :
|
||||
devinfo->ver >= 7 ? 8 :
|
||||
return (devinfo->ver >= 7 ? 8 :
|
||||
get_fpu_lowered_simd_width(shader, inst));
|
||||
|
||||
case ELK_FS_OPCODE_FB_WRITE_LOGICAL:
|
||||
|
|
@ -4817,7 +4697,7 @@ get_lowered_simd_width(const elk_fs_visitor *shader, const elk_fs_inst *inst)
|
|||
/* TXD is unsupported in SIMD16 mode previous to Xe2. SIMD32 is still
|
||||
* unsupported on Xe2.
|
||||
*/
|
||||
return devinfo->ver < 20 ? 8 : 16;
|
||||
return 8;
|
||||
|
||||
case ELK_SHADER_OPCODE_TXL_LOGICAL:
|
||||
case ELK_FS_OPCODE_TXB_LOGICAL:
|
||||
|
|
@ -4870,13 +4750,13 @@ get_lowered_simd_width(const elk_fs_visitor *shader, const elk_fs_inst *inst)
|
|||
|
||||
case ELK_SHADER_OPCODE_URB_READ_LOGICAL:
|
||||
case ELK_SHADER_OPCODE_URB_WRITE_LOGICAL:
|
||||
return MIN2(devinfo->ver < 20 ? 8 : 16, inst->exec_size);
|
||||
return MIN2(8, inst->exec_size);
|
||||
|
||||
case ELK_SHADER_OPCODE_QUAD_SWIZZLE: {
|
||||
const unsigned swiz = inst->src[1].ud;
|
||||
return (is_uniform(inst->src[0]) ?
|
||||
get_fpu_lowered_simd_width(shader, inst) :
|
||||
devinfo->ver < 11 && type_sz(inst->src[0].type) == 4 ? 8 :
|
||||
type_sz(inst->src[0].type) == 4 ? 8 :
|
||||
swiz == ELK_SWIZZLE_XYXY || swiz == ELK_SWIZZLE_ZWZW ? 4 :
|
||||
get_fpu_lowered_simd_width(shader, inst));
|
||||
}
|
||||
|
|
@ -5249,7 +5129,7 @@ bool
|
|||
elk_fs_visitor::lower_barycentrics()
|
||||
{
|
||||
const bool has_interleaved_layout = devinfo->has_pln ||
|
||||
(devinfo->ver >= 7 && devinfo->ver < 20);
|
||||
devinfo->ver >= 7;
|
||||
bool progress = false;
|
||||
|
||||
if (stage != MESA_SHADER_FRAGMENT || !has_interleaved_layout)
|
||||
|
|
@ -6125,18 +6005,9 @@ elk_fs_visitor::set_tcs_invocation_id()
|
|||
struct elk_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
|
||||
const fs_builder bld = fs_builder(this).at_end();
|
||||
|
||||
const unsigned instance_id_mask =
|
||||
(devinfo->verx10 >= 125) ? INTEL_MASK(7, 0) :
|
||||
(devinfo->ver >= 11) ? INTEL_MASK(22, 16) :
|
||||
INTEL_MASK(23, 17);
|
||||
const unsigned instance_id_shift =
|
||||
(devinfo->verx10 >= 125) ? 0 : (devinfo->ver >= 11) ? 16 : 17;
|
||||
const unsigned instance_id_mask = INTEL_MASK(23, 17);
|
||||
const unsigned instance_id_shift = 17;
|
||||
|
||||
/* Get instance number from g0.2 bits:
|
||||
* * 7:0 on DG2+
|
||||
* * 22:16 on gfx11+
|
||||
* * 23:17 otherwise
|
||||
*/
|
||||
elk_fs_reg t = bld.vgrf(ELK_REGISTER_TYPE_UD);
|
||||
bld.AND(t, elk_fs_reg(retype(elk_vec1_grf(0, 2), ELK_REGISTER_TYPE_UD)),
|
||||
elk_imm_ud(instance_id_mask));
|
||||
|
|
@ -7341,8 +7212,6 @@ namespace elk {
|
|||
{
|
||||
if (!regs[0])
|
||||
return elk_fs_reg();
|
||||
else if (bld.shader->devinfo->ver >= 20)
|
||||
return fetch_payload_reg(bld, regs, ELK_REGISTER_TYPE_F, 2);
|
||||
|
||||
const elk_fs_reg tmp = bld.vgrf(ELK_REGISTER_TYPE_F, 2);
|
||||
const elk::fs_builder hbld = bld.exec_all().group(8, 0);
|
||||
|
|
|
|||
|
|
@ -781,7 +781,7 @@ namespace elk {
|
|||
LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
|
||||
const src_reg &a) const
|
||||
{
|
||||
if (shader->devinfo->ver >= 6 && shader->devinfo->ver <= 10) {
|
||||
if (shader->devinfo->ver >= 6) {
|
||||
/* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
|
||||
* we need to reorder the operands.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1760,8 +1760,7 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
|
|||
break;
|
||||
case ELK_OPCODE_CSEL:
|
||||
assert(devinfo->ver >= 8);
|
||||
if (devinfo->ver < 10)
|
||||
elk_set_default_access_mode(p, ELK_ALIGN_16);
|
||||
elk_set_default_access_mode(p, ELK_ALIGN_16);
|
||||
elk_CSEL(p, dst, src[0], src[1], src[2]);
|
||||
break;
|
||||
case ELK_OPCODE_BFREV:
|
||||
|
|
|
|||
|
|
@ -247,8 +247,7 @@ elk_fs_visitor::emit_interpolation_setup_gfx6()
|
|||
* on gfx20+. gi_reg is the 32B section of the GRF that
|
||||
* contains the subspan coordinates.
|
||||
*/
|
||||
const struct elk_reg gi_reg = devinfo->ver >= 20 ? xe2_vec1_grf(i, 8) :
|
||||
elk_vec1_grf(i + 1, 0);
|
||||
const struct elk_reg gi_reg = elk_vec1_grf(i + 1, 0);
|
||||
const struct elk_reg gi_uw = retype(gi_reg, ELK_REGISTER_TYPE_UW);
|
||||
|
||||
if (devinfo->ver >= 8 || dispatch_width == 8) {
|
||||
|
|
@ -575,29 +574,6 @@ elk_fs_visitor::emit_fb_writes()
|
|||
this->outputs[0].file != BAD_FILE);
|
||||
assert(!prog_data->dual_src_blend || key->nr_color_regions == 1);
|
||||
|
||||
/* Following condition implements Wa_14017468336:
|
||||
*
|
||||
* "If dual source blend is enabled do not enable SIMD32 dispatch" and
|
||||
* "For a thread dispatched as SIMD32, must not issue SIMD8 message with Last
|
||||
* Render Target Select set."
|
||||
*/
|
||||
if (devinfo->ver >= 11 && devinfo->ver <= 12 &&
|
||||
prog_data->dual_src_blend) {
|
||||
/* The dual-source RT write messages fail to release the thread
|
||||
* dependency on ICL and TGL with SIMD32 dispatch, leading to hangs.
|
||||
*
|
||||
* XXX - Emit an extra single-source NULL RT-write marked LastRT in
|
||||
* order to release the thread dependency without disabling
|
||||
* SIMD32.
|
||||
*
|
||||
* The dual-source RT write messages may lead to hangs with SIMD16
|
||||
* dispatch on ICL due to some unknown reasons, see
|
||||
* https://gitlab.freedesktop.org/mesa/mesa/-/issues/2183
|
||||
*/
|
||||
limit_dispatch_width(8, "Dual source blending unsupported "
|
||||
"in SIMD16 and SIMD32 modes.\n");
|
||||
}
|
||||
|
||||
do_emit_fb_writes(key->nr_color_regions, replicate_alpha);
|
||||
}
|
||||
|
||||
|
|
@ -801,11 +777,7 @@ elk_fs_visitor::emit_urb_writes(const elk_fs_reg &gs_vertex_count)
|
|||
elk_fs_inst *inst = abld.emit(ELK_SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
|
||||
/* For ICL Wa_1805992985 one needs additional write in the end. */
|
||||
if (devinfo->ver == 11 && stage == MESA_SHADER_TESS_EVAL)
|
||||
inst->eot = false;
|
||||
else
|
||||
inst->eot = slot == last_slot && stage != MESA_SHADER_GEOMETRY;
|
||||
inst->eot = slot == last_slot && stage != MESA_SHADER_GEOMETRY;
|
||||
|
||||
inst->offset = urb_offset;
|
||||
urb_offset = starting_urb_offset + slot + 1;
|
||||
|
|
@ -851,57 +823,6 @@ elk_fs_visitor::emit_urb_writes(const elk_fs_reg &gs_vertex_count)
|
|||
inst->offset = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* ICL Wa_1805992985:
|
||||
*
|
||||
* ICLLP GPU hangs on one of tessellation vkcts tests with DS not done. The
|
||||
* send cycle, which is a urb write with an eot must be 4 phases long and
|
||||
* all 8 lanes must be valid.
|
||||
*/
|
||||
if (devinfo->ver == 11 && stage == MESA_SHADER_TESS_EVAL) {
|
||||
assert(dispatch_width == 8);
|
||||
elk_fs_reg uniform_urb_handle = elk_fs_reg(VGRF, alloc.allocate(1), ELK_REGISTER_TYPE_UD);
|
||||
elk_fs_reg uniform_mask = elk_fs_reg(VGRF, alloc.allocate(1), ELK_REGISTER_TYPE_UD);
|
||||
elk_fs_reg payload = elk_fs_reg(VGRF, alloc.allocate(4), ELK_REGISTER_TYPE_UD);
|
||||
|
||||
/* Workaround requires all 8 channels (lanes) to be valid. This is
|
||||
* understood to mean they all need to be alive. First trick is to find
|
||||
* a live channel and copy its urb handle for all the other channels to
|
||||
* make sure all handles are valid.
|
||||
*/
|
||||
bld.exec_all().MOV(uniform_urb_handle, bld.emit_uniformize(urb_handle));
|
||||
|
||||
/* Second trick is to use masked URB write where one can tell the HW to
|
||||
* actually write data only for selected channels even though all are
|
||||
* active.
|
||||
* Third trick is to take advantage of the must-be-zero (MBZ) area in
|
||||
* the very beginning of the URB.
|
||||
*
|
||||
* One masks data to be written only for the first channel and uses
|
||||
* offset zero explicitly to land data to the MBZ area avoiding trashing
|
||||
* any other part of the URB.
|
||||
*
|
||||
* Since the WA says that the write needs to be 4 phases long one uses
|
||||
* 4 slots data. All are explicitly zeros in order to keep the MBZ
|
||||
* area written as zeros.
|
||||
*/
|
||||
bld.exec_all().MOV(uniform_mask, elk_imm_ud(0x10000u));
|
||||
bld.exec_all().MOV(offset(payload, bld, 0), elk_imm_ud(0u));
|
||||
bld.exec_all().MOV(offset(payload, bld, 1), elk_imm_ud(0u));
|
||||
bld.exec_all().MOV(offset(payload, bld, 2), elk_imm_ud(0u));
|
||||
bld.exec_all().MOV(offset(payload, bld, 3), elk_imm_ud(0u));
|
||||
|
||||
elk_fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = uniform_mask;
|
||||
srcs[URB_LOGICAL_SRC_DATA] = payload;
|
||||
srcs[URB_LOGICAL_SRC_COMPONENTS] = elk_imm_ud(4);
|
||||
|
||||
elk_fs_inst *inst = bld.exec_all().emit(ELK_SHADER_OPCODE_URB_WRITE_LOGICAL,
|
||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
inst->eot = true;
|
||||
inst->offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1002,7 +923,7 @@ elk_fs_visitor::elk_fs_visitor(const struct elk_compiler *compiler,
|
|||
live_analysis(this), regpressure_analysis(this),
|
||||
performance_analysis(this),
|
||||
needs_register_pressure(needs_register_pressure),
|
||||
dispatch_width(compiler->devinfo->ver >= 20 ? 16 : 8),
|
||||
dispatch_width(8),
|
||||
max_polygons(0),
|
||||
api_subgroup_size(elk_nir_api_subgroup_size(shader, dispatch_width))
|
||||
{
|
||||
|
|
|
|||
|
|
@ -639,8 +639,8 @@ F(rt_message_type, /* 4+ */ MD(10), MD( 8))
|
|||
* Thread Spawn message function control bits:
|
||||
* @{
|
||||
*/
|
||||
FC(ts_resource_select, /* 4+ */ MD( 4), MD( 4), devinfo->ver < 11)
|
||||
FC(ts_request_type, /* 4+ */ MD( 1), MD( 1), devinfo->ver < 11)
|
||||
F(ts_resource_select, /* 4+ */ MD( 4), MD( 4))
|
||||
F(ts_request_type, /* 4+ */ MD( 1), MD( 1))
|
||||
F(ts_opcode, /* 4+ */ MD( 0), MD( 0))
|
||||
/** @} */
|
||||
|
||||
|
|
@ -677,13 +677,8 @@ static inline uint64_t
|
|||
elk_inst_imm_uq(const struct intel_device_info *devinfo,
|
||||
const elk_inst *insn)
|
||||
{
|
||||
if (devinfo->ver >= 12) {
|
||||
return elk_inst_bits(insn, 95, 64) << 32 |
|
||||
elk_inst_bits(insn, 127, 96);
|
||||
} else {
|
||||
assert(devinfo->ver >= 8);
|
||||
return elk_inst_bits(insn, 127, 64);
|
||||
}
|
||||
assert(devinfo->ver >= 8);
|
||||
return elk_inst_bits(insn, 127, 64);
|
||||
}
|
||||
|
||||
static inline float
|
||||
|
|
@ -749,12 +744,7 @@ elk_inst_set_imm_df(const struct intel_device_info *devinfo,
|
|||
(void) devinfo;
|
||||
dt.d = value;
|
||||
|
||||
if (devinfo->ver >= 12) {
|
||||
elk_inst_set_bits(insn, 95, 64, dt.u >> 32);
|
||||
elk_inst_set_bits(insn, 127, 96, dt.u & 0xFFFFFFFF);
|
||||
} else {
|
||||
elk_inst_set_bits(insn, 127, 64, dt.u);
|
||||
}
|
||||
elk_inst_set_bits(insn, 127, 64, dt.u);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
|
@ -762,12 +752,7 @@ elk_inst_set_imm_uq(const struct intel_device_info *devinfo,
|
|||
elk_inst *insn, uint64_t value)
|
||||
{
|
||||
(void) devinfo;
|
||||
if (devinfo->ver >= 12) {
|
||||
elk_inst_set_bits(insn, 95, 64, value >> 32);
|
||||
elk_inst_set_bits(insn, 127, 96, value & 0xFFFFFFFF);
|
||||
} else {
|
||||
elk_inst_set_bits(insn, 127, 64, value);
|
||||
}
|
||||
elk_inst_set_bits(insn, 127, 64, value);
|
||||
}
|
||||
|
||||
/** @} */
|
||||
|
|
@ -802,25 +787,14 @@ REG_TYPE(src1)
|
|||
|
||||
|
||||
/* The AddrImm fields are split into two discontiguous sections on Gfx8+ */
|
||||
#define ELK_IA1_ADDR_IMM(reg, g4_high, g4_low, g8_nine, g8_high, g8_low, \
|
||||
g12_high, g12_low, g20_high, g20_low, g20_zero) \
|
||||
#define ELK_IA1_ADDR_IMM(reg, g4_high, g4_low, g8_nine, g8_high, g8_low) \
|
||||
static inline void \
|
||||
elk_inst_set_##reg##_ia1_addr_imm(const struct \
|
||||
intel_device_info *devinfo, \
|
||||
elk_inst *inst, \
|
||||
unsigned value) \
|
||||
{ \
|
||||
if (devinfo->ver >= 20) { \
|
||||
assert((value & ~0x7ff) == 0); \
|
||||
elk_inst_set_bits(inst, g20_high, g20_low, value >> 1); \
|
||||
if (g20_zero == -1) \
|
||||
assert((value & 1) == 0); \
|
||||
else \
|
||||
elk_inst_set_bits(inst, g20_zero, g20_zero, value & 1); \
|
||||
} else if (devinfo->ver >= 12) { \
|
||||
assert((value & ~0x3ff) == 0); \
|
||||
elk_inst_set_bits(inst, g12_high, g12_low, value); \
|
||||
} else if (devinfo->ver >= 8) { \
|
||||
if (devinfo->ver >= 8) { \
|
||||
assert((value & ~0x3ff) == 0); \
|
||||
elk_inst_set_bits(inst, g8_high, g8_low, value & 0x1ff); \
|
||||
elk_inst_set_bits(inst, g8_nine, g8_nine, value >> 9); \
|
||||
|
|
@ -833,13 +807,7 @@ static inline unsigned \
|
|||
elk_inst_##reg##_ia1_addr_imm(const struct intel_device_info *devinfo, \
|
||||
const elk_inst *inst) \
|
||||
{ \
|
||||
if (devinfo->ver >= 20) { \
|
||||
return elk_inst_bits(inst, g20_high, g20_low) << 1 | \
|
||||
(g20_zero == -1 ? 0 : \
|
||||
elk_inst_bits(inst, g20_zero, g20_zero)); \
|
||||
} else if (devinfo->ver >= 12) { \
|
||||
return elk_inst_bits(inst, g12_high, g12_low); \
|
||||
} else if (devinfo->ver >= 8) { \
|
||||
if (devinfo->ver >= 8) { \
|
||||
return elk_inst_bits(inst, g8_high, g8_low) | \
|
||||
(elk_inst_bits(inst, g8_nine, g8_nine) << 9); \
|
||||
} else { \
|
||||
|
|
@ -847,11 +815,11 @@ elk_inst_##reg##_ia1_addr_imm(const struct intel_device_info *devinfo, \
|
|||
} \
|
||||
}
|
||||
|
||||
/* AddrImm for Align1 Indirect Addressing */
|
||||
/* -Gen 4- ----Gfx8---- -Gfx12- ---Gfx20--- */
|
||||
ELK_IA1_ADDR_IMM(src1, 105, 96, 121, 104, 96, 107, 98, 107, 98, -1)
|
||||
ELK_IA1_ADDR_IMM(src0, 73, 64, 95, 72, 64, 75, 66, 75, 66, 87)
|
||||
ELK_IA1_ADDR_IMM(dst, 57, 48, 47, 56, 48, 59, 50, 59, 50, 33)
|
||||
/* AddrImm for Align1 Indirect Addressing */
|
||||
/* -Gen 4- ----Gfx8---- */
|
||||
ELK_IA1_ADDR_IMM(src1, 105, 96, 121, 104, 96)
|
||||
ELK_IA1_ADDR_IMM(src0, 73, 64, 95, 72, 64)
|
||||
ELK_IA1_ADDR_IMM(dst, 57, 48, 47, 56, 48)
|
||||
|
||||
#define ELK_IA16_ADDR_IMM(reg, g4_high, g4_low, g8_nine, g8_high, g8_low) \
|
||||
static inline void \
|
||||
|
|
@ -859,7 +827,6 @@ elk_inst_set_##reg##_ia16_addr_imm(const struct \
|
|||
intel_device_info *devinfo, \
|
||||
elk_inst *inst, unsigned value) \
|
||||
{ \
|
||||
assert(devinfo->ver < 12); \
|
||||
assert((value & ~0x3ff) == 0); \
|
||||
if (devinfo->ver >= 8) { \
|
||||
assert(GET_BITS(value, 3, 0) == 0); \
|
||||
|
|
@ -873,7 +840,6 @@ static inline unsigned \
|
|||
elk_inst_##reg##_ia16_addr_imm(const struct intel_device_info *devinfo, \
|
||||
const elk_inst *inst) \
|
||||
{ \
|
||||
assert(devinfo->ver < 12); \
|
||||
if (devinfo->ver >= 8) { \
|
||||
return (elk_inst_bits(inst, g8_high, g8_low) << 4) | \
|
||||
(elk_inst_bits(inst, g8_nine, g8_nine) << 9); \
|
||||
|
|
@ -1049,12 +1015,8 @@ static inline unsigned
|
|||
elk_compact_inst_imm(const struct intel_device_info *devinfo,
|
||||
const elk_compact_inst *inst)
|
||||
{
|
||||
if (devinfo->ver >= 12) {
|
||||
return elk_compact_inst_bits(inst, 63, 52);
|
||||
} else {
|
||||
return (elk_compact_inst_bits(inst, 39, 35) << 8) |
|
||||
(elk_compact_inst_bits(inst, 63, 56));
|
||||
}
|
||||
return (elk_compact_inst_bits(inst, 39, 35) << 8) |
|
||||
(elk_compact_inst_bits(inst, 63, 56));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -528,7 +528,7 @@ namespace elk {
|
|||
/* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
|
||||
* we need to reorder the operands.
|
||||
*/
|
||||
assert(shader->devinfo->ver >= 6 && shader->devinfo->ver <= 9);
|
||||
assert(shader->devinfo->ver >= 6);
|
||||
return emit(ELK_OPCODE_LRP, dst, a, y, x);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue