diff --git a/src/intel/compiler/elk/elk_compiler.c b/src/intel/compiler/elk/elk_compiler.c index dea7c1c7478..ac2fca18cbf 100644 --- a/src/intel/compiler/elk/elk_compiler.c +++ b/src/intel/compiler/elk/elk_compiler.c @@ -86,7 +86,7 @@ elk_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo) * destination type can be Quadword and source type Doubleword for Gfx8 and * Gfx9. So, lower 64 bit multiply instruction on rest of the platforms. */ - if (devinfo->ver < 8 || devinfo->ver > 9) + if (devinfo->ver < 8) int64_options |= nir_lower_imul_2x32_64; /* We want the GLSL compiler to emit code that uses condition codes */ @@ -107,8 +107,7 @@ elk_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo) nir_options->lower_ffma16 = devinfo->ver < 6; nir_options->lower_ffma32 = devinfo->ver < 6; nir_options->lower_ffma64 = devinfo->ver < 6; - nir_options->lower_flrp32 = devinfo->ver < 6 || devinfo->ver >= 11; - nir_options->lower_fpow = devinfo->ver >= 12; + nir_options->lower_flrp32 = devinfo->ver < 6; nir_options->has_bfe = devinfo->ver >= 7; nir_options->has_bfm = devinfo->ver >= 7; @@ -127,9 +126,8 @@ elk_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo) elk_nir_no_indirect_mask(compiler, i); nir_options->force_indirect_unrolling_sampler = devinfo->ver < 7; - if (devinfo->ver < 12) - nir_options->divergence_analysis_options |= - nir_divergence_single_prim_per_subgroup; + nir_options->divergence_analysis_options |= + nir_divergence_single_prim_per_subgroup; compiler->nir_options[i] = nir_options; } diff --git a/src/intel/compiler/elk/elk_compiler.h b/src/intel/compiler/elk/elk_compiler.h index c716e486008..158d6247887 100644 --- a/src/intel/compiler/elk/elk_compiler.h +++ b/src/intel/compiler/elk/elk_compiler.h @@ -1799,7 +1799,7 @@ elk_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo, * to do a full test run with elk_fs_test_dispatch_packing() hooked up to * the NIR front-end before changing this assertion. */ - assert(devinfo->ver <= 12); + assert(devinfo->ver <= 8); switch (stage) { case MESA_SHADER_FRAGMENT: { @@ -1813,8 +1813,7 @@ elk_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo, */ const struct elk_wm_prog_data *wm_prog_data = (const struct elk_wm_prog_data *)prog_data; - return devinfo->verx10 < 125 && - !wm_prog_data->persample_dispatch && + return !wm_prog_data->persample_dispatch && wm_prog_data->uses_vmask && max_polygons < 2; } diff --git a/src/intel/compiler/elk/elk_eu.h b/src/intel/compiler/elk/elk_eu.h index 91b63be082c..b56e34946a4 100644 --- a/src/intel/compiler/elk/elk_eu.h +++ b/src/intel/compiler/elk/elk_eu.h @@ -271,7 +271,7 @@ ALU2(SUBB) static inline unsigned reg_unit(const struct intel_device_info *devinfo) { - return devinfo->ver >= 20 ? 2 : 1; + return 1; } @@ -387,20 +387,6 @@ elk_sampler_desc(const struct intel_device_info *devinfo, const unsigned desc = (SET_BITS(binding_table_index, 7, 0) | SET_BITS(sampler, 11, 8)); - /* From GFX20 Bspec: Shared Functions - Message Descriptor - - * Sampling Engine: - * - * Message Type[5] 31 This bit represents the upper bit of message type - * 6-bit encoding (c.f. [16:12]). This bit is set - * for messages with programmable offsets. - */ - if (devinfo->ver >= 20) - return desc | SET_BITS(msg_type & 0x1F, 16, 12) | - SET_BITS(simd_mode & 0x3, 18, 17) | - SET_BITS(simd_mode >> 2, 29, 29) | - SET_BITS(return_format, 30, 30) | - SET_BITS(msg_type >> 5, 31, 31); - /* From the CHV Bspec: Shared Functions - Message Descriptor - * Sampling Engine: * @@ -443,9 +429,7 @@ elk_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo, static inline unsigned elk_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc) { - if (devinfo->ver >= 20) - return GET_BITS(desc, 31, 31) << 5 | GET_BITS(desc, 16, 12); - else if (devinfo->ver >= 7) + if (devinfo->ver >= 7) return GET_BITS(desc, 16, 12); else if (devinfo->verx10 >= 45) return GET_BITS(desc, 15, 12); @@ -1066,7 +1050,7 @@ elk_fb_write_desc(const struct intel_device_info *devinfo, GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE : ELK_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; - assert(devinfo->ver >= 10 || !coarse_write); + assert(!coarse_write); if (devinfo->ver >= 6) { return elk_fb_desc(devinfo, binding_table_index, msg_type, msg_control) | @@ -1121,14 +1105,6 @@ elk_fb_write_desc_write_commit(const struct intel_device_info *devinfo, return GET_BITS(desc, 15, 15); } -static inline bool -elk_fb_write_desc_coarse_write(const struct intel_device_info *devinfo, - uint32_t desc) -{ - assert(devinfo->ver >= 10); - return GET_BITS(desc, 18, 18); -} - static inline bool elk_lsc_opcode_has_cmask(enum elk_lsc_opcode opcode) { @@ -1570,18 +1546,6 @@ elk_mdc_sm2_exec_size(uint32_t sm2) return 8 << sm2; } -static inline uint32_t -elk_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo, - unsigned exec_size, unsigned msg_type) -{ - assert(devinfo->has_ray_tracing); - assert(devinfo->ver < 20 || exec_size == 16); - - return SET_BITS(0, 19, 19) | /* No header */ - SET_BITS(msg_type, 17, 14) | - SET_BITS(elk_mdc_sm2(exec_size), 8, 8); -} - static inline uint32_t elk_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo, uint32_t desc) @@ -1612,7 +1576,7 @@ elk_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo, const bool simd_mode = exec_size == 16; const bool slot_group = group >= 16; - assert(devinfo->ver >= 10 || !coarse_pixel_rate); + assert(!coarse_pixel_rate); return (SET_BITS(slot_group, 11, 11) | SET_BITS(msg_type, 13, 12) | SET_BITS(!!noperspective, 14, 14) | diff --git a/src/intel/compiler/elk/elk_fs.cpp b/src/intel/compiler/elk/elk_fs.cpp index 600e0360575..dd50f14ddc3 100644 --- a/src/intel/compiler/elk/elk_fs.cpp +++ b/src/intel/compiler/elk/elk_fs.cpp @@ -435,28 +435,7 @@ elk_fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const if (is_send_from_grf()) return false; - /* From Wa_1604601757: - * - * "When multiplying a DW and any lower precision integer, source modifier - * is not supported." - */ - if (devinfo->ver >= 12 && (opcode == ELK_OPCODE_MUL || - opcode == ELK_OPCODE_MAD)) { - const elk_reg_type exec_type = get_exec_type(this); - const unsigned min_type_sz = opcode == ELK_OPCODE_MAD ? - MIN2(type_sz(src[1].type), type_sz(src[2].type)) : - MIN2(type_sz(src[0].type), type_sz(src[1].type)); - - if (elk_reg_type_is_integer(exec_type) && - type_sz(exec_type) >= 4 && - type_sz(exec_type) != min_type_sz) - return false; - } - - if (!elk_backend_instruction::can_do_source_mods()) - return false; - - return true; + return elk_backend_instruction::can_do_source_mods(); } bool @@ -939,24 +918,20 @@ namespace { unsigned predicate_width(const intel_device_info *devinfo, elk_predicate predicate) { - if (devinfo->ver >= 20) { - return 1; - } else { - switch (predicate) { - case ELK_PREDICATE_NONE: return 1; - case ELK_PREDICATE_NORMAL: return 1; - case ELK_PREDICATE_ALIGN1_ANY2H: return 2; - case ELK_PREDICATE_ALIGN1_ALL2H: return 2; - case ELK_PREDICATE_ALIGN1_ANY4H: return 4; - case ELK_PREDICATE_ALIGN1_ALL4H: return 4; - case ELK_PREDICATE_ALIGN1_ANY8H: return 8; - case ELK_PREDICATE_ALIGN1_ALL8H: return 8; - case ELK_PREDICATE_ALIGN1_ANY16H: return 16; - case ELK_PREDICATE_ALIGN1_ALL16H: return 16; - case ELK_PREDICATE_ALIGN1_ANY32H: return 32; - case ELK_PREDICATE_ALIGN1_ALL32H: return 32; - default: unreachable("Unsupported predicate"); - } + switch (predicate) { + case ELK_PREDICATE_NONE: return 1; + case ELK_PREDICATE_NORMAL: return 1; + case ELK_PREDICATE_ALIGN1_ANY2H: return 2; + case ELK_PREDICATE_ALIGN1_ALL2H: return 2; + case ELK_PREDICATE_ALIGN1_ANY4H: return 4; + case ELK_PREDICATE_ALIGN1_ALL4H: return 4; + case ELK_PREDICATE_ALIGN1_ANY8H: return 8; + case ELK_PREDICATE_ALIGN1_ALL8H: return 8; + case ELK_PREDICATE_ALIGN1_ANY16H: return 16; + case ELK_PREDICATE_ALIGN1_ALL16H: return 16; + case ELK_PREDICATE_ALIGN1_ANY32H: return 32; + case ELK_PREDICATE_ALIGN1_ALL32H: return 32; + default: unreachable("Unsupported predicate"); } } @@ -996,8 +971,8 @@ namespace { unsigned elk_fs_inst::flags_read(const intel_device_info *devinfo) const { - if (devinfo->ver < 20 && (predicate == ELK_PREDICATE_ALIGN1_ANYV || - predicate == ELK_PREDICATE_ALIGN1_ALLV)) { + if (predicate == ELK_PREDICATE_ALIGN1_ANYV || + predicate == ELK_PREDICATE_ALIGN1_ALLV) { /* The vertical predication modes combine corresponding bits from * f0.0 and f1.0 on Gfx7+, and f0.0 and f0.1 on older hardware. */ @@ -1275,17 +1250,6 @@ elk_fs_visitor::assign_curb_setup() prog_data->curb_read_length = uniform_push_length + ubo_push_length; uint64_t used = 0; - bool is_compute = gl_shader_stage_is_compute(stage); - - if (is_compute && elk_cs_prog_data(prog_data)->uses_inline_data) { - /* With COMPUTE_WALKER, we can push up to one register worth of data via - * the inline data parameter in the COMPUTE_WALKER command itself. - * - * TODO: Support inline data and push at the same time. - */ - assert(devinfo->verx10 >= 125); - assert(uniform_push_length <= reg_unit(devinfo)); - } /* Map the offsets in the UNIFORM file to fixed HW regs. */ foreach_block_and_inst(block, elk_fs_inst, inst, cfg) { @@ -1602,78 +1566,22 @@ elk_fs_visitor::assign_urb_setup() * representation described above into an offset and a * grf, which contains the plane parameters for the first * polygon processed by the thread. + * + * Earlier platforms and per-primitive block pack 2 logical + * input components per 32B register. */ - if (devinfo->ver >= 20 && !per_prim) { - /* Gfx20+ is able to pack 5 logical input components - * per 64B register for vertex setup data. - */ - const unsigned grf = base + idx / 5 * 2 * max_polygons; - assert(inst->src[i].offset / param_width < 12); - const unsigned delta = idx % 5 * 12 + - inst->src[i].offset / (param_width * chan_sz) * chan_sz + - inst->src[i].offset % chan_sz; - reg = byte_offset(retype(elk_vec8_grf(grf, 0), inst->src[i].type), - delta); - } else { - /* Earlier platforms and per-primitive block pack 2 logical - * input components per 32B register. - */ - const unsigned grf = base + idx / 2 * max_polygons; - assert(inst->src[i].offset / param_width < REG_SIZE / 2); - const unsigned delta = (idx % 2) * (REG_SIZE / 2) + - inst->src[i].offset / (param_width * chan_sz) * chan_sz + - inst->src[i].offset % chan_sz; - reg = byte_offset(retype(elk_vec8_grf(grf, 0), inst->src[i].type), - delta); - } + const unsigned grf = base + idx / 2 * max_polygons; + assert(inst->src[i].offset / param_width < REG_SIZE / 2); + const unsigned delta = (idx % 2) * (REG_SIZE / 2) + + inst->src[i].offset / (param_width * chan_sz) * chan_sz + + inst->src[i].offset % chan_sz; + reg = byte_offset(retype(elk_vec8_grf(grf, 0), inst->src[i].type), + delta); - if (max_polygons > 1) { - assert(devinfo->ver >= 12); - /* Misaligned channel strides that would lead to - * cross-channel access in the representation above are - * disallowed. - */ - assert(inst->src[i].stride * type_sz(inst->src[i].type) == chan_sz); - - /* Number of channels processing the same polygon. */ - const unsigned poly_width = dispatch_width / max_polygons; - assert(dispatch_width % max_polygons == 0); - - /* Accessing a subset of channels of a parameter vector - * starting from "chan" is necessary to handle - * SIMD-lowered instructions though. - */ - const unsigned chan = inst->src[i].offset % - (param_width * chan_sz) / chan_sz; - assert(chan < dispatch_width); - assert(chan % poly_width == 0); - const unsigned reg_size = reg_unit(devinfo) * REG_SIZE; - reg = byte_offset(reg, chan / poly_width * reg_size); - - if (inst->exec_size > poly_width) { - /* Accessing the parameters for multiple polygons. - * Corresponding parameters for different polygons - * are stored a GRF apart on the thread payload, so - * use that as vertical stride. - */ - const unsigned vstride = reg_size / type_sz(inst->src[i].type); - assert(vstride <= 32); - assert(chan % poly_width == 0); - reg = stride(reg, vstride, poly_width, 0); - } else { - /* Accessing one parameter for a single polygon -- - * Translate to a scalar region. - */ - assert(chan % poly_width + inst->exec_size <= poly_width); - reg = stride(reg, 0, 1, 0); - } - - } else { - const unsigned width = inst->src[i].stride == 0 ? - 1 : MIN2(inst->exec_size, 8); - reg = stride(reg, width * inst->src[i].stride, - width, inst->src[i].stride); - } + const unsigned width = inst->src[i].stride == 0 ? + 1 : MIN2(inst->exec_size, 8); + reg = stride(reg, width * inst->src[i].stride, + width, inst->src[i].stride); reg.abs = inst->src[i].abs; reg.negate = inst->src[i].negate; @@ -2078,9 +1986,6 @@ elk_get_subgroup_id_param_index(const intel_device_info *devinfo, if (prog_data->nr_params == 0) return -1; - if (devinfo->verx10 >= 125) - return -1; - /* The local thread id is always the last parameter in the list */ uint32_t last_param = prog_data->param[prog_data->nr_params - 1]; if (last_param == ELK_PARAM_BUILTIN_SUBGROUP_ID) @@ -3787,20 +3692,7 @@ elk_fs_visitor::lower_mul_dword_inst(elk_fs_inst *inst, elk_bblock_t *block) bool do_addition = true; if (devinfo->ver >= 7) { - /* From Wa_1604601757: - * - * "When multiplying a DW and any lower precision integer, source modifier - * is not supported." - * - * An unsupported negate modifier on src[1] would ordinarily be - * lowered by the subsequent lower_regioning pass. In this case that - * pass would spawn another dword multiply. Instead, lower the - * modifier first. - */ - const bool source_mods_unsupported = (devinfo->ver >= 12); - - if (inst->src[1].abs || (inst->src[1].negate && - source_mods_unsupported)) + if (inst->src[1].abs) lower_src_modifiers(this, block, inst, 1); if (inst->src[1].file == IMM) { @@ -4027,8 +3919,7 @@ elk_fs_visitor::lower_integer_multiplication() } else if (!inst->dst.is_accumulator() && (inst->dst.type == ELK_REGISTER_TYPE_D || inst->dst.type == ELK_REGISTER_TYPE_UD) && - (!devinfo->has_integer_dword_mul || - devinfo->verx10 >= 125)) { + !devinfo->has_integer_dword_mul) { lower_mul_dword_inst(inst, block); inst->remove(block); progress = true; @@ -4192,7 +4083,6 @@ elk_sample_mask_reg(const fs_builder &bld) return elk_flag_subreg(sample_mask_flag_subreg(s) + bld.group() / 16); } else { assert(s.devinfo->ver >= 6 && bld.dispatch_width() <= 16); - assert(s.devinfo->ver < 20); return retype(elk_vec1_grf((bld.group() >= 16 ? 2 : 1), 7), ELK_REGISTER_TYPE_UW); } @@ -4258,7 +4148,6 @@ elk_emit_predicate_on_sample_mask(const fs_builder &bld, elk_fs_inst *inst) assert(inst->predicate == ELK_PREDICATE_NORMAL); assert(!inst->predicate_inverse); assert(inst->flag_subreg == 0); - assert(s.devinfo->ver < 20); /* Combine the sample mask with the existing predicate by using a * vertical predication mode. */ @@ -4458,7 +4347,7 @@ get_fpu_lowered_simd_width(const elk_fs_visitor *shader, * "Ternary instruction with condition modifiers must not use SIMD32." */ if (inst->conditional_mod && (devinfo->ver < 8 || - (inst->elk_is_3src(compiler) && devinfo->ver < 12))) + inst->elk_is_3src(compiler))) max_width = MIN2(max_width, 16); /* From the IVB PRMs (applies to other devices that don't have the @@ -4521,7 +4410,7 @@ get_fpu_lowered_simd_width(const elk_fs_visitor *shader, * instructions do not support HF types and conversions from/to F are * required. */ - if (is_mixed_float_with_fp32_dst(inst) && devinfo->ver < 20) + if (is_mixed_float_with_fp32_dst(inst)) max_width = MIN2(max_width, 8); /* From the SKL PRM, Special Restrictions for Handling Mixed Mode @@ -4530,7 +4419,7 @@ get_fpu_lowered_simd_width(const elk_fs_visitor *shader, * "No SIMD16 in mixed mode when destination is packed f16 for both * Align1 and Align16." */ - if (is_mixed_float_with_packed_fp16_dst(inst) && devinfo->ver < 20) + if (is_mixed_float_with_packed_fp16_dst(inst)) max_width = MIN2(max_width, 8); /* Only power-of-two execution sizes are representable in the instruction @@ -4566,7 +4455,7 @@ get_sampler_lowered_simd_width(const struct intel_device_info *devinfo, */ if (inst->opcode != ELK_SHADER_OPCODE_TEX && inst->components_read(TEX_LOGICAL_SRC_MIN_LOD)) - return devinfo->ver < 20 ? 8 : 16; + return 8; /* Calculate the number of coordinate components that have to be present * assuming that additional arguments follow the texel coordinates in the @@ -4581,14 +4470,6 @@ get_sampler_lowered_simd_width(const struct intel_device_info *devinfo, inst->opcode != ELK_SHADER_OPCODE_TXF_CMS_LOGICAL) ? 4 : 3; - /* On Gfx9+ the LOD argument is for free if we're able to use the LZ - * variant of the TXL or TXF message. - */ - const bool implicit_lod = devinfo->ver >= 9 && - (inst->opcode == ELK_SHADER_OPCODE_TXL || - inst->opcode == ELK_SHADER_OPCODE_TXF) && - inst->src[TEX_LOGICAL_SRC_LOD].is_zero(); - /* Calculate the total number of argument components that need to be passed * to the sampler unit. */ @@ -4596,7 +4477,7 @@ get_sampler_lowered_simd_width(const struct intel_device_info *devinfo, MAX2(inst->components_read(TEX_LOGICAL_SRC_COORDINATE), req_coord_components) + inst->components_read(TEX_LOGICAL_SRC_SHADOW_C) + - (implicit_lod ? 0 : inst->components_read(TEX_LOGICAL_SRC_LOD)) + + inst->components_read(TEX_LOGICAL_SRC_LOD) + inst->components_read(TEX_LOGICAL_SRC_LOD2) + inst->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) + (inst->opcode == ELK_SHADER_OPCODE_TG4_OFFSET_LOGICAL ? @@ -4781,8 +4662,7 @@ get_lowered_simd_width(const elk_fs_visitor *shader, const elk_fs_inst *inst) /* MULH is lowered to the MUL/MACH sequence using the accumulator, which * is 8-wide on Gfx7+. */ - return (devinfo->ver >= 20 ? 16 : - devinfo->ver >= 7 ? 8 : + return (devinfo->ver >= 7 ? 8 : get_fpu_lowered_simd_width(shader, inst)); case ELK_FS_OPCODE_FB_WRITE_LOGICAL: @@ -4817,7 +4697,7 @@ get_lowered_simd_width(const elk_fs_visitor *shader, const elk_fs_inst *inst) /* TXD is unsupported in SIMD16 mode previous to Xe2. SIMD32 is still * unsuppported on Xe2. */ - return devinfo->ver < 20 ? 8 : 16; + return 8; case ELK_SHADER_OPCODE_TXL_LOGICAL: case ELK_FS_OPCODE_TXB_LOGICAL: @@ -4870,13 +4750,13 @@ get_lowered_simd_width(const elk_fs_visitor *shader, const elk_fs_inst *inst) case ELK_SHADER_OPCODE_URB_READ_LOGICAL: case ELK_SHADER_OPCODE_URB_WRITE_LOGICAL: - return MIN2(devinfo->ver < 20 ? 8 : 16, inst->exec_size); + return MIN2(8, inst->exec_size); case ELK_SHADER_OPCODE_QUAD_SWIZZLE: { const unsigned swiz = inst->src[1].ud; return (is_uniform(inst->src[0]) ? get_fpu_lowered_simd_width(shader, inst) : - devinfo->ver < 11 && type_sz(inst->src[0].type) == 4 ? 8 : + type_sz(inst->src[0].type) == 4 ? 8 : swiz == ELK_SWIZZLE_XYXY || swiz == ELK_SWIZZLE_ZWZW ? 4 : get_fpu_lowered_simd_width(shader, inst)); } @@ -5249,7 +5129,7 @@ bool elk_fs_visitor::lower_barycentrics() { const bool has_interleaved_layout = devinfo->has_pln || - (devinfo->ver >= 7 && devinfo->ver < 20); + devinfo->ver >= 7; bool progress = false; if (stage != MESA_SHADER_FRAGMENT || !has_interleaved_layout) @@ -6125,18 +6005,9 @@ elk_fs_visitor::set_tcs_invocation_id() struct elk_vue_prog_data *vue_prog_data = &tcs_prog_data->base; const fs_builder bld = fs_builder(this).at_end(); - const unsigned instance_id_mask = - (devinfo->verx10 >= 125) ? INTEL_MASK(7, 0) : - (devinfo->ver >= 11) ? INTEL_MASK(22, 16) : - INTEL_MASK(23, 17); - const unsigned instance_id_shift = - (devinfo->verx10 >= 125) ? 0 : (devinfo->ver >= 11) ? 16 : 17; + const unsigned instance_id_mask = INTEL_MASK(23, 17); + const unsigned instance_id_shift = 17; - /* Get instance number from g0.2 bits: - * * 7:0 on DG2+ - * * 22:16 on gfx11+ - * * 23:17 otherwise - */ elk_fs_reg t = bld.vgrf(ELK_REGISTER_TYPE_UD); bld.AND(t, elk_fs_reg(retype(elk_vec1_grf(0, 2), ELK_REGISTER_TYPE_UD)), elk_imm_ud(instance_id_mask)); @@ -7341,8 +7212,6 @@ namespace elk { { if (!regs[0]) return elk_fs_reg(); - else if (bld.shader->devinfo->ver >= 20) - return fetch_payload_reg(bld, regs, ELK_REGISTER_TYPE_F, 2); const elk_fs_reg tmp = bld.vgrf(ELK_REGISTER_TYPE_F, 2); const elk::fs_builder hbld = bld.exec_all().group(8, 0); diff --git a/src/intel/compiler/elk/elk_fs_builder.h b/src/intel/compiler/elk/elk_fs_builder.h index 19c88a01fa1..c9ffcd4f3a1 100644 --- a/src/intel/compiler/elk/elk_fs_builder.h +++ b/src/intel/compiler/elk/elk_fs_builder.h @@ -781,7 +781,7 @@ namespace elk { LRP(const dst_reg &dst, const src_reg &x, const src_reg &y, const src_reg &a) const { - if (shader->devinfo->ver >= 6 && shader->devinfo->ver <= 10) { + if (shader->devinfo->ver >= 6) { /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so * we need to reorder the operands. */ diff --git a/src/intel/compiler/elk/elk_fs_generator.cpp b/src/intel/compiler/elk/elk_fs_generator.cpp index 1b2e94d4900..49611794c6a 100644 --- a/src/intel/compiler/elk/elk_fs_generator.cpp +++ b/src/intel/compiler/elk/elk_fs_generator.cpp @@ -1760,8 +1760,7 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width, break; case ELK_OPCODE_CSEL: assert(devinfo->ver >= 8); - if (devinfo->ver < 10) - elk_set_default_access_mode(p, ELK_ALIGN_16); + elk_set_default_access_mode(p, ELK_ALIGN_16); elk_CSEL(p, dst, src[0], src[1], src[2]); break; case ELK_OPCODE_BFREV: diff --git a/src/intel/compiler/elk/elk_fs_visitor.cpp b/src/intel/compiler/elk/elk_fs_visitor.cpp index 5506caafb2b..1d0e73a9231 100644 --- a/src/intel/compiler/elk/elk_fs_visitor.cpp +++ b/src/intel/compiler/elk/elk_fs_visitor.cpp @@ -247,8 +247,7 @@ elk_fs_visitor::emit_interpolation_setup_gfx6() * on gfx20+. gi_reg is the 32B section of the GRF that * contains the subspan coordinates. */ - const struct elk_reg gi_reg = devinfo->ver >= 20 ? xe2_vec1_grf(i, 8) : - elk_vec1_grf(i + 1, 0); + const struct elk_reg gi_reg = elk_vec1_grf(i + 1, 0); const struct elk_reg gi_uw = retype(gi_reg, ELK_REGISTER_TYPE_UW); if (devinfo->ver >= 8 || dispatch_width == 8) { @@ -575,29 +574,6 @@ elk_fs_visitor::emit_fb_writes() this->outputs[0].file != BAD_FILE); assert(!prog_data->dual_src_blend || key->nr_color_regions == 1); - /* Following condition implements Wa_14017468336: - * - * "If dual source blend is enabled do not enable SIMD32 dispatch" and - * "For a thread dispatched as SIMD32, must not issue SIMD8 message with Last - * Render Target Select set." - */ - if (devinfo->ver >= 11 && devinfo->ver <= 12 && - prog_data->dual_src_blend) { - /* The dual-source RT write messages fail to release the thread - * dependency on ICL and TGL with SIMD32 dispatch, leading to hangs. - * - * XXX - Emit an extra single-source NULL RT-write marked LastRT in - * order to release the thread dependency without disabling - * SIMD32. - * - * The dual-source RT write messages may lead to hangs with SIMD16 - * dispatch on ICL due some unknown reasons, see - * https://gitlab.freedesktop.org/mesa/mesa/-/issues/2183 - */ - limit_dispatch_width(8, "Dual source blending unsupported " - "in SIMD16 and SIMD32 modes.\n"); - } - do_emit_fb_writes(key->nr_color_regions, replicate_alpha); } @@ -801,11 +777,7 @@ elk_fs_visitor::emit_urb_writes(const elk_fs_reg &gs_vertex_count) elk_fs_inst *inst = abld.emit(ELK_SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, srcs, ARRAY_SIZE(srcs)); - /* For ICL Wa_1805992985 one needs additional write in the end. */ - if (devinfo->ver == 11 && stage == MESA_SHADER_TESS_EVAL) - inst->eot = false; - else - inst->eot = slot == last_slot && stage != MESA_SHADER_GEOMETRY; + inst->eot = slot == last_slot && stage != MESA_SHADER_GEOMETRY; inst->offset = urb_offset; urb_offset = starting_urb_offset + slot + 1; @@ -851,57 +823,6 @@ elk_fs_visitor::emit_urb_writes(const elk_fs_reg &gs_vertex_count) inst->offset = 1; return; } - - /* ICL Wa_1805992985: - * - * ICLLP GPU hangs on one of tessellation vkcts tests with DS not done. The - * send cycle, which is a urb write with an eot must be 4 phases long and - * all 8 lanes must valid. - */ - if (devinfo->ver == 11 && stage == MESA_SHADER_TESS_EVAL) { - assert(dispatch_width == 8); - elk_fs_reg uniform_urb_handle = elk_fs_reg(VGRF, alloc.allocate(1), ELK_REGISTER_TYPE_UD); - elk_fs_reg uniform_mask = elk_fs_reg(VGRF, alloc.allocate(1), ELK_REGISTER_TYPE_UD); - elk_fs_reg payload = elk_fs_reg(VGRF, alloc.allocate(4), ELK_REGISTER_TYPE_UD); - - /* Workaround requires all 8 channels (lanes) to be valid. This is - * understood to mean they all need to be alive. First trick is to find - * a live channel and copy its urb handle for all the other channels to - * make sure all handles are valid. - */ - bld.exec_all().MOV(uniform_urb_handle, bld.emit_uniformize(urb_handle)); - - /* Second trick is to use masked URB write where one can tell the HW to - * actually write data only for selected channels even though all are - * active. - * Third trick is to take advantage of the must-be-zero (MBZ) area in - * the very beginning of the URB. - * - * One masks data to be written only for the first channel and uses - * offset zero explicitly to land data to the MBZ area avoiding trashing - * any other part of the URB. - * - * Since the WA says that the write needs to be 4 phases long one uses - * 4 slots data. All are explicitly zeros in order to to keep the MBZ - * area written as zeros. - */ - bld.exec_all().MOV(uniform_mask, elk_imm_ud(0x10000u)); - bld.exec_all().MOV(offset(payload, bld, 0), elk_imm_ud(0u)); - bld.exec_all().MOV(offset(payload, bld, 1), elk_imm_ud(0u)); - bld.exec_all().MOV(offset(payload, bld, 2), elk_imm_ud(0u)); - bld.exec_all().MOV(offset(payload, bld, 3), elk_imm_ud(0u)); - - elk_fs_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = uniform_mask; - srcs[URB_LOGICAL_SRC_DATA] = payload; - srcs[URB_LOGICAL_SRC_COMPONENTS] = elk_imm_ud(4); - - elk_fs_inst *inst = bld.exec_all().emit(ELK_SHADER_OPCODE_URB_WRITE_LOGICAL, - reg_undef, srcs, ARRAY_SIZE(srcs)); - inst->eot = true; - inst->offset = 0; - } } void @@ -1002,7 +923,7 @@ elk_fs_visitor::elk_fs_visitor(const struct elk_compiler *compiler, live_analysis(this), regpressure_analysis(this), performance_analysis(this), needs_register_pressure(needs_register_pressure), - dispatch_width(compiler->devinfo->ver >= 20 ? 16 : 8), + dispatch_width(8), max_polygons(0), api_subgroup_size(elk_nir_api_subgroup_size(shader, dispatch_width)) { diff --git a/src/intel/compiler/elk/elk_inst.h b/src/intel/compiler/elk/elk_inst.h index 14cc71d008d..0ef9be348cd 100644 --- a/src/intel/compiler/elk/elk_inst.h +++ b/src/intel/compiler/elk/elk_inst.h @@ -639,8 +639,8 @@ F(rt_message_type, /* 4+ */ MD(10), MD( 8)) * Thread Spawn message function control bits: * @{ */ -FC(ts_resource_select, /* 4+ */ MD( 4), MD( 4), devinfo->ver < 11) -FC(ts_request_type, /* 4+ */ MD( 1), MD( 1), devinfo->ver < 11) +F(ts_resource_select, /* 4+ */ MD( 4), MD( 4)) +F(ts_request_type, /* 4+ */ MD( 1), MD( 1)) F(ts_opcode, /* 4+ */ MD( 0), MD( 0)) /** @} */ @@ -677,13 +677,8 @@ static inline uint64_t elk_inst_imm_uq(const struct intel_device_info *devinfo, const elk_inst *insn) { - if (devinfo->ver >= 12) { - return elk_inst_bits(insn, 95, 64) << 32 | - elk_inst_bits(insn, 127, 96); - } else { - assert(devinfo->ver >= 8); - return elk_inst_bits(insn, 127, 64); - } + assert(devinfo->ver >= 8); + return elk_inst_bits(insn, 127, 64); } static inline float @@ -749,12 +744,7 @@ elk_inst_set_imm_df(const struct intel_device_info *devinfo, (void) devinfo; dt.d = value; - if (devinfo->ver >= 12) { - elk_inst_set_bits(insn, 95, 64, dt.u >> 32); - elk_inst_set_bits(insn, 127, 96, dt.u & 0xFFFFFFFF); - } else { - elk_inst_set_bits(insn, 127, 64, dt.u); - } + elk_inst_set_bits(insn, 127, 64, dt.u); } static inline void @@ -762,12 +752,7 @@ elk_inst_set_imm_uq(const struct intel_device_info *devinfo, elk_inst *insn, uint64_t value) { (void) devinfo; - if (devinfo->ver >= 12) { - elk_inst_set_bits(insn, 95, 64, value >> 32); - elk_inst_set_bits(insn, 127, 96, value & 0xFFFFFFFF); - } else { - elk_inst_set_bits(insn, 127, 64, value); - } + elk_inst_set_bits(insn, 127, 64, value); } /** @} */ @@ -802,25 +787,14 @@ REG_TYPE(src1) /* The AddrImm fields are split into two discontiguous sections on Gfx8+ */ -#define ELK_IA1_ADDR_IMM(reg, g4_high, g4_low, g8_nine, g8_high, g8_low, \ - g12_high, g12_low, g20_high, g20_low, g20_zero) \ +#define ELK_IA1_ADDR_IMM(reg, g4_high, g4_low, g8_nine, g8_high, g8_low) \ static inline void \ elk_inst_set_##reg##_ia1_addr_imm(const struct \ intel_device_info *devinfo, \ elk_inst *inst, \ unsigned value) \ { \ - if (devinfo->ver >= 20) { \ - assert((value & ~0x7ff) == 0); \ - elk_inst_set_bits(inst, g20_high, g20_low, value >> 1); \ - if (g20_zero == -1) \ - assert((value & 1) == 0); \ - else \ - elk_inst_set_bits(inst, g20_zero, g20_zero, value & 1); \ - } else if (devinfo->ver >= 12) { \ - assert((value & ~0x3ff) == 0); \ - elk_inst_set_bits(inst, g12_high, g12_low, value); \ - } else if (devinfo->ver >= 8) { \ + if (devinfo->ver >= 8) { \ assert((value & ~0x3ff) == 0); \ elk_inst_set_bits(inst, g8_high, g8_low, value & 0x1ff); \ elk_inst_set_bits(inst, g8_nine, g8_nine, value >> 9); \ @@ -833,13 +807,7 @@ static inline unsigned \ elk_inst_##reg##_ia1_addr_imm(const struct intel_device_info *devinfo, \ const elk_inst *inst) \ { \ - if (devinfo->ver >= 20) { \ - return elk_inst_bits(inst, g20_high, g20_low) << 1 | \ - (g20_zero == -1 ? 0 : \ - elk_inst_bits(inst, g20_zero, g20_zero)); \ - } else if (devinfo->ver >= 12) { \ - return elk_inst_bits(inst, g12_high, g12_low); \ - } else if (devinfo->ver >= 8) { \ + if (devinfo->ver >= 8) { \ return elk_inst_bits(inst, g8_high, g8_low) | \ (elk_inst_bits(inst, g8_nine, g8_nine) << 9); \ } else { \ @@ -847,11 +815,11 @@ elk_inst_##reg##_ia1_addr_imm(const struct intel_device_info *devinfo, \ } \ } -/* AddrImm for Align1 Indirect Addressing */ -/* -Gen 4- ----Gfx8---- -Gfx12- ---Gfx20--- */ -ELK_IA1_ADDR_IMM(src1, 105, 96, 121, 104, 96, 107, 98, 107, 98, -1) -ELK_IA1_ADDR_IMM(src0, 73, 64, 95, 72, 64, 75, 66, 75, 66, 87) -ELK_IA1_ADDR_IMM(dst, 57, 48, 47, 56, 48, 59, 50, 59, 50, 33) +/* AddrImm for Align1 Indirect Addressing */ +/* -Gen 4- ----Gfx8---- */ +ELK_IA1_ADDR_IMM(src1, 105, 96, 121, 104, 96) +ELK_IA1_ADDR_IMM(src0, 73, 64, 95, 72, 64) +ELK_IA1_ADDR_IMM(dst, 57, 48, 47, 56, 48) #define ELK_IA16_ADDR_IMM(reg, g4_high, g4_low, g8_nine, g8_high, g8_low) \ static inline void \ @@ -859,7 +827,6 @@ elk_inst_set_##reg##_ia16_addr_imm(const struct \ intel_device_info *devinfo, \ elk_inst *inst, unsigned value) \ { \ - assert(devinfo->ver < 12); \ assert((value & ~0x3ff) == 0); \ if (devinfo->ver >= 8) { \ assert(GET_BITS(value, 3, 0) == 0); \ @@ -873,7 +840,6 @@ static inline unsigned \ elk_inst_##reg##_ia16_addr_imm(const struct intel_device_info *devinfo, \ const elk_inst *inst) \ { \ - assert(devinfo->ver < 12); \ if (devinfo->ver >= 8) { \ return (elk_inst_bits(inst, g8_high, g8_low) << 4) | \ (elk_inst_bits(inst, g8_nine, g8_nine) << 9); \ @@ -1049,12 +1015,8 @@ static inline unsigned elk_compact_inst_imm(const struct intel_device_info *devinfo, const elk_compact_inst *inst) { - if (devinfo->ver >= 12) { - return elk_compact_inst_bits(inst, 63, 52); - } else { - return (elk_compact_inst_bits(inst, 39, 35) << 8) | - (elk_compact_inst_bits(inst, 63, 56)); - } + return (elk_compact_inst_bits(inst, 39, 35) << 8) | + (elk_compact_inst_bits(inst, 63, 56)); } /** diff --git a/src/intel/compiler/elk/elk_vec4_builder.h b/src/intel/compiler/elk/elk_vec4_builder.h index 0d6111f0222..9591f0a2731 100644 --- a/src/intel/compiler/elk/elk_vec4_builder.h +++ b/src/intel/compiler/elk/elk_vec4_builder.h @@ -528,7 +528,7 @@ namespace elk { /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so * we need to reorder the operands. */ - assert(shader->devinfo->ver >= 6 && shader->devinfo->ver <= 9); + assert(shader->devinfo->ver >= 6); return emit(ELK_OPCODE_LRP, dst, a, y, x); }