diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index 1af3d472647..6cab08cfc97 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -838,7 +838,7 @@ dest(FILE *file, const struct brw_isa_info *isa, const brw_inst *inst) { const struct intel_device_info *devinfo = isa->devinfo; enum brw_reg_type type = brw_inst_dst_type(devinfo, inst); - unsigned elem_size = brw_reg_type_to_size(type); + unsigned elem_size = brw_type_size_bytes(type); int err = 0; if (is_split_send(devinfo, brw_inst_opcode(isa, inst))) { @@ -945,7 +945,7 @@ dest_3src(FILE *file, const struct intel_device_info *devinfo, type = brw_inst_3src_a16_dst_type(devinfo, inst); subreg_nr = brw_inst_3src_a16_dst_subreg_nr(devinfo, inst) * 4; } - subreg_nr /= brw_reg_type_to_size(type); + subreg_nr /= brw_type_size_bytes(type); if (subreg_nr) format(file, ".%u", subreg_nr); @@ -1019,7 +1019,7 @@ src_da1(FILE *file, if (err == -1) return 0; if (sub_reg_num) { - unsigned elem_size = brw_reg_type_to_size(type); + unsigned elem_size = brw_type_size_bytes(type); format(file, ".%d", sub_reg_num / elem_size); /* use formal style like spec */ } src_align1_region(file, _vert_stride, _width, _horiz_stride); @@ -1106,7 +1106,7 @@ src_da16(FILE *file, if (err == -1) return 0; if (_subreg_nr) { - unsigned elem_size = brw_reg_type_to_size(type); + unsigned elem_size = brw_type_size_bytes(type); /* bit4 for subreg number byte addressing. Make this same meaning as in da1 case, so output looks consistent. */ @@ -1272,7 +1272,7 @@ src0_3src(FILE *file, const struct intel_device_info *devinfo, _width == BRW_WIDTH_1 && _horiz_stride == BRW_HORIZONTAL_STRIDE_0; - subreg_nr /= brw_reg_type_to_size(type); + subreg_nr /= brw_type_size_bytes(type); err |= control(file, "negate", m_negate, brw_inst_3src_src0_negate(devinfo, inst), NULL); @@ -1346,7 +1346,7 @@ src1_3src(FILE *file, const struct intel_device_info *devinfo, _width == BRW_WIDTH_1 && _horiz_stride == BRW_HORIZONTAL_STRIDE_0; - subreg_nr /= brw_reg_type_to_size(type); + subreg_nr /= brw_type_size_bytes(type); err |= control(file, "negate", m_negate, brw_inst_3src_src1_negate(devinfo, inst), NULL); @@ -1434,7 +1434,7 @@ src2_3src(FILE *file, const struct intel_device_info *devinfo, _width == BRW_WIDTH_1 && _horiz_stride == BRW_HORIZONTAL_STRIDE_0; - subreg_nr /= brw_reg_type_to_size(type); + subreg_nr /= brw_type_size_bytes(type); err |= control(file, "negate", m_negate, brw_inst_3src_src2_negate(devinfo, inst), NULL); @@ -2009,7 +2009,7 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa, } else if (!is_send(opcode) && (devinfo->ver < 12 || brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE || - type_sz(brw_inst_src0_type(devinfo, inst)) < 8)) { + brw_type_size_bytes(brw_inst_src0_type(devinfo, inst)) < 8)) { err |= control(file, "conditional modifier", conditional_modifier, brw_inst_cond_modifier(devinfo, inst), NULL); diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index e33eb088239..0d9c7678888 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -50,7 +50,7 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest) */ if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && dest.nr == BRW_ARF_NULL && - type_sz(dest.type) == 1 && + brw_type_size_bytes(dest.type) == 1 && dest.hstride == BRW_HORIZONTAL_STRIDE_1) { dest.hstride = BRW_HORIZONTAL_STRIDE_2; } @@ -187,7 +187,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct 
brw_reg reg) else brw_inst_set_imm_ud(devinfo, inst, reg.ud); - if (devinfo->ver < 12 && type_sz(reg.type) < 8) { + if (devinfo->ver < 12 && brw_type_size_bytes(reg.type) < 8) { brw_inst_set_src1_reg_file(devinfo, inst, BRW_ARCHITECTURE_REGISTER_FILE); brw_inst_set_src1_reg_hw_type(devinfo, inst, @@ -288,7 +288,7 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) if (reg.file == BRW_IMMEDIATE_VALUE) { /* two-argument instructions can only use 32-bit immediates */ - assert(type_sz(reg.type) < 8); + assert(brw_type_size_bytes(reg.type) < 8); brw_inst_set_imm_ud(devinfo, inst, reg.ud); } else { /* This is a hardware restriction, which may or may not be lifted @@ -486,8 +486,10 @@ brw_alu2(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, struct brw_reg src0, struct brw_reg src1) { /* 64-bit immediates are only supported on 1-src instructions */ - assert(src0.file != BRW_IMMEDIATE_VALUE || type_sz(src0.type) <= 4); - assert(src1.file != BRW_IMMEDIATE_VALUE || type_sz(src1.type) <= 4); + assert(src0.file != BRW_IMMEDIATE_VALUE || + brw_type_size_bytes(src0.type) <= 4); + assert(src1.file != BRW_IMMEDIATE_VALUE || + brw_type_size_bytes(src1.type) <= 4); brw_inst *insn = next_insn(p, opcode); brw_set_dest(p, insn, dest); @@ -1918,7 +1920,7 @@ brw_broadcast(struct brw_codegen *p, const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0; src = stride(suboffset(src, i), 0, 1, 0); - if (type_sz(src.type) > 4 && !devinfo->has_64bit_int) { + if (brw_type_size_bytes(src.type) > 4 && !devinfo->has_64bit_int) { brw_MOV(p, subscript(dst, BRW_TYPE_D, 0), subscript(src, BRW_TYPE_D, 0)); brw_set_default_swsb(p, tgl_swsb_null()); @@ -1956,7 +1958,7 @@ brw_broadcast(struct brw_codegen *p, /* Take into account the component size and horizontal stride. */ assert(src.vstride == src.hstride + src.width); brw_SHL(p, addr, vec1(idx), - brw_imm_ud(util_logbase2(type_sz(src.type)) + + brw_imm_ud(util_logbase2(brw_type_size_bytes(src.type)) + src.hstride - 1)); /* We can only address up to limit bytes using the indirect @@ -1974,7 +1976,7 @@ brw_broadcast(struct brw_codegen *p, brw_set_default_swsb(p, tgl_swsb_regdist(1)); /* Use indirect addressing to fetch the specified component. */ - if (type_sz(src.type) > 4 && + if (brw_type_size_bytes(src.type) > 4 && (intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_int)) { /* From the Cherryview PRM Vol 7. 
"Register Region Restrictions": * @@ -2117,8 +2119,8 @@ brw_MOV_reloc_imm(struct brw_codegen *p, enum brw_reg_type src_type, uint32_t id) { - assert(type_sz(src_type) == 4); - assert(type_sz(dst.type) == 4); + assert(brw_type_size_bytes(src_type) == 4); + assert(brw_type_size_bytes(dst.type) == 4); brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_MOV_IMM, p->next_insn_offset, 0); diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c index acc8979b45f..012dff1fb42 100644 --- a/src/intel/compiler/brw_eu_validate.c +++ b/src/intel/compiler/brw_eu_validate.c @@ -629,12 +629,14 @@ is_byte_conversion(const struct brw_isa_info *isa, enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); if (dst_type != src0_type && - (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) { + (brw_type_size_bytes(dst_type) == 1 || + brw_type_size_bytes(src0_type) == 1)) { return true; } else if (num_sources > 1) { enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); return dst_type != src1_type && - (type_sz(dst_type) == 1 || type_sz(src1_type) == 1); + (brw_type_size_bytes(dst_type) == 1 || + brw_type_size_bytes(src1_type) == 1); } return false; @@ -664,13 +666,13 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa, * a D or UD, so it is allowed. */ if (num_sources == 3 && brw_inst_opcode(isa, inst) != BRW_OPCODE_DPAS) { - ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 || - brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1, + ERROR_IF(brw_type_size_bytes(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 || + brw_type_size_bytes(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1, "Byte data type is not supported for src1/2 register regioning. This includes " "byte broadcast as well."); } if (num_sources == 2) { - ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1, + ERROR_IF(brw_type_size_bytes(brw_inst_src1_type(devinfo, inst)) == 1, "Byte data type is not supported for src1 register regioning. This includes " "byte broadcast as well."); } @@ -726,7 +728,7 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa, !devinfo->has_64bit_int, "64-bit int source, but platform does not support it"); if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 && - num_sources == 3 && type_sz(src_type) > 4) { + num_sources == 3 && brw_type_size_bytes(src_type) > 4) { /* From the Broadwell PRM, Volume 7 "3D Media GPGPU", page 944: * * "This is applicable to 32b datatypes and 16b datatype. 64b @@ -810,8 +812,8 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa, } unsigned exec_type = execution_type(isa, inst); - unsigned exec_type_size = brw_reg_type_to_size(exec_type); - unsigned dst_type_size = brw_reg_type_to_size(dst_type); + unsigned exec_type_size = brw_type_size_bytes(exec_type); + unsigned dst_type_size = brw_type_size_bytes(dst_type); if (is_byte_conversion(isa, inst)) { /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: @@ -827,14 +829,14 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa, enum brw_reg_type src1_type = num_sources > 1 ? 
brw_inst_src1_type(devinfo, inst) : 0; - ERROR_IF(type_sz(dst_type) == 1 && - (type_sz(src0_type) == 8 || - (num_sources > 1 && type_sz(src1_type) == 8)), + ERROR_IF(brw_type_size_bytes(dst_type) == 1 && + (brw_type_size_bytes(src0_type) == 8 || + (num_sources > 1 && brw_type_size_bytes(src1_type) == 8)), "There are no direct conversions between 64-bit types and B/UB"); - ERROR_IF(type_sz(dst_type) == 8 && - (type_sz(src0_type) == 1 || - (num_sources > 1 && type_sz(src1_type) == 1)), + ERROR_IF(brw_type_size_bytes(dst_type) == 8 && + (brw_type_size_bytes(src0_type) == 1 || + (num_sources > 1 && brw_type_size_bytes(src1_type) == 1)), "There are no direct conversions between 64-bit types and B/UB"); } @@ -855,11 +857,11 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa, enum brw_reg_type src1_type = num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : 0; ERROR_IF(dst_type == BRW_TYPE_HF && - (type_sz(src0_type) == 8 || - (num_sources > 1 && type_sz(src1_type) == 8)), + (brw_type_size_bytes(src0_type) == 8 || + (num_sources > 1 && brw_type_size_bytes(src1_type) == 8)), "There are no direct conversions between 64-bit types and HF"); - ERROR_IF(type_sz(dst_type) == 8 && + ERROR_IF(brw_type_size_bytes(dst_type) == 8 && (src0_type == BRW_TYPE_HF || (num_sources > 1 && src1_type == BRW_TYPE_HF)), "There are no direct conversions between 64-bit types and HF"); @@ -1023,7 +1025,7 @@ general_restrictions_on_region_parameters(const struct brw_isa_info *isa, width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ type = brw_inst_src ## n ## _type(devinfo, inst); \ - element_size = brw_reg_type_to_size(type); \ + element_size = brw_type_size_bytes(type); \ subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst) if (i == 0) { @@ -1436,7 +1438,7 @@ region_alignment_rules(const struct brw_isa_info *isa, width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ type = brw_inst_src ## n ## _type(devinfo, inst); \ - element_size = brw_reg_type_to_size(type); \ + element_size = brw_type_size_bytes(type); \ subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ align1_access_mask(src ## n ## _access_mask, \ exec_size, element_size, subreg, \ @@ -1464,7 +1466,7 @@ region_alignment_rules(const struct brw_isa_info *isa, unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); enum brw_reg_type dst_type = inst_dst_type(isa, inst); - unsigned element_size = brw_reg_type_to_size(dst_type); + unsigned element_size = brw_type_size_bytes(dst_type); unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); unsigned offset = ((exec_size - 1) * stride * element_size) + subreg; ERROR_IF(offset >= 64 * reg_unit(devinfo), @@ -1530,7 +1532,7 @@ vector_immediate_restrictions(const struct brw_isa_info *isa, return (struct string){}; enum brw_reg_type dst_type = inst_dst_type(isa, inst); - unsigned dst_type_size = brw_reg_type_to_size(dst_type); + unsigned dst_type_size = brw_type_size_bytes(dst_type); unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ? 
brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0; unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); @@ -1592,11 +1594,11 @@ special_requirements_for_handling_double_precision_data_types( return (struct string){}; enum brw_reg_type exec_type = execution_type(isa, inst); - unsigned exec_type_size = brw_reg_type_to_size(exec_type); + unsigned exec_type_size = brw_type_size_bytes(exec_type); enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst); enum brw_reg_type dst_type = inst_dst_type(isa, inst); - unsigned dst_type_size = brw_reg_type_to_size(dst_type); + unsigned dst_type_size = brw_type_size_bytes(dst_type); unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst); unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); @@ -1629,7 +1631,7 @@ special_requirements_for_handling_double_precision_data_types( hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ file = brw_inst_src ## n ## _reg_file(devinfo, inst); \ type = brw_inst_src ## n ## _type(devinfo, inst); \ - type_size = brw_reg_type_to_size(type); \ + type_size = brw_type_size_bytes(type); \ reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst); \ subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst) @@ -1758,7 +1760,7 @@ special_requirements_for_handling_double_precision_data_types( * Quad-Word data must not be used." */ if (devinfo->verx10 >= 125 && - (brw_type_is_float(type) || type_sz(type) == 8)) { + (brw_type_is_float(type) || brw_type_size_bytes(type) == 8)) { ERROR_IF(address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER && vstride == BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL, "Vx1 and VxH indirect addressing for Float, Half-Float, " @@ -1777,8 +1779,8 @@ special_requirements_for_handling_double_precision_data_types( enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); enum brw_reg_type src1_type = num_sources > 1 ? 
brw_inst_src1_type(devinfo, inst) : src0_type; - unsigned src0_type_size = brw_reg_type_to_size(src0_type); - unsigned src1_type_size = brw_reg_type_to_size(src1_type); + unsigned src0_type_size = brw_type_size_bytes(src0_type); + unsigned src1_type_size = brw_type_size_bytes(src1_type); ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 && dst_type_size == 8 && @@ -1820,17 +1822,20 @@ instruction_restrictions(const struct brw_isa_info *isa, if (devinfo->ver >= 12 && brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL) { enum brw_reg_type exec_type = execution_type(isa, inst); - const bool src0_valid = type_sz(brw_inst_src0_type(devinfo, inst)) == 4 || + const bool src0_valid = + brw_type_size_bytes(brw_inst_src0_type(devinfo, inst)) == 4 || brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE || !(brw_inst_src0_negate(devinfo, inst) || brw_inst_src0_abs(devinfo, inst)); - const bool src1_valid = type_sz(brw_inst_src1_type(devinfo, inst)) == 4 || + const bool src1_valid = + brw_type_size_bytes(brw_inst_src1_type(devinfo, inst)) == 4 || brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE || !(brw_inst_src1_negate(devinfo, inst) || brw_inst_src1_abs(devinfo, inst)); ERROR_IF(!brw_type_is_float(exec_type) && - type_sz(exec_type) == 4 && !(src0_valid && src1_valid), + brw_type_size_bytes(exec_type) == 4 && + !(src0_valid && src1_valid), "When multiplying a DW and any lower precision integer, source " "modifier is not supported."); } @@ -1861,7 +1866,8 @@ instruction_restrictions(const struct brw_isa_info *isa, * text. */ ERROR_IF(brw_type_is_int(src1_type) && - type_sz(src0_type) < 4 && type_sz(src1_type) == 4, + brw_type_size_bytes(src0_type) < 4 && + brw_type_size_bytes(src1_type) == 4, "When multiplying a DW and any lower precision integer, the " "DW operand must be src0."); @@ -2163,11 +2169,11 @@ instruction_restrictions(const struct brw_isa_info *isa, } const unsigned src1_bits_per_element = - (8 * brw_reg_type_to_size(src1_type)) >> + brw_type_size_bits(src1_type) >> brw_inst_dpas_3src_src1_subbyte(devinfo, inst); const unsigned src2_bits_per_element = - (8 * brw_reg_type_to_size(src2_type)) >> + brw_type_size_bits(src2_type) >> brw_inst_dpas_3src_src2_subbyte(devinfo, inst); /* The MAX2(1, ...) is just to prevent possible division by 0 later. 
*/ @@ -2208,16 +2214,16 @@ instruction_restrictions(const struct brw_isa_info *isa, "Src2 subregister offset must be a multiple of SystolicDepth " "times OPS_PER_CHAN."); - ERROR_IF(dst_subnr * type_sz(dst_type) >= REG_SIZE, + ERROR_IF(dst_subnr * brw_type_size_bytes(dst_type) >= REG_SIZE, "Destination subregister specifies next register."); - ERROR_IF(src0_subnr * type_sz(src0_type) >= REG_SIZE, + ERROR_IF(src0_subnr * brw_type_size_bytes(src0_type) >= REG_SIZE, "Src0 subregister specifies next register."); - ERROR_IF((src1_subnr * type_sz(src1_type) * src1_bits_per_element) / 8 >= REG_SIZE, + ERROR_IF((src1_subnr * brw_type_size_bytes(src1_type) * src1_bits_per_element) / 8 >= REG_SIZE, "Src1 subregister specifies next register."); - ERROR_IF((src2_subnr * type_sz(src2_type) * src2_bits_per_element) / 8 >= REG_SIZE, + ERROR_IF((src2_subnr * brw_type_size_bytes(src2_type) * src2_bits_per_element) / 8 >= REG_SIZE, "Src2 subregister specifies next register."); if (brw_inst_3src_atomic_control(devinfo, inst)) { diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index e9561d34f05..4f12a1c09f6 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -436,13 +436,13 @@ fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const if (devinfo->ver >= 12 && (opcode == BRW_OPCODE_MUL || opcode == BRW_OPCODE_MAD)) { const brw_reg_type exec_type = get_exec_type(this); - const unsigned min_type_sz = opcode == BRW_OPCODE_MAD ? - MIN2(type_sz(src[1].type), type_sz(src[2].type)) : - MIN2(type_sz(src[0].type), type_sz(src[1].type)); + const unsigned min_type_sz = opcode == BRW_OPCODE_MAD ? + MIN2(brw_type_size_bytes(src[1].type), brw_type_size_bytes(src[2].type)) : + MIN2(brw_type_size_bytes(src[0].type), brw_type_size_bytes(src[1].type)); if (brw_type_is_int(exec_type) && - type_sz(exec_type) >= 4 && - type_sz(exec_type) != min_type_sz) + brw_type_size_bytes(exec_type) >= 4 && + brw_type_size_bytes(exec_type) != min_type_sz) return false; } @@ -614,9 +614,9 @@ fs_reg::component_size(unsigned width) const const unsigned vs = vstride ? 1 << (vstride - 1) : 0; const unsigned hs = hstride ? 1 << (hstride - 1) : 0; assert(w > 0); - return ((MAX2(1, h) - 1) * vs + (w - 1) * hs + 1) * type_sz(type); + return ((MAX2(1, h) - 1) * vs + (w - 1) * hs + 1) * brw_type_size_bytes(type); } else { - return MAX2(width * stride, 1) * type_sz(type); + return MAX2(width * stride, 1) * brw_type_size_bytes(type); } } @@ -708,7 +708,7 @@ fs_inst::is_partial_write() const return this->size_written < 32; } - return this->exec_size * type_sz(this->dst.type) < 32 || + return this->exec_size * brw_type_size_bytes(this->dst.type) < 32 || !this->dst.is_contiguous(); } @@ -963,7 +963,7 @@ fs_inst::size_read(int arg) const switch (src[arg].file) { case UNIFORM: case IMM: - return components_read(arg) * type_sz(src[arg].type); + return components_read(arg) * brw_type_size_bytes(src[arg].type); case BAD_FILE: case ARF: case FIXED_GRF: @@ -1768,7 +1768,7 @@ fs_visitor::assign_urb_setup() * cross-channel access in the representation above are * disallowed. */ - assert(inst->src[i].stride * type_sz(inst->src[i].type) == chan_sz); + assert(inst->src[i].stride * brw_type_size_bytes(inst->src[i].type) == chan_sz); /* Number of channels processing the same polygon. */ const unsigned poly_width = dispatch_width / max_polygons; @@ -1791,7 +1791,7 @@ fs_visitor::assign_urb_setup() * are stored a GRF apart on the thread payload, so * use that as vertical stride. 
*/ - const unsigned vstride = reg_size / type_sz(inst->src[i].type); + const unsigned vstride = reg_size / brw_type_size_bytes(inst->src[i].type); assert(vstride <= 32); assert(chan % poly_width == 0); reg = stride(reg, vstride, poly_width, 0); @@ -1851,7 +1851,7 @@ fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst) */ unsigned total_size = inst->exec_size * inst->src[i].stride * - type_sz(inst->src[i].type); + brw_type_size_bytes(inst->src[i].type); assert(total_size <= 2 * REG_SIZE); const unsigned exec_size = @@ -2517,7 +2517,7 @@ fs_visitor::dump_instruction_to_file(const fs_inst *inst, FILE *file) const case FIXED_GRF: fprintf(file, "g%d", inst->dst.nr); if (inst->dst.subnr != 0) - fprintf(file, ".%d", inst->dst.subnr / type_sz(inst->dst.type)); + fprintf(file, ".%d", inst->dst.subnr / brw_type_size_bytes(inst->dst.type)); break; case BAD_FILE: fprintf(file, "(null)"); @@ -2658,7 +2658,7 @@ fs_visitor::dump_instruction_to_file(const fs_inst *inst, FILE *file) const if (inst->src[i].file == FIXED_GRF && inst->src[i].subnr != 0) { assert(inst->src[i].offset == 0); - fprintf(file, ".%d", inst->src[i].subnr / type_sz(inst->src[i].type)); + fprintf(file, ".%d", inst->src[i].subnr / brw_type_size_bytes(inst->src[i].type)); } else if (inst->src[i].offset || (inst->src[i].file == VGRF && alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) { diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index c0caafb9474..f5cbdb99f7d 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -192,7 +192,7 @@ namespace brw { if (n > 0) return fs_reg(VGRF, shader->alloc.allocate( - DIV_ROUND_UP(n * type_sz(type) * dispatch_width(), + DIV_ROUND_UP(n * brw_type_size_bytes(type) * dispatch_width(), unit * REG_SIZE) * unit), type); else @@ -476,7 +476,7 @@ namespace brw { /* The instruction splitting code isn't advanced enough to split * these so we need to handle that ourselves. 
*/ - if (dispatch_width() * type_sz(tmp.type) > 2 * REG_SIZE) { + if (dispatch_width() * brw_type_size_bytes(tmp.type) > 2 * REG_SIZE) { const unsigned half_width = dispatch_width() / 2; const fs_builder ubld = exec_all().group(half_width, 0); fs_reg left = tmp; @@ -496,7 +496,7 @@ namespace brw { } if (cluster_size > 2) { - if (type_sz(tmp.type) <= 4) { + if (brw_type_size_bytes(tmp.type) <= 4) { const fs_builder ubld = exec_all().group(dispatch_width() / 4, 0); ubld.emit_scan_step(opcode, mod, tmp, 1, 4, 2, 4); @@ -747,7 +747,7 @@ namespace brw { inst->header_size = header_size; inst->size_written = header_size * REG_SIZE; for (unsigned i = header_size; i < sources; i++) { - inst->size_written += dispatch_width() * type_sz(src[i].type) * + inst->size_written += dispatch_width() * brw_type_size_bytes(src[i].type) * dst.stride; } diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp index 2888bc1dd3b..c310143ee5b 100644 --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp @@ -341,7 +341,8 @@ opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block) if (!brw_type_is_float(inst->dst.type)) break; - if (type_sz(scan_inst->dst.type) > type_sz(inst->dst.type)) + if (brw_type_size_bits(scan_inst->dst.type) > + brw_type_size_bits(inst->dst.type)) break; } else { /* If the destination type of scan_inst is integer, then: @@ -360,11 +361,12 @@ opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block) * as the destination of inst and the same signedness. */ if (!brw_type_is_int(inst->src[0].type) || - type_sz(scan_inst->dst.type) != type_sz(inst->src[0].type)) + brw_type_size_bits(scan_inst->dst.type) != brw_type_size_bits(inst->src[0].type)) break; if (brw_type_is_int(inst->dst.type)) { - if (type_sz(inst->dst.type) < type_sz(scan_inst->dst.type)) + if (brw_type_size_bits(inst->dst.type) < + brw_type_size_bits(scan_inst->dst.type)) break; if (inst->conditional_mod != BRW_CONDITIONAL_Z && @@ -388,7 +390,8 @@ opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block) /* Comparison result may be altered if the bit-size changes * since that affects range, denorms, etc */ - if (type_sz(scan_inst->dst.type) != type_sz(inst->dst.type)) + if (brw_type_size_bits(scan_inst->dst.type) != + brw_type_size_bits(inst->dst.type)) break; if (brw_type_is_float(scan_inst->dst.type) != diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp index 42b811f8626..ab57190d915 100644 --- a/src/intel/compiler/brw_fs_combine_constants.cpp +++ b/src/intel/compiler/brw_fs_combine_constants.cpp @@ -1081,7 +1081,7 @@ add_candidate_immediate(struct table *table, fs_inst *inst, unsigned ip, unsigned box_idx = box_instruction(table, const_ctx, inst, ip, block); v->value.u64 = inst->src[i].d64; - v->bit_size = 8 * type_sz(inst->src[i].type); + v->bit_size = brw_type_size_bits(inst->src[i].type); v->instr_index = box_idx; v->src = i; v->allow_one_constant = allow_one_constant; @@ -1570,7 +1570,7 @@ brw_fs_opt_combine_constants(fs_visitor &s) struct brw_reg imm_reg = build_imm_reg_for_copy(imm); /* Ensure we have enough space in the register to copy the immediate */ - assert(reg.offset + type_sz(imm_reg.type) * width <= REG_SIZE); + assert(reg.offset + brw_type_size_bytes(imm_reg.type) * width <= REG_SIZE); ibld.MOV(retype(reg, imm_reg.type), imm_reg); } @@ -1585,11 +1585,11 @@ brw_fs_opt_combine_constants(fs_visitor 
&s) if (link->type == either_type) { /* Do not change the register type. */ } else if (link->type == integer_only) { - reg->type = brw_int_type(type_sz(reg->type), true); + reg->type = brw_int_type(brw_type_size_bytes(reg->type), true); } else { assert(link->type == float_only); - switch (type_sz(reg->type)) { + switch (brw_type_size_bytes(reg->type)) { case 2: reg->type = BRW_TYPE_HF; break; @@ -1606,7 +1606,7 @@ brw_fs_opt_combine_constants(fs_visitor &s) } else if ((link->inst->opcode == BRW_OPCODE_SHL || link->inst->opcode == BRW_OPCODE_ASR) && link->negate) { - reg->type = brw_int_type(type_sz(reg->type), true); + reg->type = brw_int_type(brw_type_size_bytes(reg->type), true); } #if MESA_DEBUG diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index fbd55ff5985..a3a534a4e08 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -591,8 +591,8 @@ can_take_stride(fs_inst *inst, brw_reg_type dst_type, * would break this restriction. */ if (has_dst_aligned_region_restriction(devinfo, inst, dst_type) && - !(type_sz(inst->src[arg].type) * stride == - type_sz(dst_type) * inst->dst.stride || + !(brw_type_size_bytes(inst->src[arg].type) * stride == + brw_type_size_bytes(dst_type) * inst->dst.stride || stride == 0)) return false; @@ -607,7 +607,7 @@ can_take_stride(fs_inst *inst, brw_reg_type dst_type, * cannot use the replicate control. */ if (inst->is_3src(compiler)) { - if (type_sz(inst->src[arg].type) > 4) + if (brw_type_size_bytes(inst->src[arg].type) > 4) return stride == 1; else return stride == 1 || stride == 0; @@ -825,7 +825,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, * destination of the copy, and simply replacing the sources would give a * program with different semantics. 
*/ - if ((type_sz(entry->dst.type) < type_sz(inst->src[arg].type) || + if ((brw_type_size_bits(entry->dst.type) < brw_type_size_bits(inst->src[arg].type) || entry->is_partial_write) && inst->opcode != BRW_OPCODE_MOV) { return false; @@ -846,7 +846,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, */ if (entry_stride != 1 && (inst->src[arg].stride * - type_sz(inst->src[arg].type)) % type_sz(entry->src.type) != 0) + brw_type_size_bytes(inst->src[arg].type)) % brw_type_size_bytes(entry->src.type) != 0) return false; /* Since semantics of source modifiers are type-dependent we need to @@ -858,7 +858,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, if (has_source_modifiers && entry->dst.type != inst->src[arg].type && (!inst->can_change_types() || - type_sz(entry->dst.type) != type_sz(inst->src[arg].type))) + brw_type_size_bits(entry->dst.type) != brw_type_size_bits(inst->src[arg].type))) return false; if ((entry->src.negate || entry->src.abs) && @@ -881,8 +881,9 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, if (entry->src.file == FIXED_GRF) { if (inst->src[arg].stride) { const unsigned orig_width = 1 << entry->src.width; - const unsigned reg_width = REG_SIZE / (type_sz(inst->src[arg].type) * - inst->src[arg].stride); + const unsigned reg_width = + REG_SIZE / (brw_type_size_bytes(inst->src[arg].type) * + inst->src[arg].stride); inst->src[arg].width = cvt(MIN2(orig_width, reg_width)) - 1; inst->src[arg].hstride = cvt(inst->src[arg].stride); inst->src[arg].vstride = inst->src[arg].hstride + inst->src[arg].width; @@ -904,14 +905,14 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst, * reading, and the base byte offset within that component. */ assert(entry->dst.stride == 1); - const unsigned component = rel_offset / type_sz(entry->dst.type); - const unsigned suboffset = rel_offset % type_sz(entry->dst.type); + const unsigned component = rel_offset / brw_type_size_bytes(entry->dst.type); + const unsigned suboffset = rel_offset % brw_type_size_bytes(entry->dst.type); /* Calculate the byte offset at the origin of the copy of the given * component and suboffset. */ inst->src[arg] = byte_offset(inst->src[arg], - component * entry_stride * type_sz(entry->src.type) + suboffset); + component * entry_stride * brw_type_size_bytes(entry->src.type) + suboffset); if (has_source_modifiers) { if (entry->dst.type != inst->src[arg].type) { @@ -941,7 +942,7 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst, { bool progress = false; - if (type_sz(entry->src.type) > 4) + if (brw_type_size_bytes(entry->src.type) > 4) return false; if (inst->src[arg].file != VGRF) @@ -962,7 +963,8 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst, * type, the entry doesn't contain all of the data that the user is * trying to use. */ - if (type_sz(inst->src[arg].type) > type_sz(entry->dst.type)) + if (brw_type_size_bits(inst->src[arg].type) > + brw_type_size_bits(entry->dst.type)) return false; fs_reg val = entry->src; @@ -976,8 +978,10 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst, * ... 
* mul(8) g47<1>D g86<8,8,1>D g12<16,8,2>W */ - if (type_sz(inst->src[arg].type) < type_sz(entry->dst.type)) { - if (type_sz(inst->src[arg].type) != 2 || type_sz(entry->dst.type) != 4) + if (brw_type_size_bits(inst->src[arg].type) < + brw_type_size_bits(entry->dst.type)) { + if (brw_type_size_bytes(inst->src[arg].type) != 2 || + brw_type_size_bytes(entry->dst.type) != 4) return false; assert(inst->src[arg].subnr == 0 || inst->src[arg].subnr == 2); @@ -1059,7 +1063,7 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst, * will now "fix" the constant. */ if (inst->opcode == BRW_OPCODE_MUL && - type_sz(inst->src[1].type) < 4 && + brw_type_size_bytes(inst->src[1].type) < 4 && (inst->src[0].type == BRW_TYPE_D || inst->src[0].type == BRW_TYPE_UD)) { inst->src[0] = val; @@ -1352,8 +1356,8 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx, int offset = 0; for (int i = 0; i < inst->sources; i++) { int effective_width = i < inst->header_size ? 8 : inst->exec_size; - const unsigned size_written = effective_width * - type_sz(inst->src[i].type); + const unsigned size_written = + effective_width * brw_type_size_bytes(inst->src[i].type); if (inst->src[i].file == VGRF || (inst->src[i].file == FIXED_GRF && inst->src[i].is_contiguous())) { diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index e7f422b9727..22471d10a09 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -224,7 +224,7 @@ fs_generator::generate_mov_indirect(fs_inst *inst, reg.nr = imm_byte_offset / REG_SIZE; reg.subnr = imm_byte_offset % REG_SIZE; - if (type_sz(reg.type) > 4 && !devinfo->has_64bit_int) { + if (brw_type_size_bytes(reg.type) > 4 && !devinfo->has_64bit_int) { brw_MOV(p, subscript(dst, BRW_TYPE_D, 0), subscript(reg, BRW_TYPE_D, 0)); brw_set_default_swsb(p, tgl_swsb_null()); @@ -298,7 +298,7 @@ fs_generator::generate_mov_indirect(fs_inst *inst, else brw_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl); - if (type_sz(reg.type) > 4 && + if (brw_type_size_bytes(reg.type) > 4 && (intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_int)) { /* From the Cherryview PRM Vol 7. "Register Region Restrictions": * @@ -338,7 +338,7 @@ fs_generator::generate_shuffle(fs_inst *inst, /* Ivy bridge has some strange behavior that makes this a real pain to * implement for 64-bit values so we just don't bother. */ - assert(devinfo->has_64bit_float || type_sz(src.type) <= 4); + assert(devinfo->has_64bit_float || brw_type_size_bytes(src.type) <= 4); /* Gen12.5 adds the following region restriction: * @@ -388,8 +388,8 @@ fs_generator::generate_shuffle(fs_inst *inst, group_idx.vstride--; } - assert(type_sz(group_idx.type) <= 4); - if (type_sz(group_idx.type) == 4) { + assert(brw_type_size_bytes(group_idx.type) <= 4); + if (brw_type_size_bytes(group_idx.type) == 4) { /* The destination stride of an instruction (in bytes) must be * greater than or equal to the size of the rest of the * instruction. Since the address register is of type UW, we @@ -438,7 +438,7 @@ fs_generator::generate_shuffle(fs_inst *inst, /* Take into account the component size and horizontal stride. 
*/ assert(src.vstride == src.hstride + src.width); insn = brw_SHL(p, addr, group_idx, - brw_imm_uw(util_logbase2(type_sz(src.type)) + + brw_imm_uw(util_logbase2(brw_type_size_bytes(src.type)) + src.hstride - 1)); if (devinfo->ver >= 12) brw_set_default_swsb(p, tgl_swsb_regdist(1)); @@ -468,7 +468,7 @@ fs_generator::generate_quad_swizzle(const fs_inst *inst, /* The value is uniform across all channels */ brw_MOV(p, dst, src); - } else if (devinfo->ver < 11 && type_sz(src.type) == 4) { + } else if (devinfo->ver < 11 && brw_type_size_bytes(src.type) == 4) { /* This only works on 8-wide 32-bit values */ assert(inst->exec_size == 8); assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); @@ -581,7 +581,7 @@ fs_generator::generate_ddx(const fs_inst *inst, width = BRW_WIDTH_4; } - struct brw_reg src0 = byte_offset(src, type_sz(src.type));; + struct brw_reg src0 = byte_offset(src, brw_type_size_bytes(src.type));; struct brw_reg src1 = src; src0.vstride = vstride; @@ -602,7 +602,7 @@ void fs_generator::generate_ddy(const fs_inst *inst, struct brw_reg dst, struct brw_reg src) { - const uint32_t type_size = type_sz(src.type); + const uint32_t type_size = brw_type_size_bytes(src.type); if (inst->opcode == FS_OPCODE_DDY_FINE) { /* produce accurate derivatives. @@ -1118,7 +1118,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, case FS_OPCODE_PIXEL_X: assert(src[0].type == BRW_TYPE_UW); assert(src[1].type == BRW_TYPE_UW); - src[0].subnr = 0 * type_sz(src[0].type); + src[0].subnr = 0 * brw_type_size_bytes(src[0].type); if (src[1].file == BRW_IMMEDIATE_VALUE) { assert(src[1].ud == 0); brw_MOV(p, dst, stride(src[0], 8, 4, 1)); @@ -1130,7 +1130,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, case FS_OPCODE_PIXEL_Y: assert(src[0].type == BRW_TYPE_UW); assert(src[1].type == BRW_TYPE_UW); - src[0].subnr = 4 * type_sz(src[0].type); + src[0].subnr = 4 * brw_type_size_bytes(src[0].type); if (src[1].file == BRW_IMMEDIATE_VALUE) { assert(src[1].ud == 0); brw_MOV(p, dst, stride(src[0], 8, 4, 1)); @@ -1244,7 +1244,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, case SHADER_OPCODE_SEL_EXEC: assert(inst->force_writemask_all); - assert(devinfo->has_64bit_float || type_sz(dst.type) <= 4); + assert(devinfo->has_64bit_float || brw_type_size_bytes(dst.type) <= 4); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, dst, src[1]); brw_set_default_mask_control(p, BRW_MASK_ENABLE); @@ -1260,7 +1260,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, case SHADER_OPCODE_CLUSTER_BROADCAST: { assert((!intel_device_info_is_9lp(devinfo) && - devinfo->has_64bit_float) || type_sz(src[0].type) <= 4); + devinfo->has_64bit_float) || brw_type_size_bytes(src[0].type) <= 4); assert(!src[0].negate && !src[0].abs); assert(src[1].file == BRW_IMMEDIATE_VALUE); assert(src[1].type == BRW_TYPE_UD); diff --git a/src/intel/compiler/brw_fs_lower.cpp b/src/intel/compiler/brw_fs_lower.cpp index 5fc6857ce39..4e3c3e914be 100644 --- a/src/intel/compiler/brw_fs_lower.cpp +++ b/src/intel/compiler/brw_fs_lower.cpp @@ -624,7 +624,7 @@ brw_fs_lower_vgrf_to_fixed_grf(const struct intel_device_info *devinfo, fs_inst new_reg = brw_vec1_grf(reg->nr, 0); } else if (reg->stride > 4) { assert(reg != &inst->dst); - assert(reg->stride * type_sz(reg->type) <= REG_SIZE); + assert(reg->stride * brw_type_size_bytes(reg->type) <= REG_SIZE); new_reg = brw_vecn_grf(1, reg->nr, 0); new_reg = stride(new_reg, reg->stride, 1, 0); } else { @@ -636,7 +636,8 @@ brw_fs_lower_vgrf_to_fixed_grf(const 
struct intel_device_info *devinfo, fs_inst * * The maximum width value that could satisfy this restriction is: */ - const unsigned reg_width = REG_SIZE / (reg->stride * type_sz(reg->type)); + const unsigned reg_width = + REG_SIZE / (reg->stride * brw_type_size_bytes(reg->type)); /* Because the hardware can only split source regions at a whole * multiple of width during decompression (i.e. vertically), clamp diff --git a/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp b/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp index 431cc5ca7c4..2b51abb0371 100644 --- a/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp +++ b/src/intel/compiler/brw_fs_lower_integer_multiplication.cpp @@ -424,7 +424,7 @@ brw_fs_lower_integer_multiplication(fs_visitor &s) /* If the instruction is already in a form that does not need lowering, * return early. */ - if (type_sz(inst->src[1].type) < 4 && type_sz(inst->src[0].type) <= 4) + if (brw_type_size_bytes(inst->src[1].type) < 4 && brw_type_size_bytes(inst->src[0].type) <= 4) continue; if ((inst->dst.type == BRW_TYPE_Q || diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp index c698d433698..70231b5ef57 100644 --- a/src/intel/compiler/brw_fs_lower_regioning.cpp +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp @@ -38,7 +38,7 @@ namespace { bool is_byte_raw_mov(const fs_inst *inst) { - return type_sz(inst->dst.type) == 1 && + return brw_type_size_bytes(inst->dst.type) == 1 && inst->opcode == BRW_OPCODE_MOV && inst->src[0].type == inst->dst.type && !inst->saturate && @@ -55,17 +55,19 @@ namespace { unsigned i) { if (has_dst_aligned_region_restriction(devinfo, inst)) { - return MAX2(type_sz(inst->dst.type), byte_stride(inst->dst)); + return MAX2(brw_type_size_bytes(inst->dst.type), + byte_stride(inst->dst)); } else if (has_subdword_integer_region_restriction(devinfo, inst) && - type_sz(inst->src[i].type) < 4 && byte_stride(inst->src[i]) >= 4) { + brw_type_size_bytes(inst->src[i].type) < 4 && + byte_stride(inst->src[i]) >= 4) { /* Use a stride of 32bits if possible, since that will guarantee that * the copy emitted to lower this region won't be affected by the * sub-dword integer region restrictions. This may not be possible * for the second source of an instruction if we're required to use * packed data due to Wa_16012383669. */ - return (i == 1 ? type_sz(inst->src[i].type) : 4); + return (i == 1 ? brw_type_size_bytes(inst->src[i].type) : 4); } else { return byte_stride(inst->src[i]); @@ -84,16 +86,17 @@ namespace { return reg_offset(inst->dst) % (reg_unit(devinfo) * REG_SIZE); } else if (has_subdword_integer_region_restriction(devinfo, inst) && - type_sz(inst->src[i].type) < 4 && byte_stride(inst->src[i]) >= 4) { - const unsigned dst_byte_stride = MAX2(byte_stride(inst->dst), - type_sz(inst->dst.type)); + brw_type_size_bytes(inst->src[i].type) < 4 && + byte_stride(inst->src[i]) >= 4) { + const unsigned dst_byte_stride = + MAX2(byte_stride(inst->dst), brw_type_size_bytes(inst->dst.type)); const unsigned src_byte_stride = required_src_byte_stride(devinfo, inst, i); const unsigned dst_byte_offset = reg_offset(inst->dst) % (reg_unit(devinfo) * REG_SIZE); const unsigned src_byte_offset = reg_offset(inst->src[i]) % (reg_unit(devinfo) * REG_SIZE); - if (src_byte_stride > type_sz(inst->src[i].type)) { + if (src_byte_stride > brw_type_size_bytes(inst->src[i].type)) { assert(src_byte_stride >= dst_byte_stride); /* The source is affected by the Xe2+ sub-dword integer regioning * restrictions. 
For the case of source 0 BSpec#56640 specifies a @@ -120,7 +123,7 @@ namespace { const unsigned m = 64 * dst_byte_stride / src_byte_stride; return dst_byte_offset % m * src_byte_stride / dst_byte_stride; } else { - assert(src_byte_stride == type_sz(inst->src[i].type)); + assert(src_byte_stride == brw_type_size_bytes(inst->src[i].type)); /* A packed source is required, likely due to the stricter * requirements of the second source region. The source being * packed guarantees that the region of the original instruction @@ -159,8 +162,8 @@ namespace { * lowering pass will detect the mismatch in has_invalid_src_region * and fix the sources of the multiply instead of the destination. */ - return inst->dst.hstride * type_sz(inst->dst.type); - } else if (type_sz(inst->dst.type) < get_exec_type_size(inst) && + return inst->dst.hstride * brw_type_size_bytes(inst->dst.type); + } else if (brw_type_size_bytes(inst->dst.type) < get_exec_type_size(inst) && !is_byte_raw_mov(inst)) { return get_exec_type_size(inst); } else { @@ -168,13 +171,13 @@ namespace { * size across all source and destination operands we are required to * lower. */ - unsigned max_stride = inst->dst.stride * type_sz(inst->dst.type); - unsigned min_size = type_sz(inst->dst.type); - unsigned max_size = type_sz(inst->dst.type); + unsigned max_stride = inst->dst.stride * brw_type_size_bytes(inst->dst.type); + unsigned min_size = brw_type_size_bytes(inst->dst.type); + unsigned max_size = brw_type_size_bytes(inst->dst.type); for (unsigned i = 0; i < inst->sources; i++) { if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) { - const unsigned size = type_sz(inst->src[i].type); + const unsigned size = brw_type_size_bytes(inst->src[i].type); max_stride = MAX2(max_stride, inst->src[i].stride * size); min_size = MIN2(min_size, size); max_size = MAX2(max_size, size); @@ -239,23 +242,23 @@ namespace { * don't support 64-bit types at all. */ if ((!devinfo->has_64bit_int || - intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4) + intel_device_info_is_9lp(devinfo)) && brw_type_size_bytes(t) > 4) return BRW_TYPE_UD; else if (has_dst_aligned_region_restriction(devinfo, inst)) - return brw_int_type(type_sz(t), false); + return brw_int_type(brw_type_size_bytes(t), false); else return t; case SHADER_OPCODE_SEL_EXEC: if ((!has_64bit || devinfo->has_64bit_float_via_math_pipe) && - type_sz(t) > 4) + brw_type_size_bytes(t) > 4) return BRW_TYPE_UD; else return t; case SHADER_OPCODE_QUAD_SWIZZLE: if (has_dst_aligned_region_restriction(devinfo, inst)) - return brw_int_type(type_sz(t), false); + return brw_int_type(brw_type_size_bytes(t), false); else return t; @@ -276,10 +279,10 @@ namespace { * support 64-bit types at all. 
*/ if ((!has_64bit || devinfo->verx10 >= 125 || - intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4) + intel_device_info_is_9lp(devinfo)) && brw_type_size_bytes(t) > 4) return BRW_TYPE_UD; else - return brw_int_type(type_sz(t), false); + return brw_int_type(brw_type_size_bytes(t), false); default: return t; @@ -336,7 +339,7 @@ namespace { const brw_reg_type exec_type = get_exec_type(inst); const unsigned dst_byte_offset = reg_offset(inst->dst) % (reg_unit(devinfo) * REG_SIZE); const bool is_narrowing_conversion = !is_byte_raw_mov(inst) && - type_sz(inst->dst.type) < type_sz(exec_type); + brw_type_size_bytes(inst->dst.type) < brw_type_size_bytes(exec_type); return (has_dst_aligned_region_restriction(devinfo, inst) && (required_dst_byte_stride(inst) != byte_stride(inst->dst) || @@ -455,8 +458,8 @@ namespace brw { assert(v->devinfo->has_integer_dword_mul || inst->opcode != BRW_OPCODE_MUL || brw_type_is_float(get_exec_type(inst)) || - MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4 || - type_sz(inst->src[i].type) == get_exec_type_size(inst)); + MIN2(brw_type_size_bytes(inst->src[0].type), brw_type_size_bytes(inst->src[1].type)) >= 4 || + brw_type_size_bytes(inst->src[i].type) == get_exec_type_size(inst)); const fs_builder ibld(v, block, inst); const fs_reg tmp = ibld.vgrf(get_exec_type(inst)); @@ -488,8 +491,8 @@ namespace { * instructions into the program unnecessarily. */ const unsigned stride = - type_sz(inst->dst.type) * inst->dst.stride <= type_sz(type) ? 1 : - type_sz(inst->dst.type) * inst->dst.stride / type_sz(type); + brw_type_size_bytes(inst->dst.type) * inst->dst.stride <= brw_type_size_bytes(type) ? 1 : + brw_type_size_bytes(inst->dst.type) * inst->dst.stride / brw_type_size_bytes(type); fs_reg tmp = ibld.vgrf(type, stride); ibld.UNDEF(tmp); tmp = horiz_stride(tmp, stride); @@ -532,7 +535,7 @@ namespace { const intel_device_info *devinfo = v->devinfo; const fs_builder ibld(v, block, inst); const unsigned stride = required_src_byte_stride(devinfo, inst, i) / - type_sz(inst->src[i].type); + brw_type_size_bytes(inst->src[i].type); assert(stride > 0); /* Calculate the size of the temporary allocation manually instead of * relying on the builder, since we may have to add some amount of @@ -541,7 +544,8 @@ namespace { */ const unsigned size = DIV_ROUND_UP(required_src_byte_offset(v->devinfo, inst, i) + - inst->exec_size * stride * type_sz(inst->src[i].type), + inst->exec_size * stride * + brw_type_size_bytes(inst->src[i].type), reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo); fs_reg tmp(VGRF, v->alloc.allocate(size), inst->src[i].type); ibld.UNDEF(tmp); @@ -551,9 +555,9 @@ namespace { /* Emit a series of 32-bit integer copies with any source modifiers * cleaned up (because their semantics are dependent on the type). 
*/ - const brw_reg_type raw_type = brw_int_type(MIN2(type_sz(tmp.type), 4), + const brw_reg_type raw_type = brw_int_type(MIN2(brw_type_size_bytes(tmp.type), 4), false); - const unsigned n = type_sz(tmp.type) / type_sz(raw_type); + const unsigned n = brw_type_size_bytes(tmp.type) / brw_type_size_bytes(raw_type); fs_reg raw_src = inst->src[i]; raw_src.negate = false; raw_src.abs = false; @@ -599,7 +603,7 @@ namespace { const fs_builder ibld(v, block, inst); const unsigned stride = required_dst_byte_stride(inst) / - type_sz(inst->dst.type); + brw_type_size_bytes(inst->dst.type); assert(stride > 0); fs_reg tmp = ibld.vgrf(inst->dst.type, stride); ibld.UNDEF(tmp); @@ -608,9 +612,9 @@ namespace { /* Emit a series of 32-bit integer copies from the temporary into the * original destination. */ - const brw_reg_type raw_type = brw_int_type(MIN2(type_sz(tmp.type), 4), + const brw_reg_type raw_type = brw_int_type(MIN2(brw_type_size_bytes(tmp.type), 4), false); - const unsigned n = type_sz(tmp.type) / type_sz(raw_type); + const unsigned n = brw_type_size_bytes(tmp.type) / brw_type_size_bytes(raw_type); if (inst->predicate && inst->opcode != BRW_OPCODE_SEL) { /* Note that in general we cannot simply predicate the copies on the @@ -656,7 +660,7 @@ namespace { assert(inst->dst.type == get_exec_type(inst)); const unsigned mask = has_invalid_exec_type(v->devinfo, inst); const brw_reg_type raw_type = required_exec_type(v->devinfo, inst); - const unsigned n = get_exec_type_size(inst) / type_sz(raw_type); + const unsigned n = get_exec_type_size(inst) / brw_type_size_bytes(raw_type); const fs_builder ibld(v, block, inst); fs_reg tmp = ibld.vgrf(inst->dst.type, inst->dst.stride); diff --git a/src/intel/compiler/brw_fs_lower_simd_width.cpp b/src/intel/compiler/brw_fs_lower_simd_width.cpp index 04553a20e6d..ab85e38aa77 100644 --- a/src/intel/compiler/brw_fs_lower_simd_width.cpp +++ b/src/intel/compiler/brw_fs_lower_simd_width.cpp @@ -408,7 +408,7 @@ brw_fs_get_lowered_simd_width(const fs_visitor *shader, const fs_inst *inst) const unsigned swiz = inst->src[1].ud; return (is_uniform(inst->src[0]) ? get_fpu_lowered_simd_width(shader, inst) : - devinfo->ver < 11 && type_sz(inst->src[0].type) == 4 ? 8 : + devinfo->ver < 11 && brw_type_size_bytes(inst->src[0].type) == 4 ? 8 : swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ? 4 : get_fpu_lowered_simd_width(shader, inst)); } @@ -425,7 +425,7 @@ brw_fs_get_lowered_simd_width(const fs_visitor *shader, const fs_inst *inst) const unsigned max_size = 2 * REG_SIZE; /* Prior to Broadwell, we only have 8 address subregisters. 
*/ return MIN3(16, - max_size / (inst->dst.stride * type_sz(inst->dst.type)), + max_size / (inst->dst.stride * brw_type_size_bytes(inst->dst.type)), inst->exec_size); } @@ -440,7 +440,7 @@ brw_fs_get_lowered_simd_width(const fs_visitor *shader, const fs_inst *inst) */ assert(!inst->header_size); for (unsigned i = 0; i < inst->sources; i++) - assert(type_sz(inst->dst.type) == type_sz(inst->src[i].type) || + assert(brw_type_size_bits(inst->dst.type) == brw_type_size_bits(inst->src[i].type) || inst->src[i].file == BAD_FILE); return inst->exec_size / DIV_ROUND_UP(reg_count, 2); @@ -465,7 +465,7 @@ needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i) (inst->components_read(i) == 1 && lbld.dispatch_width() <= inst->exec_size)) || (inst->flags_written(lbld.shader->devinfo) & - brw_fs_flag_mask(inst->src[i], type_sz(inst->src[i].type))); + brw_fs_flag_mask(inst->src[i], brw_type_size_bytes(inst->src[i].type))); } /** diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 6203ec1cdfa..7abf36c6e1b 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -858,7 +858,7 @@ emit_fsign(nir_to_brw_state &ntb, const fs_builder &bld, const nir_alu_instr *in op[0] = offset(op[0], bld, fsign_instr->src[0].swizzle[channel]); } - if (type_sz(op[0].type) == 2) { + if (brw_type_size_bytes(op[0].type) == 2) { /* AND(val, 0x8000) gives the sign bit. * * Predicated OR ORs 1.0 (0x3c00) with the sign bit if val is not zero. @@ -878,7 +878,7 @@ emit_fsign(nir_to_brw_state &ntb, const fs_builder &bld, const nir_alu_instr *in } inst->predicate = BRW_PREDICATE_NORMAL; - } else if (type_sz(op[0].type) == 4) { + } else if (brw_type_size_bytes(op[0].type) == 4) { /* AND(val, 0x80000000) gives the sign bit. 
* * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not @@ -1024,7 +1024,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, default: for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { - assert(type_sz(op[i].type) > 1); + assert(brw_type_size_bytes(op[i].type) > 1); } } #endif @@ -1108,7 +1108,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, if (BRW_RND_MODE_UNSPECIFIED != rnd) bld.exec_all().emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), brw_imm_d(rnd)); - assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */ + assert(brw_type_size_bytes(op[0].type) < 8); /* brw_nir_lower_conversions */ inst = bld.MOV(result, op[0]); break; } @@ -1145,19 +1145,19 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, if (result.type == BRW_TYPE_B || result.type == BRW_TYPE_UB || result.type == BRW_TYPE_HF) - assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */ + assert(brw_type_size_bytes(op[0].type) < 8); /* brw_nir_lower_conversions */ if (op[0].type == BRW_TYPE_B || op[0].type == BRW_TYPE_UB || op[0].type == BRW_TYPE_HF) - assert(type_sz(result.type) < 8); /* brw_nir_lower_conversions */ + assert(brw_type_size_bytes(result.type) < 8); /* brw_nir_lower_conversions */ inst = bld.MOV(result, op[0]); break; case nir_op_i2i8: case nir_op_u2u8: - assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */ + assert(brw_type_size_bytes(op[0].type) < 8); /* brw_nir_lower_conversions */ FALLTHROUGH; case nir_op_i2i16: case nir_op_u2u16: { @@ -1220,7 +1220,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, } if (op[0].type == BRW_TYPE_HF) - assert(type_sz(result.type) < 8); /* brw_nir_lower_conversions */ + assert(brw_type_size_bytes(result.type) < 8); /* brw_nir_lower_conversions */ inst = bld.MOV(result, op[0]); break; @@ -1468,7 +1468,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, case nir_op_ine32: { fs_reg dest = result; - const uint32_t bit_size = type_sz(op[0].type) * 8; + const uint32_t bit_size = brw_type_size_bits(op[0].type); if (bit_size != 32) { dest = bld.vgrf(op[0].type); bld.UNDEF(dest); @@ -2517,7 +2517,7 @@ emit_gs_input_load(nir_to_brw_state &ntb, const fs_reg &dst, fs_visitor &s = ntb.s; - assert(type_sz(dst.type) == 4); + assert(brw_type_size_bytes(dst.type) == 4); struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(s.prog_data); const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8; @@ -3045,7 +3045,7 @@ fs_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb, * Also attempt to deal with gl_PointSize being in the .w component. 
*/ if (inst->offset == 0 && indirect_offset.file == BAD_FILE) { - assert(type_sz(dst.type) == 4); + assert(brw_type_size_bytes(dst.type) == 4); inst->dst = bld.vgrf(dst.type, 4); inst->size_written = 4 * REG_SIZE * reg_unit(devinfo); bld.MOV(dst, offset(inst->dst, bld, 3)); @@ -4622,8 +4622,10 @@ static fs_reg brw_nir_reduction_op_identity(const fs_builder &bld, nir_op op, brw_reg_type type) { - nir_const_value value = nir_alu_binop_identity(op, type_sz(type) * 8); - switch (type_sz(type)) { + nir_const_value value = + nir_alu_binop_identity(op, brw_type_size_bits(type)); + + switch (brw_type_size_bytes(type)) { case 1: if (type == BRW_TYPE_UB) { return brw_imm_uw(value.u8); @@ -6270,13 +6272,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * the type size */ unsigned base_offset = nir_intrinsic_base(instr); - assert(base_offset % 4 == 0 || base_offset % type_sz(dest.type) == 0); + assert(base_offset % 4 == 0 || base_offset % brw_type_size_bytes(dest.type) == 0); fs_reg src(UNIFORM, base_offset / 4, dest.type); if (nir_src_is_const(instr->src[0])) { unsigned load_offset = nir_src_as_uint(instr->src[0]); - assert(load_offset % type_sz(dest.type) == 0); + assert(load_offset % brw_type_size_bytes(dest.type) == 0); /* The base offset can only handle 32-bit units, so for 16-bit * data take the modulo of the offset with 4 bytes and add it to * the offset to read from within the source register. @@ -6296,13 +6298,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * one component of the vector. */ assert(nir_intrinsic_range(instr) >= - instr->num_components * type_sz(dest.type)); + instr->num_components * brw_type_size_bytes(dest.type)); unsigned read_size = nir_intrinsic_range(instr) - - (instr->num_components - 1) * type_sz(dest.type); + (instr->num_components - 1) * brw_type_size_bytes(dest.type); bool supports_64bit_indirects = !intel_device_info_is_9lp(devinfo); - if (type_sz(dest.type) != 8 || supports_64bit_indirects) { + if (brw_type_size_bytes(dest.type) != 8 || supports_64bit_indirects) { for (unsigned j = 0; j < instr->num_components; j++) { bld.emit(SHADER_OPCODE_MOV_INDIRECT, offset(dest, bld, j), offset(src, bld, j), @@ -6310,12 +6312,12 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, } } else { const unsigned num_mov_indirects = - type_sz(dest.type) / type_sz(BRW_TYPE_UD); + brw_type_size_bytes(dest.type) / brw_type_size_bytes(BRW_TYPE_UD); /* We read a little bit less per MOV INDIRECT, as they are now * 32-bits ones instead of 64-bit. Fix read_size then. */ const unsigned read_size_32bit = read_size - - (num_mov_indirects - 1) * type_sz(BRW_TYPE_UD); + (num_mov_indirects - 1) * brw_type_size_bytes(BRW_TYPE_UD); for (unsigned j = 0; j < instr->num_components; j++) { for (unsigned i = 0; i < num_mov_indirects; i++) { bld.emit(SHADER_OPCODE_MOV_INDIRECT, @@ -6344,14 +6346,14 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, fs_reg base_offset = retype(get_nir_src(ntb, instr->src[1]), BRW_TYPE_UD); - const unsigned comps_per_load = type_sz(dest.type) == 8 ? 2 : 4; + const unsigned comps_per_load = brw_type_size_bytes(dest.type) == 8 ? 2 : 4; for (int i = 0; i < instr->num_components; i += comps_per_load) { const unsigned remaining = instr->num_components - i; s.VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surface, surface_handle, base_offset, - i * type_sz(dest.type), + i * brw_type_size_bytes(dest.type), instr->def.bit_size / 8, MIN2(remaining, comps_per_load)); } @@ -6422,7 +6424,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, * we let CSE deal with duplicate loads. 
@@ -6270,13 +6272,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        * the type size
        */
       unsigned base_offset = nir_intrinsic_base(instr);
-      assert(base_offset % 4 == 0 || base_offset % type_sz(dest.type) == 0);
+      assert(base_offset % 4 == 0 || base_offset % brw_type_size_bytes(dest.type) == 0);
 
       fs_reg src(UNIFORM, base_offset / 4, dest.type);
 
       if (nir_src_is_const(instr->src[0])) {
          unsigned load_offset = nir_src_as_uint(instr->src[0]);
-         assert(load_offset % type_sz(dest.type) == 0);
+         assert(load_offset % brw_type_size_bytes(dest.type) == 0);
          /* The base offset can only handle 32-bit units, so for 16-bit
           * data take the modulo of the offset with 4 bytes and add it to
           * the offset to read from within the source register.
@@ -6296,13 +6298,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
           * one component of the vector.
           */
          assert(nir_intrinsic_range(instr) >=
-                instr->num_components * type_sz(dest.type));
+                instr->num_components * brw_type_size_bytes(dest.type));
 
          unsigned read_size = nir_intrinsic_range(instr) -
-            (instr->num_components - 1) * type_sz(dest.type);
+            (instr->num_components - 1) * brw_type_size_bytes(dest.type);
 
          bool supports_64bit_indirects = !intel_device_info_is_9lp(devinfo);
 
-         if (type_sz(dest.type) != 8 || supports_64bit_indirects) {
+         if (brw_type_size_bytes(dest.type) != 8 || supports_64bit_indirects) {
             for (unsigned j = 0; j < instr->num_components; j++) {
                bld.emit(SHADER_OPCODE_MOV_INDIRECT,
                         offset(dest, bld, j), offset(src, bld, j),
@@ -6310,12 +6312,12 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
             }
          } else {
             const unsigned num_mov_indirects =
-               type_sz(dest.type) / type_sz(BRW_TYPE_UD);
+               brw_type_size_bytes(dest.type) / brw_type_size_bytes(BRW_TYPE_UD);
             /* We read a little bit less per MOV INDIRECT, as they are now
              * 32-bits ones instead of 64-bit. Fix read_size then.
              */
             const unsigned read_size_32bit = read_size -
-               (num_mov_indirects - 1) * type_sz(BRW_TYPE_UD);
+               (num_mov_indirects - 1) * brw_type_size_bytes(BRW_TYPE_UD);
             for (unsigned j = 0; j < instr->num_components; j++) {
                for (unsigned i = 0; i < num_mov_indirects; i++) {
                   bld.emit(SHADER_OPCODE_MOV_INDIRECT,
@@ -6344,14 +6346,14 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
       fs_reg base_offset = retype(get_nir_src(ntb, instr->src[1]),
                                   BRW_TYPE_UD);
 
-      const unsigned comps_per_load = type_sz(dest.type) == 8 ? 2 : 4;
+      const unsigned comps_per_load = brw_type_size_bytes(dest.type) == 8 ? 2 : 4;
 
       for (int i = 0; i < instr->num_components; i += comps_per_load) {
          const unsigned remaining = instr->num_components - i;
          s.VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i),
                                       surface, surface_handle,
                                       base_offset,
-                                      i * type_sz(dest.type),
+                                      i * brw_type_size_bytes(dest.type),
                                       instr->def.bit_size / 8,
                                       MIN2(remaining, comps_per_load));
       }
@@ -6422,7 +6424,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        * we let CSE deal with duplicate loads. Here we see a vector access
        * and we have to split it if necessary.
        */
-      const unsigned type_size = type_sz(dest.type);
+      const unsigned type_size = brw_type_size_bytes(dest.type);
       const unsigned load_offset = nir_src_as_uint(instr->src[1]);
       const unsigned ubo_block =
          brw_nir_ubo_surface_index_get_push_block(instr->src[0]);
@@ -7416,15 +7418,15 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
       bld.emit_scan(brw_op, scan, cluster_size, cond_mod);
 
       dest.type = src.type;
-      if (cluster_size * type_sz(src.type) >= REG_SIZE * 2) {
+      if (cluster_size * brw_type_size_bytes(src.type) >= REG_SIZE * 2) {
          /* In this case, CLUSTER_BROADCAST instruction isn't needed because
           * the distance between clusters is at least 2 GRFs. In this case,
           * we don't need the weird striding of the CLUSTER_BROADCAST
           * instruction and can just do regular MOVs.
           */
-         assert((cluster_size * type_sz(src.type)) % (REG_SIZE * 2) == 0);
+         assert((cluster_size * brw_type_size_bytes(src.type)) % (REG_SIZE * 2) == 0);
          const unsigned groups =
-            (s.dispatch_width * type_sz(src.type)) / (REG_SIZE * 2);
+            (s.dispatch_width * brw_type_size_bytes(src.type)) / (REG_SIZE * 2);
          const unsigned group_size = s.dispatch_width / groups;
          for (unsigned i = 0; i < groups; i++) {
             const unsigned cluster = (i * group_size) / cluster_size;
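A worked example of the cluster-broadcast branch above, with assumed numbers (SIMD32, 32-bit type, cluster_size 16, 32-byte GRFs):

/* cluster_size * type size = 16 * 4 = 64 bytes = REG_SIZE * 2, so each
 * cluster result already starts on a 2-GRF boundary and plain MOVs work:
 *
 *    groups     = (32 * 4) / (32 * 2) = 2
 *    group_size = 32 / 2              = 16
 */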
@@ -7855,7 +7857,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
 static fs_reg
 expand_to_32bit(const fs_builder &bld, const fs_reg &src)
 {
-   if (type_sz(src.type) == 2) {
+   if (brw_type_size_bytes(src.type) == 2) {
       fs_reg src32 = bld.vgrf(BRW_TYPE_UD);
       bld.MOV(src32, retype(src, BRW_TYPE_UW));
       return src32;
@@ -8454,23 +8456,23 @@ shuffle_src_to_dst(const fs_builder &bld,
                    uint32_t first_component,
                    uint32_t components)
 {
-   if (type_sz(src.type) == type_sz(dst.type)) {
+   if (brw_type_size_bytes(src.type) == brw_type_size_bytes(dst.type)) {
       assert(!regions_overlap(dst,
-         type_sz(dst.type) * bld.dispatch_width() * components,
+         brw_type_size_bytes(dst.type) * bld.dispatch_width() * components,
          offset(src, bld, first_component),
-         type_sz(src.type) * bld.dispatch_width() * components));
+         brw_type_size_bytes(src.type) * bld.dispatch_width() * components));
       for (unsigned i = 0; i < components; i++) {
          bld.MOV(retype(offset(dst, bld, i), src.type),
                  offset(src, bld, i + first_component));
       }
-   } else if (type_sz(src.type) < type_sz(dst.type)) {
+   } else if (brw_type_size_bytes(src.type) < brw_type_size_bytes(dst.type)) {
       /* Source is shuffled into destination */
-      unsigned size_ratio = type_sz(dst.type) / type_sz(src.type);
+      unsigned size_ratio = brw_type_size_bytes(dst.type) / brw_type_size_bytes(src.type);
       assert(!regions_overlap(dst,
-         type_sz(dst.type) * bld.dispatch_width() *
+         brw_type_size_bytes(dst.type) * bld.dispatch_width() *
          DIV_ROUND_UP(components, size_ratio),
          offset(src, bld, first_component),
-         type_sz(src.type) * bld.dispatch_width() * components));
+         brw_type_size_bytes(src.type) * bld.dispatch_width() * components));
 
       brw_reg_type shuffle_type =
          brw_type_with_size(BRW_TYPE_D, brw_type_size_bits(src.type));
@@ -8483,16 +8485,16 @@ shuffle_src_to_dst(const fs_builder &bld,
       }
    } else {
       /* Source is unshuffled into destination */
-      unsigned size_ratio = type_sz(src.type) / type_sz(dst.type);
+      unsigned size_ratio = brw_type_size_bytes(src.type) / brw_type_size_bytes(dst.type);
       assert(!regions_overlap(dst,
-         type_sz(dst.type) * bld.dispatch_width() * components,
+         brw_type_size_bytes(dst.type) * bld.dispatch_width() * components,
          offset(src, bld, first_component / size_ratio),
-         type_sz(src.type) * bld.dispatch_width() *
+         brw_type_size_bytes(src.type) * bld.dispatch_width() *
          DIV_ROUND_UP(components + (first_component % size_ratio),
                       size_ratio)));
 
       brw_reg_type shuffle_type =
-         brw_type_with_size(BRW_TYPE_D, brw_Type_size_bits(dst.type));
+         brw_type_with_size(BRW_TYPE_D, brw_type_size_bits(dst.type));
       for (unsigned i = 0; i < components; i++) {
          fs_reg shuffle_component_i =
             subscript(offset(src, bld, (first_component + i) / size_ratio),
@@ -8510,13 +8512,13 @@ shuffle_from_32bit_read(const fs_builder &bld,
                         uint32_t first_component,
                         uint32_t components)
 {
-   assert(type_sz(src.type) == 4);
+   assert(brw_type_size_bytes(src.type) == 4);
 
    /* This function takes components in units of the destination type while
    * shuffle_src_to_dst takes components in units of the smallest type
    */
-   if (type_sz(dst.type) > 4) {
-      assert(type_sz(dst.type) == 8);
+   if (brw_type_size_bytes(dst.type) > 4) {
+      assert(brw_type_size_bytes(dst.type) == 8);
       first_component *= 2;
       components *= 2;
    }
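As a sketch of what the shuffle helpers above do in the common 32-to-64-bit case (hypothetical registers dst64/src32 and component index comp; subscript() is the real helper):

/* One 64-bit component per channel is assembled from two consecutive
 * dwords of the 32-bit payload -- illustrative only: */
for (unsigned half = 0; half < 2; half++) {
   bld.MOV(subscript(dst64, BRW_TYPE_D, half),   /* low/high dword of dst */
           offset(src32, bld, 2 * comp + half));
}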
diff --git a/src/intel/compiler/brw_fs_opt.cpp b/src/intel/compiler/brw_fs_opt.cpp
index 0b81b7b5e7c..66e6a3b7057 100644
--- a/src/intel/compiler/brw_fs_opt.cpp
+++ b/src/intel/compiler/brw_fs_opt.cpp
@@ -166,7 +166,7 @@ load_payload_sources_read_for_size(fs_inst *lp, unsigned size_read)
    unsigned i;
    unsigned size = lp->header_size * REG_SIZE;
    for (i = lp->header_size; size < size_read && i < lp->sources; i++)
-      size += lp->exec_size * type_sz(lp->src[i].type);
+      size += lp->exec_size * brw_type_size_bytes(lp->src[i].type);
 
    /* Size read must cover exactly a subset of sources. */
    assert(size == size_read);
@@ -225,7 +225,7 @@ brw_fs_opt_zero_samples(fs_visitor &s)
       for (unsigned i = params - 1; i > first_param_idx; i--) {
          if (lp->src[i].file != BAD_FILE && !lp->src[i].is_zero())
             break;
-         zero_size += lp->exec_size * type_sz(lp->src[i].type) * lp->dst.stride;
+         zero_size += lp->exec_size * brw_type_size_bytes(lp->src[i].type) * lp->dst.stride;
       }
 
       /* Round down to ensure to only consider full registers. */
diff --git a/src/intel/compiler/brw_fs_opt_algebraic.cpp b/src/intel/compiler/brw_fs_opt_algebraic.cpp
index 5154270688b..ecf34f12fb0 100644
--- a/src/intel/compiler/brw_fs_opt_algebraic.cpp
+++ b/src/intel/compiler/brw_fs_opt_algebraic.cpp
@@ -130,8 +130,8 @@ brw_fs_opt_algebraic(fs_visitor &s)
           * we might use the full accumulator in the MUL/MACH macro, we
           * shouldn't replace such MULs with MOVs.
           */
-         if ((brw_reg_type_to_size(inst->src[0].type) == 4 ||
-              brw_reg_type_to_size(inst->src[1].type) == 4) &&
+         if ((brw_type_size_bytes(inst->src[0].type) == 4 ||
+              brw_type_size_bytes(inst->src[1].type) == 4) &&
             (inst->dst.is_accumulator() ||
              inst->writes_accumulator_implicitly(devinfo)))
            break;
@@ -330,7 +330,7 @@ brw_fs_opt_algebraic(fs_visitor &s)
 
          fs_reg result;
 
-         switch (type_sz(inst->src[0].type)) {
+         switch (brw_type_size_bytes(inst->src[0].type)) {
          case 2:
             result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
             break;
diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp
index a3f620dba00..7d3bdd76467 100644
--- a/src/intel/compiler/brw_fs_reg_allocate.cpp
+++ b/src/intel/compiler/brw_fs_reg_allocate.cpp
@@ -1061,7 +1061,8 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
           * instruction and set force_writemask_all on the spill.
           */
          const bool per_channel =
-            inst->dst.is_contiguous() && type_sz(inst->dst.type) == 4 &&
+            inst->dst.is_contiguous() &&
+            brw_type_size_bytes(inst->dst.type) == 4 &&
             inst->exec_size == width;
 
          /* Builder used to emit the scratch messages.
          */
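The immediate fold in the brw_fs_opt_algebraic.cpp hunk above keeps the shifted result inside the source width; a worked 16-bit example with assumed values:

/* shl(uw 0x00f0, 5) folds to a MOV of a single immediate:
 *
 *    0x0ffff & (0x00f0 << (5 & 0x1f)) == 0x1e00
 */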
diff --git a/src/intel/compiler/brw_fs_register_coalesce.cpp b/src/intel/compiler/brw_fs_register_coalesce.cpp
index e5ca3f89b2a..fc5b844cd64 100644
--- a/src/intel/compiler/brw_fs_register_coalesce.cpp
+++ b/src/intel/compiler/brw_fs_register_coalesce.cpp
@@ -57,7 +57,7 @@ is_nop_mov(const fs_inst *inst)
          }
          dst.offset += (i < inst->header_size ? REG_SIZE :
                         inst->exec_size * dst.stride *
-                        type_sz(inst->src[i].type));
+                        brw_type_size_bytes(inst->src[i].type));
       }
       return true;
    } else if (inst->opcode == BRW_OPCODE_MOV) {
diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp
index c6c8995879e..8c68df53ed8 100644
--- a/src/intel/compiler/brw_fs_scoreboard.cpp
+++ b/src/intel/compiler/brw_fs_scoreboard.cpp
@@ -86,7 +86,7 @@ namespace {
                 !inst->is_control_source(i)) {
                const brw_reg_type t = inst->src[i].type;
                has_int_src |= !brw_type_is_float(t);
-               has_long_src |= type_sz(t) >= 8;
+               has_long_src |= brw_type_size_bytes(t) >= 8;
             }
          }
 
@@ -120,9 +120,11 @@ namespace {
          const brw_reg_type t = get_exec_type(inst);
          const bool is_dword_multiply = !brw_type_is_float(t) &&
             ((inst->opcode == BRW_OPCODE_MUL &&
-              MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4) ||
+              MIN2(brw_type_size_bytes(inst->src[0].type),
+                   brw_type_size_bytes(inst->src[1].type)) >= 4) ||
              (inst->opcode == BRW_OPCODE_MAD &&
-              MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4));
+              MIN2(brw_type_size_bytes(inst->src[1].type),
+                   brw_type_size_bytes(inst->src[2].type)) >= 4));
 
          if (is_unordered(devinfo, inst))
             return TGL_PIPE_NONE;
@@ -136,13 +138,14 @@ namespace {
             return TGL_PIPE_INT;
          else if (inst->opcode == FS_OPCODE_PACK_HALF_2x16_SPLIT)
             return TGL_PIPE_FLOAT;
-         else if (devinfo->ver >= 20 && type_sz(inst->dst.type) >= 8 &&
+         else if (devinfo->ver >= 20 &&
+                  brw_type_size_bytes(inst->dst.type) >= 8 &&
                   brw_type_is_float(inst->dst.type)) {
             assert(devinfo->has_64bit_float);
             return TGL_PIPE_LONG;
          } else if (devinfo->ver < 20 &&
-                    (type_sz(inst->dst.type) >= 8 || type_sz(t) >= 8 ||
-                     is_dword_multiply)) {
+                    (brw_type_size_bytes(inst->dst.type) >= 8 ||
+                     brw_type_size_bytes(t) >= 8 || is_dword_multiply)) {
             assert(devinfo->has_64bit_float || devinfo->has_64bit_int ||
                    devinfo->has_integer_dword_mul);
             return TGL_PIPE_LONG;
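To make the scoreboard pipe selection above concrete, a few assumed pre-Xe2 (ver < 20) examples implied by the logic; treat them as illustration, not an exhaustive table:

/*   mov (8) r10<1>:df r12<1;1,0>:df   -> TGL_PIPE_LONG  (64-bit types)
 *   mul (8) r10<1>:d  r12:d  r14:d    -> TGL_PIPE_LONG  (dword multiply)
 *   mul (8) r10<1>:d  r12:d  r14:w    -> not a dword multiply: the MIN2 of
 *                                        the source sizes is 2, not >= 4  */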
diff --git a/src/intel/compiler/brw_fs_sel_peephole.cpp b/src/intel/compiler/brw_fs_sel_peephole.cpp
index 0f08356331f..cef10f51bbe 100644
--- a/src/intel/compiler/brw_fs_sel_peephole.cpp
+++ b/src/intel/compiler/brw_fs_sel_peephole.cpp
@@ -206,7 +206,7 @@ brw_fs_opt_peephole_sel(fs_visitor &s)
          /* 64-bit immediates can't be placed in src1.
           */
          fs_reg src1(else_mov[i]->src[0]);
-         if (src1.file == IMM && type_sz(src1.type) == 8) {
+         if (src1.file == IMM && brw_type_size_bytes(src1.type) == 8) {
             src1 = ibld.vgrf(else_mov[i]->src[0].type);
             ibld.MOV(src1, else_mov[i]->src[0]);
          }
diff --git a/src/intel/compiler/brw_fs_validate.cpp b/src/intel/compiler/brw_fs_validate.cpp
index c1dc7a27d0d..bb6d047d608 100644
--- a/src/intel/compiler/brw_fs_validate.cpp
+++ b/src/intel/compiler/brw_fs_validate.cpp
@@ -168,7 +168,7 @@ brw_fs_validate(const fs_visitor &s)
              */
             fsv_assert_lte(inst->src[i].vstride, 1);
 
-            if (type_sz(inst->src[i].type) > 4)
+            if (brw_type_size_bytes(inst->src[i].type) > 4)
                fsv_assert_eq(inst->src[i].vstride, 1);
          }
       }
diff --git a/src/intel/compiler/brw_gram.y b/src/intel/compiler/brw_gram.y
index 0875b14794a..b5880955e8d 100644
--- a/src/intel/compiler/brw_gram.y
+++ b/src/intel/compiler/brw_gram.y
@@ -1318,7 +1318,7 @@ dstoperand:
		$$.type = $4;
		$$.writemask = $3;
		$$.swizzle = BRW_SWIZZLE_NOOP;
-		$$.subnr = $$.subnr * brw_reg_type_to_size($4);
+		$$.subnr = $$.subnr * brw_type_size_bytes($4);
	}
	;
 
@@ -1329,7 +1329,7 @@ dstoperandex:
		$$.hstride = $2;
		$$.type = $4;
		$$.writemask = $3;
-		$$.subnr = $$.subnr * brw_reg_type_to_size($4);
+		$$.subnr = $$.subnr * brw_type_size_bytes($4);
	}
	/* BSpec says "When the conditional modifier is present, updates
	 * to the selected flag register also occur. In this case, the
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index 7824492811d..dacbc1bea0f 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -152,7 +152,7 @@ horiz_offset(const fs_reg &reg, unsigned delta)
       return reg;
    case VGRF:
    case ATTR:
-      return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
+      return byte_offset(reg, delta * reg.stride * brw_type_size_bytes(reg.type));
    case ARF:
    case FIXED_GRF:
       if (reg.is_null()) {
@@ -163,10 +163,10 @@ horiz_offset(const fs_reg &reg, unsigned delta)
          const unsigned width = 1 << reg.width;
 
          if (delta % width == 0) {
-            return byte_offset(reg, delta / width * vstride * type_sz(reg.type));
+            return byte_offset(reg, delta / width * vstride * brw_type_size_bytes(reg.type));
          } else {
            assert(vstride == hstride * width);
-            return byte_offset(reg, delta * hstride * type_sz(reg.type));
+            return byte_offset(reg, delta * hstride * brw_type_size_bytes(reg.type));
          }
       }
    }
@@ -245,7 +245,7 @@ reg_padding(const fs_reg &r)
    const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
                             r.hstride == 0 ? 0 : 1 << (r.hstride - 1));
 
-   return (MAX2(1, stride) - 1) * type_sz(r.type);
+   return (MAX2(1, stride) - 1) * brw_type_size_bytes(r.type);
 }
 
 /**
@@ -333,29 +333,29 @@ quarter(const fs_reg &reg, unsigned idx)
 static inline fs_reg
 subscript(fs_reg reg, brw_reg_type type, unsigned i)
 {
-   assert((i + 1) * type_sz(type) <= type_sz(reg.type));
+   assert((i + 1) * brw_type_size_bytes(type) <= brw_type_size_bytes(reg.type));
 
    if (reg.file == ARF || reg.file == FIXED_GRF) {
       /* The stride is encoded inconsistently for fixed GRF and ARF registers
        * as the log2 of the actual vertical and horizontal strides.
        */
-      const int delta = util_logbase2(type_sz(reg.type)) -
-                        util_logbase2(type_sz(type));
+      const int delta = util_logbase2(brw_type_size_bytes(reg.type)) -
+                        util_logbase2(brw_type_size_bytes(type));
       reg.hstride += (reg.hstride ? delta : 0);
       reg.vstride += (reg.vstride ? delta : 0);
 
    } else if (reg.file == IMM) {
-      unsigned bit_size = type_sz(type) * 8;
+      unsigned bit_size = brw_type_size_bits(type);
       reg.u64 >>= i * bit_size;
       reg.u64 &= BITFIELD64_MASK(bit_size);
       if (bit_size <= 16)
         reg.u64 |= reg.u64 << 16;
      return retype(reg, type);
   } else {
-      reg.stride *= type_sz(reg.type) / type_sz(type);
+      reg.stride *= brw_type_size_bytes(reg.type) / brw_type_size_bytes(type);
   }
 
-   return byte_offset(retype(reg, type), i * type_sz(type));
+   return byte_offset(retype(reg, type), i * brw_type_size_bytes(type));
 }
 
 static inline fs_reg
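A short usage sketch for the fs_reg subscript() above (hypothetical 64-bit VGRF val_q):

fs_reg lo = subscript(val_q, BRW_TYPE_UD, 0);  /* bytes 0..3 of each QWord */
fs_reg hi = subscript(val_q, BRW_TYPE_UD, 1);  /* bytes 4..7 of each QWord */
/* For a VGRF this multiplies reg.stride by 2 (8 bytes / 4 bytes), so each
 * channel still steps over a full QWord. */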
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h (continued)
@@ -657,9 +657,9 @@ get_exec_type(const fs_inst *inst)
       if (inst->src[i].file != BAD_FILE &&
           !inst->is_control_source(i)) {
          const brw_reg_type t = get_exec_type(inst->src[i].type);
-         if (type_sz(t) > type_sz(exec_type))
+         if (brw_type_size_bytes(t) > brw_type_size_bytes(exec_type))
             exec_type = t;
-         else if (type_sz(t) == type_sz(exec_type) &&
+         else if (brw_type_size_bytes(t) == brw_type_size_bytes(exec_type) &&
                   brw_type_is_float(t))
             exec_type = t;
       }
@@ -683,7 +683,7 @@ get_exec_type(const fs_inst *inst)
    * "Conversion between Integer and HF (Half Float) must be DWord aligned
    *  and strided by a DWord on the destination."
    */
-   if (type_sz(exec_type) == 2 &&
+   if (brw_type_size_bytes(exec_type) == 2 &&
       inst->dst.type != exec_type) {
      if (exec_type == BRW_TYPE_HF)
         exec_type = BRW_TYPE_F;
@@ -697,7 +697,7 @@ get_exec_type(const fs_inst *inst)
 static inline unsigned
 get_exec_type_size(const fs_inst *inst)
 {
-   return type_sz(get_exec_type(inst));
+   return brw_type_size_bytes(get_exec_type(inst));
 }
 
 static inline bool
@@ -734,7 +734,7 @@ byte_stride(const fs_reg &reg)
    case IMM:
    case VGRF:
    case ATTR:
-      return reg.stride * type_sz(reg.type);
+      return reg.stride * brw_type_size_bytes(reg.type);
    case ARF:
    case FIXED_GRF:
       if (reg.is_null()) {
@@ -745,9 +745,9 @@ byte_stride(const fs_reg &reg)
          const unsigned width = 1 << reg.width;
 
          if (width == 1) {
-            return vstride * type_sz(reg.type);
+            return vstride * brw_type_size_bytes(reg.type);
          } else if (hstride * width == vstride) {
-            return hstride * type_sz(reg.type);
+            return hstride * brw_type_size_bytes(reg.type);
          } else {
             return ~0u;
          }
@@ -783,12 +783,12 @@ has_dst_aligned_region_restriction(const intel_device_info *devinfo,
    */
   const bool is_dword_multiply = !brw_type_is_float(exec_type) &&
      ((inst->opcode == BRW_OPCODE_MUL &&
-       MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4) ||
+       MIN2(brw_type_size_bytes(inst->src[0].type), brw_type_size_bytes(inst->src[1].type)) >= 4) ||
       (inst->opcode == BRW_OPCODE_MAD &&
-       MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4));
+       MIN2(brw_type_size_bytes(inst->src[1].type), brw_type_size_bytes(inst->src[2].type)) >= 4));
 
-   if (type_sz(dst_type) > 4 || type_sz(exec_type) > 4 ||
-       (type_sz(exec_type) == 4 && is_dword_multiply))
+   if (brw_type_size_bytes(dst_type) > 4 || brw_type_size_bytes(exec_type) > 4 ||
+       (brw_type_size_bytes(exec_type) == 4 && is_dword_multiply))
       return intel_device_info_is_9lp(devinfo) || devinfo->verx10 >= 125;
 
    else if (brw_type_is_float(dst_type))
@@ -818,10 +818,12 @@ has_subdword_integer_region_restriction(const intel_device_info *devinfo,
 {
    if (devinfo->ver >= 20 && brw_type_is_int(inst->dst.type) &&
-       MAX2(byte_stride(inst->dst), type_sz(inst->dst.type)) < 4) {
+       MAX2(byte_stride(inst->dst),
+            brw_type_size_bytes(inst->dst.type)) < 4) {
       for (unsigned i = 0; i < num_srcs; i++) {
          if (brw_type_is_int(srcs[i].type) &&
-             type_sz(srcs[i].type) < 4 && byte_stride(srcs[i]) >= 4)
+             brw_type_size_bytes(srcs[i].type) < 4 &&
+             byte_stride(srcs[i]) >= 4)
             return true;
       }
    }
diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp
index d897a97cc21..b920bf16c83 100644
--- a/src/intel/compiler/brw_ir_performance.cpp
+++ b/src/intel/compiler/brw_ir_performance.cpp
@@ -136,14 +136,14 @@ namespace {
          }
 
          /* Convert the execution size to GRF units. */
-         sx = DIV_ROUND_UP(inst->exec_size * type_sz(tx), REG_SIZE);
+         sx = DIV_ROUND_UP(inst->exec_size * brw_type_size_bytes(tx), REG_SIZE);
 
         /* 32x32 integer multiplication has half the usual ALU throughput.
          * Treat it as double-precision.
          */
         if ((inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD) &&
-             !brw_type_is_float(tx) && type_sz(tx) == 4 &&
-             type_sz(inst->src[0].type) == type_sz(inst->src[1].type))
+             !brw_type_is_float(tx) && brw_type_size_bytes(tx) == 4 &&
+             brw_type_size_bytes(inst->src[0].type) == brw_type_size_bytes(inst->src[1].type))
            tx = brw_int_type(8, tx == BRW_TYPE_D);
 
         rcount = inst->opcode == BRW_OPCODE_DPAS ? inst->rcount : 0;
@@ -317,7 +317,7 @@ namespace {
             return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 0,
                                   10, 6 /* XXX */, 14, 0, 0);
          } else {
-            if (type_sz(info.tx) > 4)
+            if (brw_type_size_bytes(info.tx) > 4)
                return calculate_desc(info, EU_UNIT_FPU, 0, 4, 0, 0, 4, 0,
                                      12, 8 /* XXX */, 16 /* XXX */, 0, 0);
             else
@@ -335,7 +335,7 @@ namespace {
             return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 0,
                                   10, 6, 14, 0, 0);
          } else {
-            if (type_sz(info.tx) > 4)
+            if (brw_type_size_bytes(info.tx) > 4)
               return calculate_desc(info, EU_UNIT_FPU, 0, 4, 0, 0, 4, 0,
                                     12, 8 /* XXX */, 16 /* XXX */, 0, 0);
            else
@@ -358,7 +358,7 @@ namespace {
             return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 0,
                                   10, 6 /* XXX */, 14 /* XXX */, 0, 0);
          } else {
-            if (type_sz(info.tx) > 4)
+            if (brw_type_size_bytes(info.tx) > 4)
                return calculate_desc(info, EU_UNIT_FPU, 0, 4, 1, 0, 4, 0,
                                      12, 8 /* XXX */, 16 /* XXX */, 0, 0);
             else
@@ -854,7 +854,7 @@ namespace {
       {
          assert(inst->reads_accumulator_implicitly() ||
                 inst->writes_accumulator_implicitly(devinfo));
-         const unsigned offset = (inst->group + i) * type_sz(tx) *
+         const unsigned offset = (inst->group + i) * brw_type_size_bytes(tx) *
             (brw_type_is_float(tx) ? 1 : 2);
          return offset / (reg_unit(devinfo) * REG_SIZE) % 2;
       }
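Worked example for the "Convert the execution size to GRF units" line above, with assumed numbers (SIMD16, 4-byte execution type, 32-byte GRFs):

/*    sx = DIV_ROUND_UP(16 * 4, 32) = 2 GRFs
 *
 * and a SIMD16 32x32 integer MUL is then re-typed to an 8-byte integer
 * type via brw_int_type(8, ...), doubling its modeled cost. */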
diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp
index ed78c8b0a0a..e3caebd8e13 100644
--- a/src/intel/compiler/brw_lower_logical_sends.cpp
+++ b/src/intel/compiler/brw_lower_logical_sends.cpp
@@ -200,11 +200,11 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
    const fs_reg handle = inst->src[URB_LOGICAL_SRC_HANDLE];
    const fs_reg src = inst->components_read(URB_LOGICAL_SRC_DATA) ?
       inst->src[URB_LOGICAL_SRC_DATA] : fs_reg(brw_imm_ud(0));
-   assert(type_sz(src.type) == 4);
+   assert(brw_type_size_bytes(src.type) == 4);
 
    /* Calculate the total number of components of the payload. */
    const unsigned src_comps = MAX2(1, inst->components_read(URB_LOGICAL_SRC_DATA));
-   const unsigned src_sz = type_sz(src.type);
+   const unsigned src_sz = brw_type_size_bytes(src.type);
 
    fs_reg payload = bld.vgrf(BRW_TYPE_UD);
@@ -404,7 +404,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
        * hardware when doing a SIMD8 write depending on whether we have
        * selected the subspans for the first or second half respectively.
        */
-      assert(sample_mask.file != BAD_FILE && type_sz(sample_mask.type) == 4);
+      assert(sample_mask.file != BAD_FILE &&
+             brw_type_size_bytes(sample_mask.type) == 4);
       sample_mask.type = BRW_TYPE_UW;
       sample_mask.stride *= 2;
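The retype-and-double-stride trick above selects the low word of every dword of the mask; schematically (assumed register contents):

/*    before: r10<1>:ud  holds  d0 d1 d2 d3 ...
 *    after:  r10<2>:uw  reads  lo(d0) lo(d1) lo(d2) ...   */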
@@ -1211,7 +1212,7 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
    */
   for (unsigned i = 0; i < TEX_LOGICAL_NUM_SRCS; i++) {
      if (src[i].file != BAD_FILE) {
-         src_type_size = brw_reg_type_to_size(src[i].type);
+         src_type_size = brw_type_size_bytes(src[i].type);
         break;
      }
   }
@@ -1227,7 +1228,7 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
   if (inst->opcode != SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL) {
      for (unsigned i = 0; i < TEX_LOGICAL_NUM_SRCS; i++) {
         assert(src[i].file == BAD_FILE ||
-                brw_reg_type_to_size(src[i].type) == src_type_size);
+                brw_type_size_bytes(src[i].type) == src_type_size);
      }
   }
 #endif
@@ -1664,8 +1665,8 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
    /* Calculate the total number of components of the payload. */
    const unsigned addr_sz = inst->components_read(SURFACE_LOGICAL_SRC_ADDRESS);
    const unsigned src_comps = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
-   const unsigned src_sz = type_sz(src.type);
-   const unsigned dst_sz = type_sz(inst->dst.type);
+   const unsigned src_sz = brw_type_size_bytes(src.type);
+   const unsigned dst_sz = brw_type_size_bytes(inst->dst.type);
 
    const bool has_side_effects = inst->has_side_effects();
@@ -1954,7 +1955,7 @@ lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst)
    if (write) {
       const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
       data = retype(bld.move_to_vgrf(src, src_sz), BRW_TYPE_UD);
-      ex_mlen = src_sz * type_sz(src.type) * inst->exec_size / REG_SIZE;
+      ex_mlen = src_sz * brw_type_size_bytes(src.type) * inst->exec_size / REG_SIZE;
    }
 
    inst->opcode = SHADER_OPCODE_SEND;
@@ -1981,7 +1982,7 @@ emit_a64_oword_block_header(const fs_builder &bld, const fs_reg &addr)
 {
    const fs_builder ubld = bld.exec_all().group(8, 0);
 
-   assert(type_sz(addr.type) == 8 && addr.stride == 0);
+   assert(brw_type_size_bytes(addr.type) == 8 && addr.stride == 0);
 
    fs_reg expanded_addr = addr;
    if (addr.file == UNIFORM) {
@@ -2031,8 +2032,8 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
    /* Get the logical send arguments.
    */
   const fs_reg addr = inst->src[A64_LOGICAL_ADDRESS];
   const fs_reg src = inst->src[A64_LOGICAL_SRC];
-   const unsigned src_sz = type_sz(src.type);
-   const unsigned dst_sz = type_sz(inst->dst.type);
+   const unsigned src_sz = brw_type_size_bytes(src.type);
+   const unsigned dst_sz = brw_type_size_bytes(inst->dst.type);
 
   const unsigned src_comps = inst->components_read(1);
   assert(inst->src[A64_LOGICAL_ARG].file == IMM);
@@ -2181,13 +2182,13 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
         payload = emit_a64_oword_block_header(bld, addr);
 
         if (inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL) {
-            ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE;
+            ex_mlen = src_comps * brw_type_size_bytes(src.type) * inst->exec_size / REG_SIZE;
            payload2 = retype(bld.move_to_vgrf(src, src_comps),
                              BRW_TYPE_UD);
         }
      } else {
         /* On Skylake and above, we have SENDS */
         mlen = 2 * (inst->exec_size / 8);
-         ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE;
+         ex_mlen = src_comps * brw_type_size_bytes(src.type) * inst->exec_size / REG_SIZE;
         payload = retype(bld.move_to_vgrf(addr, 1), BRW_TYPE_UD);
         payload2 = retype(bld.move_to_vgrf(src, src_comps), BRW_TYPE_UD);
      }
@@ -2243,12 +2244,12 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
      if (lsc_opcode_is_atomic_float((enum lsc_opcode) arg)) {
         desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
-                                                    type_sz(inst->dst.type) * 8,
+                                                    brw_type_size_bits(inst->dst.type),
                                                     lsc_op_to_legacy_atomic(arg),
                                                     !inst->dst.is_null());
      } else {
         desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size,
-                                              type_sz(inst->dst.type) * 8,
+                                              brw_type_size_bits(inst->dst.type),
                                               lsc_op_to_legacy_atomic(arg),
                                               !inst->dst.is_null());
      }
@@ -2583,7 +2584,8 @@ lower_btd_logical_send(const fs_builder &bld, fs_inst *inst)
    ubld.MOV(header, brw_imm_ud(0));
 
    switch (inst->opcode) {
    case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
-      assert(type_sz(global_addr.type) == 8 && global_addr.stride == 0);
+      assert(brw_type_size_bytes(global_addr.type) == 8 &&
+             global_addr.stride == 0);
       global_addr.type = BRW_TYPE_UD;
       global_addr.stride = 1;
       ubld.group(2, 0).MOV(header, global_addr);
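Worked example for the ex_mlen computations above, with assumed numbers (4 dword components, SIMD16, 32-byte GRFs):

/*    ex_mlen = 4 * 4 * 16 / REG_SIZE = 256 / 32 = 8 GRFs of write payload */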
diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h
index de6230622b2..554710b5ac8 100644
--- a/src/intel/compiler/brw_reg.h
+++ b/src/intel/compiler/brw_reg.h
@@ -283,13 +283,6 @@ struct brw_indirect {
    unsigned pad:18;
 };
 
-
-static inline unsigned
-type_sz(unsigned type)
-{
-   return brw_type_size_bytes((enum brw_reg_type) type);
-}
-
 static inline enum brw_reg_type
 get_exec_type(const enum brw_reg_type type)
 {
@@ -366,7 +359,7 @@ brw_reg(enum brw_reg_file file,
    reg.abs = abs;
    reg.address_mode = BRW_ADDRESS_DIRECT;
    reg.pad0 = 0;
-   reg.subnr = subnr * type_sz(type);
+   reg.subnr = subnr * brw_type_size_bytes(type);
    reg.nr = nr;
 
    /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
@@ -531,7 +524,7 @@ byte_offset(struct brw_reg reg, unsigned bytes)
 static inline struct brw_reg
 suboffset(struct brw_reg reg, unsigned delta)
 {
-   return byte_offset(reg, delta * type_sz(reg.type));
+   return byte_offset(reg, delta * brw_type_size_bytes(reg.type));
 }
 
 /** Construct unsigned word[16] register */
@@ -1006,11 +999,11 @@ spread(struct brw_reg reg, unsigned s)
 static inline struct brw_reg
 subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i)
 {
-   unsigned scale = type_sz(reg.type) / type_sz(type);
+   unsigned scale = brw_type_size_bytes(reg.type) / brw_type_size_bytes(type);
    assert(scale >= 1 && i < scale);
 
    if (reg.file == IMM) {
-      unsigned bit_size = type_sz(type) * 8;
+      unsigned bit_size = brw_type_size_bits(type);
       reg.u64 >>= i * bit_size;
       reg.u64 &= BITFIELD64_MASK(bit_size);
       if (bit_size <= 16)
@@ -1238,17 +1231,17 @@ static inline unsigned
 element_sz(struct brw_reg reg)
 {
    if (reg.file == BRW_IMMEDIATE_VALUE ||
       has_scalar_region(reg)) {
-      return type_sz(reg.type);
+      return brw_type_size_bytes(reg.type);
    } else if (reg.width == BRW_WIDTH_1 &&
              reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
      assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
-      return type_sz(reg.type) << (reg.vstride - 1);
+      return brw_type_size_bytes(reg.type) << (reg.vstride - 1);
   } else {
      assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
      assert(reg.vstride == reg.hstride + reg.width);
-      return type_sz(reg.type) << (reg.hstride - 1);
+      return brw_type_size_bytes(reg.type) << (reg.hstride - 1);
   }
 }
diff --git a/src/intel/compiler/brw_reg_type.c b/src/intel/compiler/brw_reg_type.c
index eb142764531..47fc320cd61 100644
--- a/src/intel/compiler/brw_reg_type.c
+++ b/src/intel/compiler/brw_reg_type.c
@@ -283,15 +283,6 @@ brw_a1_hw_3src_type_to_reg_type(const struct intel_device_info *devinfo,
    }
 }
 
-/**
- * Return the element size given a register type.
- */
-unsigned
-brw_reg_type_to_size(enum brw_reg_type type)
-{
-   return brw_type_size_bytes(type);
-}
-
 /**
  * Converts a BRW_TYPE_* enum to a short string (F, UD, and so on).
  *
diff --git a/src/intel/compiler/brw_reg_type.h b/src/intel/compiler/brw_reg_type.h
index b33725d8b99..194f09d9598 100644
--- a/src/intel/compiler/brw_reg_type.h
+++ b/src/intel/compiler/brw_reg_type.h
@@ -177,9 +177,6 @@ enum brw_reg_type
 brw_a1_hw_3src_type_to_reg_type(const struct intel_device_info *devinfo,
                                 unsigned hw_type, unsigned exec_type);
 
-unsigned
-brw_reg_type_to_size(enum brw_reg_type type);
-
 const char *
 brw_reg_type_to_letters(enum brw_reg_type type);
 
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index d0cb9bc8c49..fb4fd3b4d42 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -39,7 +39,7 @@ fs_reg_saturate_immediate(fs_reg *reg)
       double df;
    } imm, sat_imm = { 0 };
 
-   const unsigned size = type_sz(reg->type);
+   const unsigned size = brw_type_size_bytes(reg->type);
 
    /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise
    * irrelevant, so just check the size of the type and copy from/to an
@@ -188,7 +188,7 @@ fs_reg::is_zero() const
    if (file != IMM)
       return false;
 
-   assert(type_sz(type) > 1);
+   assert(brw_type_size_bytes(type) > 1);
 
    switch (type) {
    case BRW_TYPE_HF:
@@ -219,7 +219,7 @@ fs_reg::is_one() const
    if (file != IMM)
       return false;
 
-   assert(type_sz(type) > 1);
+   assert(brw_type_size_bytes(type) > 1);
 
    switch (type) {
    case BRW_TYPE_HF:
@@ -250,7 +250,7 @@ fs_reg::is_negative_one() const
    if (file != IMM)
       return false;
 
-   assert(type_sz(type) > 1);
+   assert(brw_type_size_bytes(type) > 1);
 
    switch (type) {
    case BRW_TYPE_HF:
@@ -302,7 +302,7 @@ fs_inst::is_commutative() const
       * commutative. The DW source must be first.
       */
      return !brw_type_is_int(src[0].type) ||
-             type_sz(src[0].type) == type_sz(src[1].type);
+             brw_type_size_bytes(src[0].type) == brw_type_size_bytes(src[1].type);
 
   case BRW_OPCODE_SEL:
      /* MIN and MAX are commutative. */
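Closing illustration for the immediate path of subscript() in brw_reg.h (assumed value):

/* subscript(brw_imm_ud(0x12345678), BRW_TYPE_UW, 1) shifts right by 16 and
 * masks, leaving 0x1234; because bit_size <= 16 the value is replicated
 * into the high word (0x12341234), matching how the hardware reads word
 * immediates. */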