mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
intel/brw: Replace type_sz and brw_reg_type_to_size with brw_type_size_*
Both of these helpers do the same thing. We now have brw_type_size_bits and brw_type_size_bytes and can use whichever makes sense in that place.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28847>
This commit is contained in:
parent
c22f44ff07
commit
545bb8fb6f
29 changed files with 300 additions and 289 deletions
|
|
@ -838,7 +838,7 @@ dest(FILE *file, const struct brw_isa_info *isa, const brw_inst *inst)
|
|||
{
|
||||
const struct intel_device_info *devinfo = isa->devinfo;
|
||||
enum brw_reg_type type = brw_inst_dst_type(devinfo, inst);
|
||||
unsigned elem_size = brw_reg_type_to_size(type);
|
||||
unsigned elem_size = brw_type_size_bytes(type);
|
||||
int err = 0;
|
||||
|
||||
if (is_split_send(devinfo, brw_inst_opcode(isa, inst))) {
|
||||
|
|
@ -945,7 +945,7 @@ dest_3src(FILE *file, const struct intel_device_info *devinfo,
|
|||
type = brw_inst_3src_a16_dst_type(devinfo, inst);
|
||||
subreg_nr = brw_inst_3src_a16_dst_subreg_nr(devinfo, inst) * 4;
|
||||
}
|
||||
subreg_nr /= brw_reg_type_to_size(type);
|
||||
subreg_nr /= brw_type_size_bytes(type);
|
||||
|
||||
if (subreg_nr)
|
||||
format(file, ".%u", subreg_nr);
|
||||
|
|
@ -1019,7 +1019,7 @@ src_da1(FILE *file,
|
|||
if (err == -1)
|
||||
return 0;
|
||||
if (sub_reg_num) {
|
||||
unsigned elem_size = brw_reg_type_to_size(type);
|
||||
unsigned elem_size = brw_type_size_bytes(type);
|
||||
format(file, ".%d", sub_reg_num / elem_size); /* use formal style like spec */
|
||||
}
|
||||
src_align1_region(file, _vert_stride, _width, _horiz_stride);
|
||||
|
|
@ -1106,7 +1106,7 @@ src_da16(FILE *file,
|
|||
if (err == -1)
|
||||
return 0;
|
||||
if (_subreg_nr) {
|
||||
unsigned elem_size = brw_reg_type_to_size(type);
|
||||
unsigned elem_size = brw_type_size_bytes(type);
|
||||
|
||||
/* bit4 for subreg number byte addressing. Make this same meaning as
|
||||
in da1 case, so output looks consistent. */
|
||||
|
|
@ -1272,7 +1272,7 @@ src0_3src(FILE *file, const struct intel_device_info *devinfo,
|
|||
_width == BRW_WIDTH_1 &&
|
||||
_horiz_stride == BRW_HORIZONTAL_STRIDE_0;
|
||||
|
||||
subreg_nr /= brw_reg_type_to_size(type);
|
||||
subreg_nr /= brw_type_size_bytes(type);
|
||||
|
||||
err |= control(file, "negate", m_negate,
|
||||
brw_inst_3src_src0_negate(devinfo, inst), NULL);
|
||||
|
|
@ -1346,7 +1346,7 @@ src1_3src(FILE *file, const struct intel_device_info *devinfo,
|
|||
_width == BRW_WIDTH_1 &&
|
||||
_horiz_stride == BRW_HORIZONTAL_STRIDE_0;
|
||||
|
||||
subreg_nr /= brw_reg_type_to_size(type);
|
||||
subreg_nr /= brw_type_size_bytes(type);
|
||||
|
||||
err |= control(file, "negate", m_negate,
|
||||
brw_inst_3src_src1_negate(devinfo, inst), NULL);
|
||||
|
|
@ -1434,7 +1434,7 @@ src2_3src(FILE *file, const struct intel_device_info *devinfo,
|
|||
_width == BRW_WIDTH_1 &&
|
||||
_horiz_stride == BRW_HORIZONTAL_STRIDE_0;
|
||||
|
||||
subreg_nr /= brw_reg_type_to_size(type);
|
||||
subreg_nr /= brw_type_size_bytes(type);
|
||||
|
||||
err |= control(file, "negate", m_negate,
|
||||
brw_inst_3src_src2_negate(devinfo, inst), NULL);
|
||||
|
|
@ -2009,7 +2009,7 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa,
|
|||
} else if (!is_send(opcode) &&
|
||||
(devinfo->ver < 12 ||
|
||||
brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE ||
|
||||
type_sz(brw_inst_src0_type(devinfo, inst)) < 8)) {
|
||||
brw_type_size_bytes(brw_inst_src0_type(devinfo, inst)) < 8)) {
|
||||
err |= control(file, "conditional modifier", conditional_modifier,
|
||||
brw_inst_cond_modifier(devinfo, inst), NULL);
|
||||
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
|
|||
*/
|
||||
if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
|
||||
dest.nr == BRW_ARF_NULL &&
|
||||
type_sz(dest.type) == 1 &&
|
||||
brw_type_size_bytes(dest.type) == 1 &&
|
||||
dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
|
||||
dest.hstride = BRW_HORIZONTAL_STRIDE_2;
|
||||
}
|
||||
|
|
@ -187,7 +187,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
|
|||
else
|
||||
brw_inst_set_imm_ud(devinfo, inst, reg.ud);
|
||||
|
||||
if (devinfo->ver < 12 && type_sz(reg.type) < 8) {
|
||||
if (devinfo->ver < 12 && brw_type_size_bytes(reg.type) < 8) {
|
||||
brw_inst_set_src1_reg_file(devinfo, inst,
|
||||
BRW_ARCHITECTURE_REGISTER_FILE);
|
||||
brw_inst_set_src1_reg_hw_type(devinfo, inst,
|
||||
|
|
@ -288,7 +288,7 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
|
|||
|
||||
if (reg.file == BRW_IMMEDIATE_VALUE) {
|
||||
/* two-argument instructions can only use 32-bit immediates */
|
||||
assert(type_sz(reg.type) < 8);
|
||||
assert(brw_type_size_bytes(reg.type) < 8);
|
||||
brw_inst_set_imm_ud(devinfo, inst, reg.ud);
|
||||
} else {
|
||||
/* This is a hardware restriction, which may or may not be lifted
|
||||
|
|
@ -486,8 +486,10 @@ brw_alu2(struct brw_codegen *p, unsigned opcode,
|
|||
struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
|
||||
{
|
||||
/* 64-bit immediates are only supported on 1-src instructions */
|
||||
assert(src0.file != BRW_IMMEDIATE_VALUE || type_sz(src0.type) <= 4);
|
||||
assert(src1.file != BRW_IMMEDIATE_VALUE || type_sz(src1.type) <= 4);
|
||||
assert(src0.file != BRW_IMMEDIATE_VALUE ||
|
||||
brw_type_size_bytes(src0.type) <= 4);
|
||||
assert(src1.file != BRW_IMMEDIATE_VALUE ||
|
||||
brw_type_size_bytes(src1.type) <= 4);
|
||||
|
||||
brw_inst *insn = next_insn(p, opcode);
|
||||
brw_set_dest(p, insn, dest);
|
||||
|
|
@ -1918,7 +1920,7 @@ brw_broadcast(struct brw_codegen *p,
|
|||
const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
|
||||
src = stride(suboffset(src, i), 0, 1, 0);
|
||||
|
||||
if (type_sz(src.type) > 4 && !devinfo->has_64bit_int) {
|
||||
if (brw_type_size_bytes(src.type) > 4 && !devinfo->has_64bit_int) {
|
||||
brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
|
||||
subscript(src, BRW_TYPE_D, 0));
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
|
|
@ -1956,7 +1958,7 @@ brw_broadcast(struct brw_codegen *p,
|
|||
/* Take into account the component size and horizontal stride. */
|
||||
assert(src.vstride == src.hstride + src.width);
|
||||
brw_SHL(p, addr, vec1(idx),
|
||||
brw_imm_ud(util_logbase2(type_sz(src.type)) +
|
||||
brw_imm_ud(util_logbase2(brw_type_size_bytes(src.type)) +
|
||||
src.hstride - 1));
|
||||
|
||||
/* We can only address up to limit bytes using the indirect
|
||||
|
|
@ -1974,7 +1976,7 @@ brw_broadcast(struct brw_codegen *p,
|
|||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
|
||||
/* Use indirect addressing to fetch the specified component. */
|
||||
if (type_sz(src.type) > 4 &&
|
||||
if (brw_type_size_bytes(src.type) > 4 &&
|
||||
(intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_int)) {
|
||||
/* From the Cherryview PRM Vol 7. "Register Region Restrictions":
|
||||
*
|
||||
|
|
@ -2117,8 +2119,8 @@ brw_MOV_reloc_imm(struct brw_codegen *p,
|
|||
enum brw_reg_type src_type,
|
||||
uint32_t id)
|
||||
{
|
||||
assert(type_sz(src_type) == 4);
|
||||
assert(type_sz(dst.type) == 4);
|
||||
assert(brw_type_size_bytes(src_type) == 4);
|
||||
assert(brw_type_size_bytes(dst.type) == 4);
|
||||
|
||||
brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_MOV_IMM,
|
||||
p->next_insn_offset, 0);
|
||||
|
|
|
|||
|
|
@ -629,12 +629,14 @@ is_byte_conversion(const struct brw_isa_info *isa,
|
|||
enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
|
||||
|
||||
if (dst_type != src0_type &&
|
||||
(type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) {
|
||||
(brw_type_size_bytes(dst_type) == 1 ||
|
||||
brw_type_size_bytes(src0_type) == 1)) {
|
||||
return true;
|
||||
} else if (num_sources > 1) {
|
||||
enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
|
||||
return dst_type != src1_type &&
|
||||
(type_sz(dst_type) == 1 || type_sz(src1_type) == 1);
|
||||
(brw_type_size_bytes(dst_type) == 1 ||
|
||||
brw_type_size_bytes(src1_type) == 1);
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
@ -664,13 +666,13 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
|
|||
* a D or UD, so it is allowed.
|
||||
*/
|
||||
if (num_sources == 3 && brw_inst_opcode(isa, inst) != BRW_OPCODE_DPAS) {
|
||||
ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 ||
|
||||
brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1,
|
||||
ERROR_IF(brw_type_size_bytes(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 ||
|
||||
brw_type_size_bytes(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1,
|
||||
"Byte data type is not supported for src1/2 register regioning. This includes "
|
||||
"byte broadcast as well.");
|
||||
}
|
||||
if (num_sources == 2) {
|
||||
ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1,
|
||||
ERROR_IF(brw_type_size_bytes(brw_inst_src1_type(devinfo, inst)) == 1,
|
||||
"Byte data type is not supported for src1 register regioning. This includes "
|
||||
"byte broadcast as well.");
|
||||
}
|
||||
|
|
@ -726,7 +728,7 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
|
|||
!devinfo->has_64bit_int,
|
||||
"64-bit int source, but platform does not support it");
|
||||
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&
|
||||
num_sources == 3 && type_sz(src_type) > 4) {
|
||||
num_sources == 3 && brw_type_size_bytes(src_type) > 4) {
|
||||
/* From the Broadwell PRM, Volume 7 "3D Media GPGPU", page 944:
|
||||
*
|
||||
* "This is applicable to 32b datatypes and 16b datatype. 64b
|
||||
|
|
@ -810,8 +812,8 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
|
|||
}
|
||||
|
||||
unsigned exec_type = execution_type(isa, inst);
|
||||
unsigned exec_type_size = brw_reg_type_to_size(exec_type);
|
||||
unsigned dst_type_size = brw_reg_type_to_size(dst_type);
|
||||
unsigned exec_type_size = brw_type_size_bytes(exec_type);
|
||||
unsigned dst_type_size = brw_type_size_bytes(dst_type);
|
||||
|
||||
if (is_byte_conversion(isa, inst)) {
|
||||
/* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
|
||||
|
|
@ -827,14 +829,14 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
|
|||
enum brw_reg_type src1_type = num_sources > 1 ?
|
||||
brw_inst_src1_type(devinfo, inst) : 0;
|
||||
|
||||
ERROR_IF(type_sz(dst_type) == 1 &&
|
||||
(type_sz(src0_type) == 8 ||
|
||||
(num_sources > 1 && type_sz(src1_type) == 8)),
|
||||
ERROR_IF(brw_type_size_bytes(dst_type) == 1 &&
|
||||
(brw_type_size_bytes(src0_type) == 8 ||
|
||||
(num_sources > 1 && brw_type_size_bytes(src1_type) == 8)),
|
||||
"There are no direct conversions between 64-bit types and B/UB");
|
||||
|
||||
ERROR_IF(type_sz(dst_type) == 8 &&
|
||||
(type_sz(src0_type) == 1 ||
|
||||
(num_sources > 1 && type_sz(src1_type) == 1)),
|
||||
ERROR_IF(brw_type_size_bytes(dst_type) == 8 &&
|
||||
(brw_type_size_bytes(src0_type) == 1 ||
|
||||
(num_sources > 1 && brw_type_size_bytes(src1_type) == 1)),
|
||||
"There are no direct conversions between 64-bit types and B/UB");
|
||||
}
|
||||
|
||||
|
|
@ -855,11 +857,11 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
|
|||
enum brw_reg_type src1_type = num_sources > 1 ?
|
||||
brw_inst_src1_type(devinfo, inst) : 0;
|
||||
ERROR_IF(dst_type == BRW_TYPE_HF &&
|
||||
(type_sz(src0_type) == 8 ||
|
||||
(num_sources > 1 && type_sz(src1_type) == 8)),
|
||||
(brw_type_size_bytes(src0_type) == 8 ||
|
||||
(num_sources > 1 && brw_type_size_bytes(src1_type) == 8)),
|
||||
"There are no direct conversions between 64-bit types and HF");
|
||||
|
||||
ERROR_IF(type_sz(dst_type) == 8 &&
|
||||
ERROR_IF(brw_type_size_bytes(dst_type) == 8 &&
|
||||
(src0_type == BRW_TYPE_HF ||
|
||||
(num_sources > 1 && src1_type == BRW_TYPE_HF)),
|
||||
"There are no direct conversions between 64-bit types and HF");
|
||||
|
|
@ -1023,7 +1025,7 @@ general_restrictions_on_region_parameters(const struct brw_isa_info *isa,
|
|||
width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \
|
||||
hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \
|
||||
type = brw_inst_src ## n ## _type(devinfo, inst); \
|
||||
element_size = brw_reg_type_to_size(type); \
|
||||
element_size = brw_type_size_bytes(type); \
|
||||
subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst)
|
||||
|
||||
if (i == 0) {
|
||||
|
|
@ -1436,7 +1438,7 @@ region_alignment_rules(const struct brw_isa_info *isa,
|
|||
width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \
|
||||
hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \
|
||||
type = brw_inst_src ## n ## _type(devinfo, inst); \
|
||||
element_size = brw_reg_type_to_size(type); \
|
||||
element_size = brw_type_size_bytes(type); \
|
||||
subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \
|
||||
align1_access_mask(src ## n ## _access_mask, \
|
||||
exec_size, element_size, subreg, \
|
||||
|
|
@ -1464,7 +1466,7 @@ region_alignment_rules(const struct brw_isa_info *isa,
|
|||
|
||||
unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
|
||||
enum brw_reg_type dst_type = inst_dst_type(isa, inst);
|
||||
unsigned element_size = brw_reg_type_to_size(dst_type);
|
||||
unsigned element_size = brw_type_size_bytes(dst_type);
|
||||
unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
|
||||
unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
|
||||
ERROR_IF(offset >= 64 * reg_unit(devinfo),
|
||||
|
|
@ -1530,7 +1532,7 @@ vector_immediate_restrictions(const struct brw_isa_info *isa,
|
|||
return (struct string){};
|
||||
|
||||
enum brw_reg_type dst_type = inst_dst_type(isa, inst);
|
||||
unsigned dst_type_size = brw_reg_type_to_size(dst_type);
|
||||
unsigned dst_type_size = brw_type_size_bytes(dst_type);
|
||||
unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ?
|
||||
brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
|
||||
unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
|
||||
|
|
@ -1592,11 +1594,11 @@ special_requirements_for_handling_double_precision_data_types(
|
|||
return (struct string){};
|
||||
|
||||
enum brw_reg_type exec_type = execution_type(isa, inst);
|
||||
unsigned exec_type_size = brw_reg_type_to_size(exec_type);
|
||||
unsigned exec_type_size = brw_type_size_bytes(exec_type);
|
||||
|
||||
enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst);
|
||||
enum brw_reg_type dst_type = inst_dst_type(isa, inst);
|
||||
unsigned dst_type_size = brw_reg_type_to_size(dst_type);
|
||||
unsigned dst_type_size = brw_type_size_bytes(dst_type);
|
||||
unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
|
||||
unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst);
|
||||
unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
|
||||
|
|
@ -1629,7 +1631,7 @@ special_requirements_for_handling_double_precision_data_types(
|
|||
hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \
|
||||
file = brw_inst_src ## n ## _reg_file(devinfo, inst); \
|
||||
type = brw_inst_src ## n ## _type(devinfo, inst); \
|
||||
type_size = brw_reg_type_to_size(type); \
|
||||
type_size = brw_type_size_bytes(type); \
|
||||
reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst); \
|
||||
subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \
|
||||
address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst)
|
||||
|
|
@ -1758,7 +1760,7 @@ special_requirements_for_handling_double_precision_data_types(
|
|||
* Quad-Word data must not be used."
|
||||
*/
|
||||
if (devinfo->verx10 >= 125 &&
|
||||
(brw_type_is_float(type) || type_sz(type) == 8)) {
|
||||
(brw_type_is_float(type) || brw_type_size_bytes(type) == 8)) {
|
||||
ERROR_IF(address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER &&
|
||||
vstride == BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL,
|
||||
"Vx1 and VxH indirect addressing for Float, Half-Float, "
|
||||
|
|
@ -1777,8 +1779,8 @@ special_requirements_for_handling_double_precision_data_types(
|
|||
enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
|
||||
enum brw_reg_type src1_type =
|
||||
num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type;
|
||||
unsigned src0_type_size = brw_reg_type_to_size(src0_type);
|
||||
unsigned src1_type_size = brw_reg_type_to_size(src1_type);
|
||||
unsigned src0_type_size = brw_type_size_bytes(src0_type);
|
||||
unsigned src1_type_size = brw_type_size_bytes(src1_type);
|
||||
|
||||
ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&
|
||||
dst_type_size == 8 &&
|
||||
|
|
@ -1820,17 +1822,20 @@ instruction_restrictions(const struct brw_isa_info *isa,
|
|||
if (devinfo->ver >= 12 &&
|
||||
brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL) {
|
||||
enum brw_reg_type exec_type = execution_type(isa, inst);
|
||||
const bool src0_valid = type_sz(brw_inst_src0_type(devinfo, inst)) == 4 ||
|
||||
const bool src0_valid =
|
||||
brw_type_size_bytes(brw_inst_src0_type(devinfo, inst)) == 4 ||
|
||||
brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||
|
||||
!(brw_inst_src0_negate(devinfo, inst) ||
|
||||
brw_inst_src0_abs(devinfo, inst));
|
||||
const bool src1_valid = type_sz(brw_inst_src1_type(devinfo, inst)) == 4 ||
|
||||
const bool src1_valid =
|
||||
brw_type_size_bytes(brw_inst_src1_type(devinfo, inst)) == 4 ||
|
||||
brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||
|
||||
!(brw_inst_src1_negate(devinfo, inst) ||
|
||||
brw_inst_src1_abs(devinfo, inst));
|
||||
|
||||
ERROR_IF(!brw_type_is_float(exec_type) &&
|
||||
type_sz(exec_type) == 4 && !(src0_valid && src1_valid),
|
||||
brw_type_size_bytes(exec_type) == 4 &&
|
||||
!(src0_valid && src1_valid),
|
||||
"When multiplying a DW and any lower precision integer, source "
|
||||
"modifier is not supported.");
|
||||
}
|
||||
|
|
@ -1861,7 +1866,8 @@ instruction_restrictions(const struct brw_isa_info *isa,
|
|||
* text.
|
||||
*/
|
||||
ERROR_IF(brw_type_is_int(src1_type) &&
|
||||
type_sz(src0_type) < 4 && type_sz(src1_type) == 4,
|
||||
brw_type_size_bytes(src0_type) < 4 &&
|
||||
brw_type_size_bytes(src1_type) == 4,
|
||||
"When multiplying a DW and any lower precision integer, the "
|
||||
"DW operand must be src0.");
|
||||
|
||||
|
|
@ -2163,11 +2169,11 @@ instruction_restrictions(const struct brw_isa_info *isa,
|
|||
}
|
||||
|
||||
const unsigned src1_bits_per_element =
|
||||
(8 * brw_reg_type_to_size(src1_type)) >>
|
||||
brw_type_size_bits(src1_type) >>
|
||||
brw_inst_dpas_3src_src1_subbyte(devinfo, inst);
|
||||
|
||||
const unsigned src2_bits_per_element =
|
||||
(8 * brw_reg_type_to_size(src2_type)) >>
|
||||
brw_type_size_bits(src2_type) >>
|
||||
brw_inst_dpas_3src_src2_subbyte(devinfo, inst);
|
||||
|
||||
/* The MAX2(1, ...) is just to prevent possible division by 0 later. */
|
||||
|
|
@ -2208,16 +2214,16 @@ instruction_restrictions(const struct brw_isa_info *isa,
|
|||
"Src2 subregister offset must be a multiple of SystolicDepth "
|
||||
"times OPS_PER_CHAN.");
|
||||
|
||||
ERROR_IF(dst_subnr * type_sz(dst_type) >= REG_SIZE,
|
||||
ERROR_IF(dst_subnr * brw_type_size_bytes(dst_type) >= REG_SIZE,
|
||||
"Destination subregister specifies next register.");
|
||||
|
||||
ERROR_IF(src0_subnr * type_sz(src0_type) >= REG_SIZE,
|
||||
ERROR_IF(src0_subnr * brw_type_size_bytes(src0_type) >= REG_SIZE,
|
||||
"Src0 subregister specifies next register.");
|
||||
|
||||
ERROR_IF((src1_subnr * type_sz(src1_type) * src1_bits_per_element) / 8 >= REG_SIZE,
|
||||
ERROR_IF((src1_subnr * brw_type_size_bytes(src1_type) * src1_bits_per_element) / 8 >= REG_SIZE,
|
||||
"Src1 subregister specifies next register.");
|
||||
|
||||
ERROR_IF((src2_subnr * type_sz(src2_type) * src2_bits_per_element) / 8 >= REG_SIZE,
|
||||
ERROR_IF((src2_subnr * brw_type_size_bytes(src2_type) * src2_bits_per_element) / 8 >= REG_SIZE,
|
||||
"Src2 subregister specifies next register.");
|
||||
|
||||
if (brw_inst_3src_atomic_control(devinfo, inst)) {
|
||||
|
|
|
|||
|
|
@ -436,13 +436,13 @@ fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const
|
|||
if (devinfo->ver >= 12 && (opcode == BRW_OPCODE_MUL ||
|
||||
opcode == BRW_OPCODE_MAD)) {
|
||||
const brw_reg_type exec_type = get_exec_type(this);
|
||||
const unsigned min_type_sz = opcode == BRW_OPCODE_MAD ?
|
||||
MIN2(type_sz(src[1].type), type_sz(src[2].type)) :
|
||||
MIN2(type_sz(src[0].type), type_sz(src[1].type));
|
||||
const unsigned min_brw_type_size_bytes = opcode == BRW_OPCODE_MAD ?
|
||||
MIN2(brw_type_size_bytes(src[1].type), brw_type_size_bytes(src[2].type)) :
|
||||
MIN2(brw_type_size_bytes(src[0].type), brw_type_size_bytes(src[1].type));
|
||||
|
||||
if (brw_type_is_int(exec_type) &&
|
||||
type_sz(exec_type) >= 4 &&
|
||||
type_sz(exec_type) != min_type_sz)
|
||||
brw_type_size_bytes(exec_type) >= 4 &&
|
||||
brw_type_size_bytes(exec_type) != min_brw_type_size_bytes)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -614,9 +614,9 @@ fs_reg::component_size(unsigned width) const
|
|||
const unsigned vs = vstride ? 1 << (vstride - 1) : 0;
|
||||
const unsigned hs = hstride ? 1 << (hstride - 1) : 0;
|
||||
assert(w > 0);
|
||||
return ((MAX2(1, h) - 1) * vs + (w - 1) * hs + 1) * type_sz(type);
|
||||
return ((MAX2(1, h) - 1) * vs + (w - 1) * hs + 1) * brw_type_size_bytes(type);
|
||||
} else {
|
||||
return MAX2(width * stride, 1) * type_sz(type);
|
||||
return MAX2(width * stride, 1) * brw_type_size_bytes(type);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -708,7 +708,7 @@ fs_inst::is_partial_write() const
|
|||
return this->size_written < 32;
|
||||
}
|
||||
|
||||
return this->exec_size * type_sz(this->dst.type) < 32 ||
|
||||
return this->exec_size * brw_type_size_bytes(this->dst.type) < 32 ||
|
||||
!this->dst.is_contiguous();
|
||||
}
|
||||
|
||||
|
|
@ -963,7 +963,7 @@ fs_inst::size_read(int arg) const
|
|||
switch (src[arg].file) {
|
||||
case UNIFORM:
|
||||
case IMM:
|
||||
return components_read(arg) * type_sz(src[arg].type);
|
||||
return components_read(arg) * brw_type_size_bytes(src[arg].type);
|
||||
case BAD_FILE:
|
||||
case ARF:
|
||||
case FIXED_GRF:
|
||||
|
|
@ -1768,7 +1768,7 @@ fs_visitor::assign_urb_setup()
|
|||
* cross-channel access in the representation above are
|
||||
* disallowed.
|
||||
*/
|
||||
assert(inst->src[i].stride * type_sz(inst->src[i].type) == chan_sz);
|
||||
assert(inst->src[i].stride * brw_type_size_bytes(inst->src[i].type) == chan_sz);
|
||||
|
||||
/* Number of channels processing the same polygon. */
|
||||
const unsigned poly_width = dispatch_width / max_polygons;
|
||||
|
|
@ -1791,7 +1791,7 @@ fs_visitor::assign_urb_setup()
|
|||
* are stored a GRF apart on the thread payload, so
|
||||
* use that as vertical stride.
|
||||
*/
|
||||
const unsigned vstride = reg_size / type_sz(inst->src[i].type);
|
||||
const unsigned vstride = reg_size / brw_type_size_bytes(inst->src[i].type);
|
||||
assert(vstride <= 32);
|
||||
assert(chan % poly_width == 0);
|
||||
reg = stride(reg, vstride, poly_width, 0);
|
||||
|
|
@ -1851,7 +1851,7 @@ fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
|
|||
*/
|
||||
unsigned total_size = inst->exec_size *
|
||||
inst->src[i].stride *
|
||||
type_sz(inst->src[i].type);
|
||||
brw_type_size_bytes(inst->src[i].type);
|
||||
|
||||
assert(total_size <= 2 * REG_SIZE);
|
||||
const unsigned exec_size =
|
||||
|
|
@ -2517,7 +2517,7 @@ fs_visitor::dump_instruction_to_file(const fs_inst *inst, FILE *file) const
|
|||
case FIXED_GRF:
|
||||
fprintf(file, "g%d", inst->dst.nr);
|
||||
if (inst->dst.subnr != 0)
|
||||
fprintf(file, ".%d", inst->dst.subnr / type_sz(inst->dst.type));
|
||||
fprintf(file, ".%d", inst->dst.subnr / brw_type_size_bytes(inst->dst.type));
|
||||
break;
|
||||
case BAD_FILE:
|
||||
fprintf(file, "(null)");
|
||||
|
|
@ -2658,7 +2658,7 @@ fs_visitor::dump_instruction_to_file(const fs_inst *inst, FILE *file) const
|
|||
if (inst->src[i].file == FIXED_GRF && inst->src[i].subnr != 0) {
|
||||
assert(inst->src[i].offset == 0);
|
||||
|
||||
fprintf(file, ".%d", inst->src[i].subnr / type_sz(inst->src[i].type));
|
||||
fprintf(file, ".%d", inst->src[i].subnr / brw_type_size_bytes(inst->src[i].type));
|
||||
} else if (inst->src[i].offset ||
|
||||
(inst->src[i].file == VGRF &&
|
||||
alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) {
|
||||
|
|
|
|||
|
|
@ -192,7 +192,7 @@ namespace brw {
|
|||
|
||||
if (n > 0)
|
||||
return fs_reg(VGRF, shader->alloc.allocate(
|
||||
DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
|
||||
DIV_ROUND_UP(n * brw_type_size_bytes(type) * dispatch_width(),
|
||||
unit * REG_SIZE) * unit),
|
||||
type);
|
||||
else
|
||||
|
|
@ -476,7 +476,7 @@ namespace brw {
|
|||
/* The instruction splitting code isn't advanced enough to split
|
||||
* these so we need to handle that ourselves.
|
||||
*/
|
||||
if (dispatch_width() * type_sz(tmp.type) > 2 * REG_SIZE) {
|
||||
if (dispatch_width() * brw_type_size_bytes(tmp.type) > 2 * REG_SIZE) {
|
||||
const unsigned half_width = dispatch_width() / 2;
|
||||
const fs_builder ubld = exec_all().group(half_width, 0);
|
||||
fs_reg left = tmp;
|
||||
|
|
@ -496,7 +496,7 @@ namespace brw {
|
|||
}
|
||||
|
||||
if (cluster_size > 2) {
|
||||
if (type_sz(tmp.type) <= 4) {
|
||||
if (brw_type_size_bytes(tmp.type) <= 4) {
|
||||
const fs_builder ubld =
|
||||
exec_all().group(dispatch_width() / 4, 0);
|
||||
ubld.emit_scan_step(opcode, mod, tmp, 1, 4, 2, 4);
|
||||
|
|
@ -747,7 +747,7 @@ namespace brw {
|
|||
inst->header_size = header_size;
|
||||
inst->size_written = header_size * REG_SIZE;
|
||||
for (unsigned i = header_size; i < sources; i++) {
|
||||
inst->size_written += dispatch_width() * type_sz(src[i].type) *
|
||||
inst->size_written += dispatch_width() * brw_type_size_bytes(src[i].type) *
|
||||
dst.stride;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -341,7 +341,8 @@ opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block)
|
|||
if (!brw_type_is_float(inst->dst.type))
|
||||
break;
|
||||
|
||||
if (type_sz(scan_inst->dst.type) > type_sz(inst->dst.type))
|
||||
if (brw_type_size_bits(scan_inst->dst.type) >
|
||||
brw_type_size_bits(inst->dst.type))
|
||||
break;
|
||||
} else {
|
||||
/* If the destination type of scan_inst is integer, then:
|
||||
|
|
@ -360,11 +361,12 @@ opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block)
|
|||
* as the destination of inst and the same signedness.
|
||||
*/
|
||||
if (!brw_type_is_int(inst->src[0].type) ||
|
||||
type_sz(scan_inst->dst.type) != type_sz(inst->src[0].type))
|
||||
brw_type_size_bits(scan_inst->dst.type) != brw_type_size_bits(inst->src[0].type))
|
||||
break;
|
||||
|
||||
if (brw_type_is_int(inst->dst.type)) {
|
||||
if (type_sz(inst->dst.type) < type_sz(scan_inst->dst.type))
|
||||
if (brw_type_size_bits(inst->dst.type) <
|
||||
brw_type_size_bits(scan_inst->dst.type))
|
||||
break;
|
||||
|
||||
if (inst->conditional_mod != BRW_CONDITIONAL_Z &&
|
||||
|
|
@ -388,7 +390,8 @@ opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block)
|
|||
/* Comparison result may be altered if the bit-size changes
|
||||
* since that affects range, denorms, etc
|
||||
*/
|
||||
if (type_sz(scan_inst->dst.type) != type_sz(inst->dst.type))
|
||||
if (brw_type_size_bits(scan_inst->dst.type) !=
|
||||
brw_type_size_bits(inst->dst.type))
|
||||
break;
|
||||
|
||||
if (brw_type_is_float(scan_inst->dst.type) !=
|
||||
|
|
|
|||
|
|
@ -1081,7 +1081,7 @@ add_candidate_immediate(struct table *table, fs_inst *inst, unsigned ip,
|
|||
unsigned box_idx = box_instruction(table, const_ctx, inst, ip, block);
|
||||
|
||||
v->value.u64 = inst->src[i].d64;
|
||||
v->bit_size = 8 * type_sz(inst->src[i].type);
|
||||
v->bit_size = brw_type_size_bits(inst->src[i].type);
|
||||
v->instr_index = box_idx;
|
||||
v->src = i;
|
||||
v->allow_one_constant = allow_one_constant;
|
||||
|
|
@ -1570,7 +1570,7 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
|||
struct brw_reg imm_reg = build_imm_reg_for_copy(imm);
|
||||
|
||||
/* Ensure we have enough space in the register to copy the immediate */
|
||||
assert(reg.offset + type_sz(imm_reg.type) * width <= REG_SIZE);
|
||||
assert(reg.offset + brw_type_size_bytes(imm_reg.type) * width <= REG_SIZE);
|
||||
|
||||
ibld.MOV(retype(reg, imm_reg.type), imm_reg);
|
||||
}
|
||||
|
|
@ -1585,11 +1585,11 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
|||
if (link->type == either_type) {
|
||||
/* Do not change the register type. */
|
||||
} else if (link->type == integer_only) {
|
||||
reg->type = brw_int_type(type_sz(reg->type), true);
|
||||
reg->type = brw_int_type(brw_type_size_bytes(reg->type), true);
|
||||
} else {
|
||||
assert(link->type == float_only);
|
||||
|
||||
switch (type_sz(reg->type)) {
|
||||
switch (brw_type_size_bytes(reg->type)) {
|
||||
case 2:
|
||||
reg->type = BRW_TYPE_HF;
|
||||
break;
|
||||
|
|
@ -1606,7 +1606,7 @@ brw_fs_opt_combine_constants(fs_visitor &s)
|
|||
} else if ((link->inst->opcode == BRW_OPCODE_SHL ||
|
||||
link->inst->opcode == BRW_OPCODE_ASR) &&
|
||||
link->negate) {
|
||||
reg->type = brw_int_type(type_sz(reg->type), true);
|
||||
reg->type = brw_int_type(brw_type_size_bytes(reg->type), true);
|
||||
}
|
||||
|
||||
#if MESA_DEBUG
|
||||
|
|
|
|||
|
|
@ -591,8 +591,8 @@ can_take_stride(fs_inst *inst, brw_reg_type dst_type,
|
|||
* would break this restriction.
|
||||
*/
|
||||
if (has_dst_aligned_region_restriction(devinfo, inst, dst_type) &&
|
||||
!(type_sz(inst->src[arg].type) * stride ==
|
||||
type_sz(dst_type) * inst->dst.stride ||
|
||||
!(brw_type_size_bytes(inst->src[arg].type) * stride ==
|
||||
brw_type_size_bytes(dst_type) * inst->dst.stride ||
|
||||
stride == 0))
|
||||
return false;
|
||||
|
||||
|
|
@ -607,7 +607,7 @@ can_take_stride(fs_inst *inst, brw_reg_type dst_type,
|
|||
* cannot use the replicate control.
|
||||
*/
|
||||
if (inst->is_3src(compiler)) {
|
||||
if (type_sz(inst->src[arg].type) > 4)
|
||||
if (brw_type_size_bytes(inst->src[arg].type) > 4)
|
||||
return stride == 1;
|
||||
else
|
||||
return stride == 1 || stride == 0;
|
||||
|
|
@ -825,7 +825,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
* destination of the copy, and simply replacing the sources would give a
|
||||
* program with different semantics.
|
||||
*/
|
||||
if ((type_sz(entry->dst.type) < type_sz(inst->src[arg].type) ||
|
||||
if ((brw_type_size_bits(entry->dst.type) < brw_type_size_bits(inst->src[arg].type) ||
|
||||
entry->is_partial_write) &&
|
||||
inst->opcode != BRW_OPCODE_MOV) {
|
||||
return false;
|
||||
|
|
@ -846,7 +846,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
*/
|
||||
if (entry_stride != 1 &&
|
||||
(inst->src[arg].stride *
|
||||
type_sz(inst->src[arg].type)) % type_sz(entry->src.type) != 0)
|
||||
brw_type_size_bytes(inst->src[arg].type)) % brw_type_size_bytes(entry->src.type) != 0)
|
||||
return false;
|
||||
|
||||
/* Since semantics of source modifiers are type-dependent we need to
|
||||
|
|
@ -858,7 +858,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
if (has_source_modifiers &&
|
||||
entry->dst.type != inst->src[arg].type &&
|
||||
(!inst->can_change_types() ||
|
||||
type_sz(entry->dst.type) != type_sz(inst->src[arg].type)))
|
||||
brw_type_size_bits(entry->dst.type) != brw_type_size_bits(inst->src[arg].type)))
|
||||
return false;
|
||||
|
||||
if ((entry->src.negate || entry->src.abs) &&
|
||||
|
|
@ -881,8 +881,9 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
if (entry->src.file == FIXED_GRF) {
|
||||
if (inst->src[arg].stride) {
|
||||
const unsigned orig_width = 1 << entry->src.width;
|
||||
const unsigned reg_width = REG_SIZE / (type_sz(inst->src[arg].type) *
|
||||
inst->src[arg].stride);
|
||||
const unsigned reg_width =
|
||||
REG_SIZE / (brw_type_size_bytes(inst->src[arg].type) *
|
||||
inst->src[arg].stride);
|
||||
inst->src[arg].width = cvt(MIN2(orig_width, reg_width)) - 1;
|
||||
inst->src[arg].hstride = cvt(inst->src[arg].stride);
|
||||
inst->src[arg].vstride = inst->src[arg].hstride + inst->src[arg].width;
|
||||
|
|
@ -904,14 +905,14 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
* reading, and the base byte offset within that component.
|
||||
*/
|
||||
assert(entry->dst.stride == 1);
|
||||
const unsigned component = rel_offset / type_sz(entry->dst.type);
|
||||
const unsigned suboffset = rel_offset % type_sz(entry->dst.type);
|
||||
const unsigned component = rel_offset / brw_type_size_bytes(entry->dst.type);
|
||||
const unsigned suboffset = rel_offset % brw_type_size_bytes(entry->dst.type);
|
||||
|
||||
/* Calculate the byte offset at the origin of the copy of the given
|
||||
* component and suboffset.
|
||||
*/
|
||||
inst->src[arg] = byte_offset(inst->src[arg],
|
||||
component * entry_stride * type_sz(entry->src.type) + suboffset);
|
||||
component * entry_stride * brw_type_size_bytes(entry->src.type) + suboffset);
|
||||
|
||||
if (has_source_modifiers) {
|
||||
if (entry->dst.type != inst->src[arg].type) {
|
||||
|
|
@ -941,7 +942,7 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
{
|
||||
bool progress = false;
|
||||
|
||||
if (type_sz(entry->src.type) > 4)
|
||||
if (brw_type_size_bytes(entry->src.type) > 4)
|
||||
return false;
|
||||
|
||||
if (inst->src[arg].file != VGRF)
|
||||
|
|
@ -962,7 +963,8 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
* type, the entry doesn't contain all of the data that the user is
|
||||
* trying to use.
|
||||
*/
|
||||
if (type_sz(inst->src[arg].type) > type_sz(entry->dst.type))
|
||||
if (brw_type_size_bits(inst->src[arg].type) >
|
||||
brw_type_size_bits(entry->dst.type))
|
||||
return false;
|
||||
|
||||
fs_reg val = entry->src;
|
||||
|
|
@ -976,8 +978,10 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
* ...
|
||||
* mul(8) g47<1>D g86<8,8,1>D g12<16,8,2>W
|
||||
*/
|
||||
if (type_sz(inst->src[arg].type) < type_sz(entry->dst.type)) {
|
||||
if (type_sz(inst->src[arg].type) != 2 || type_sz(entry->dst.type) != 4)
|
||||
if (brw_type_size_bits(inst->src[arg].type) <
|
||||
brw_type_size_bits(entry->dst.type)) {
|
||||
if (brw_type_size_bytes(inst->src[arg].type) != 2 ||
|
||||
brw_type_size_bytes(entry->dst.type) != 4)
|
||||
return false;
|
||||
|
||||
assert(inst->src[arg].subnr == 0 || inst->src[arg].subnr == 2);
|
||||
|
|
@ -1059,7 +1063,7 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
|||
* will now "fix" the constant.
|
||||
*/
|
||||
if (inst->opcode == BRW_OPCODE_MUL &&
|
||||
type_sz(inst->src[1].type) < 4 &&
|
||||
brw_type_size_bytes(inst->src[1].type) < 4 &&
|
||||
(inst->src[0].type == BRW_TYPE_D ||
|
||||
inst->src[0].type == BRW_TYPE_UD)) {
|
||||
inst->src[0] = val;
|
||||
|
|
@ -1352,8 +1356,8 @@ opt_copy_propagation_local(const brw_compiler *compiler, linear_ctx *lin_ctx,
|
|||
int offset = 0;
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
int effective_width = i < inst->header_size ? 8 : inst->exec_size;
|
||||
const unsigned size_written = effective_width *
|
||||
type_sz(inst->src[i].type);
|
||||
const unsigned size_written =
|
||||
effective_width * brw_type_size_bytes(inst->src[i].type);
|
||||
if (inst->src[i].file == VGRF ||
|
||||
(inst->src[i].file == FIXED_GRF &&
|
||||
inst->src[i].is_contiguous())) {
|
||||
|
|
|
|||
|
|
@ -224,7 +224,7 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
|
|||
|
||||
reg.nr = imm_byte_offset / REG_SIZE;
|
||||
reg.subnr = imm_byte_offset % REG_SIZE;
|
||||
if (type_sz(reg.type) > 4 && !devinfo->has_64bit_int) {
|
||||
if (brw_type_size_bytes(reg.type) > 4 && !devinfo->has_64bit_int) {
|
||||
brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
|
||||
subscript(reg, BRW_TYPE_D, 0));
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
|
|
@ -298,7 +298,7 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
|
|||
else
|
||||
brw_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl);
|
||||
|
||||
if (type_sz(reg.type) > 4 &&
|
||||
if (brw_type_size_bytes(reg.type) > 4 &&
|
||||
(intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_int)) {
|
||||
/* From the Cherryview PRM Vol 7. "Register Region Restrictions":
|
||||
*
|
||||
|
|
@ -338,7 +338,7 @@ fs_generator::generate_shuffle(fs_inst *inst,
|
|||
/* Ivy bridge has some strange behavior that makes this a real pain to
|
||||
* implement for 64-bit values so we just don't bother.
|
||||
*/
|
||||
assert(devinfo->has_64bit_float || type_sz(src.type) <= 4);
|
||||
assert(devinfo->has_64bit_float || brw_type_size_bytes(src.type) <= 4);
|
||||
|
||||
/* Gen12.5 adds the following region restriction:
|
||||
*
|
||||
|
|
@ -388,8 +388,8 @@ fs_generator::generate_shuffle(fs_inst *inst,
|
|||
group_idx.vstride--;
|
||||
}
|
||||
|
||||
assert(type_sz(group_idx.type) <= 4);
|
||||
if (type_sz(group_idx.type) == 4) {
|
||||
assert(brw_type_size_bytes(group_idx.type) <= 4);
|
||||
if (brw_type_size_bytes(group_idx.type) == 4) {
|
||||
/* The destination stride of an instruction (in bytes) must be
|
||||
* greater than or equal to the size of the rest of the
|
||||
* instruction. Since the address register is of type UW, we
|
||||
|
|
@ -438,7 +438,7 @@ fs_generator::generate_shuffle(fs_inst *inst,
|
|||
/* Take into account the component size and horizontal stride. */
|
||||
assert(src.vstride == src.hstride + src.width);
|
||||
insn = brw_SHL(p, addr, group_idx,
|
||||
brw_imm_uw(util_logbase2(type_sz(src.type)) +
|
||||
brw_imm_uw(util_logbase2(brw_type_size_bytes(src.type)) +
|
||||
src.hstride - 1));
|
||||
if (devinfo->ver >= 12)
|
||||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
|
|
@ -468,7 +468,7 @@ fs_generator::generate_quad_swizzle(const fs_inst *inst,
|
|||
/* The value is uniform across all channels */
|
||||
brw_MOV(p, dst, src);
|
||||
|
||||
} else if (devinfo->ver < 11 && type_sz(src.type) == 4) {
|
||||
} else if (devinfo->ver < 11 && brw_type_size_bytes(src.type) == 4) {
|
||||
/* This only works on 8-wide 32-bit values */
|
||||
assert(inst->exec_size == 8);
|
||||
assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
|
||||
|
|
@ -581,7 +581,7 @@ fs_generator::generate_ddx(const fs_inst *inst,
|
|||
width = BRW_WIDTH_4;
|
||||
}
|
||||
|
||||
struct brw_reg src0 = byte_offset(src, type_sz(src.type));;
|
||||
struct brw_reg src0 = byte_offset(src, brw_type_size_bytes(src.type));;
|
||||
struct brw_reg src1 = src;
|
||||
|
||||
src0.vstride = vstride;
|
||||
|
|
@ -602,7 +602,7 @@ void
|
|||
fs_generator::generate_ddy(const fs_inst *inst,
|
||||
struct brw_reg dst, struct brw_reg src)
|
||||
{
|
||||
const uint32_t type_size = type_sz(src.type);
|
||||
const uint32_t type_size = brw_type_size_bytes(src.type);
|
||||
|
||||
if (inst->opcode == FS_OPCODE_DDY_FINE) {
|
||||
/* produce accurate derivatives.
|
||||
|
|
@ -1118,7 +1118,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
case FS_OPCODE_PIXEL_X:
|
||||
assert(src[0].type == BRW_TYPE_UW);
|
||||
assert(src[1].type == BRW_TYPE_UW);
|
||||
src[0].subnr = 0 * type_sz(src[0].type);
|
||||
src[0].subnr = 0 * brw_type_size_bytes(src[0].type);
|
||||
if (src[1].file == BRW_IMMEDIATE_VALUE) {
|
||||
assert(src[1].ud == 0);
|
||||
brw_MOV(p, dst, stride(src[0], 8, 4, 1));
|
||||
|
|
@ -1130,7 +1130,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
case FS_OPCODE_PIXEL_Y:
|
||||
assert(src[0].type == BRW_TYPE_UW);
|
||||
assert(src[1].type == BRW_TYPE_UW);
|
||||
src[0].subnr = 4 * type_sz(src[0].type);
|
||||
src[0].subnr = 4 * brw_type_size_bytes(src[0].type);
|
||||
if (src[1].file == BRW_IMMEDIATE_VALUE) {
|
||||
assert(src[1].ud == 0);
|
||||
brw_MOV(p, dst, stride(src[0], 8, 4, 1));
|
||||
|
|
@ -1244,7 +1244,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
|
||||
case SHADER_OPCODE_SEL_EXEC:
|
||||
assert(inst->force_writemask_all);
|
||||
assert(devinfo->has_64bit_float || type_sz(dst.type) <= 4);
|
||||
assert(devinfo->has_64bit_float || brw_type_size_bytes(dst.type) <= 4);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_MOV(p, dst, src[1]);
|
||||
brw_set_default_mask_control(p, BRW_MASK_ENABLE);
|
||||
|
|
@ -1260,7 +1260,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
|
||||
case SHADER_OPCODE_CLUSTER_BROADCAST: {
|
||||
assert((!intel_device_info_is_9lp(devinfo) &&
|
||||
devinfo->has_64bit_float) || type_sz(src[0].type) <= 4);
|
||||
devinfo->has_64bit_float) || brw_type_size_bytes(src[0].type) <= 4);
|
||||
assert(!src[0].negate && !src[0].abs);
|
||||
assert(src[1].file == BRW_IMMEDIATE_VALUE);
|
||||
assert(src[1].type == BRW_TYPE_UD);
|
||||
|
|
|
|||
|
|
@ -624,7 +624,7 @@ brw_fs_lower_vgrf_to_fixed_grf(const struct intel_device_info *devinfo, fs_inst
|
|||
new_reg = brw_vec1_grf(reg->nr, 0);
|
||||
} else if (reg->stride > 4) {
|
||||
assert(reg != &inst->dst);
|
||||
assert(reg->stride * type_sz(reg->type) <= REG_SIZE);
|
||||
assert(reg->stride * brw_type_size_bytes(reg->type) <= REG_SIZE);
|
||||
new_reg = brw_vecn_grf(1, reg->nr, 0);
|
||||
new_reg = stride(new_reg, reg->stride, 1, 0);
|
||||
} else {
|
||||
|
|
@ -636,7 +636,8 @@ brw_fs_lower_vgrf_to_fixed_grf(const struct intel_device_info *devinfo, fs_inst
|
|||
*
|
||||
* The maximum width value that could satisfy this restriction is:
|
||||
*/
|
||||
const unsigned reg_width = REG_SIZE / (reg->stride * type_sz(reg->type));
|
||||
const unsigned reg_width =
|
||||
REG_SIZE / (reg->stride * brw_type_size_bytes(reg->type));
|
||||
|
||||
/* Because the hardware can only split source regions at a whole
|
||||
* multiple of width during decompression (i.e. vertically), clamp
|
||||
|
|
|
|||
|
|
@ -424,7 +424,7 @@ brw_fs_lower_integer_multiplication(fs_visitor &s)
|
|||
/* If the instruction is already in a form that does not need lowering,
|
||||
* return early.
|
||||
*/
|
||||
if (type_sz(inst->src[1].type) < 4 && type_sz(inst->src[0].type) <= 4)
|
||||
if (brw_type_size_bytes(inst->src[1].type) < 4 && brw_type_size_bytes(inst->src[0].type) <= 4)
|
||||
continue;
|
||||
|
||||
if ((inst->dst.type == BRW_TYPE_Q ||
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ namespace {
|
|||
bool
|
||||
is_byte_raw_mov(const fs_inst *inst)
|
||||
{
|
||||
return type_sz(inst->dst.type) == 1 &&
|
||||
return brw_type_size_bytes(inst->dst.type) == 1 &&
|
||||
inst->opcode == BRW_OPCODE_MOV &&
|
||||
inst->src[0].type == inst->dst.type &&
|
||||
!inst->saturate &&
|
||||
|
|
@ -55,17 +55,19 @@ namespace {
|
|||
unsigned i)
|
||||
{
|
||||
if (has_dst_aligned_region_restriction(devinfo, inst)) {
|
||||
return MAX2(type_sz(inst->dst.type), byte_stride(inst->dst));
|
||||
return MAX2(brw_type_size_bytes(inst->dst.type),
|
||||
byte_stride(inst->dst));
|
||||
|
||||
} else if (has_subdword_integer_region_restriction(devinfo, inst) &&
|
||||
type_sz(inst->src[i].type) < 4 && byte_stride(inst->src[i]) >= 4) {
|
||||
brw_type_size_bytes(inst->src[i].type) < 4 &&
|
||||
byte_stride(inst->src[i]) >= 4) {
|
||||
/* Use a stride of 32bits if possible, since that will guarantee that
|
||||
* the copy emitted to lower this region won't be affected by the
|
||||
* sub-dword integer region restrictions. This may not be possible
|
||||
* for the second source of an instruction if we're required to use
|
||||
* packed data due to Wa_16012383669.
|
||||
*/
|
||||
return (i == 1 ? type_sz(inst->src[i].type) : 4);
|
||||
return (i == 1 ? brw_type_size_bytes(inst->src[i].type) : 4);
|
||||
|
||||
} else {
|
||||
return byte_stride(inst->src[i]);
|
||||
|
|
@ -84,16 +86,17 @@ namespace {
|
|||
return reg_offset(inst->dst) % (reg_unit(devinfo) * REG_SIZE);
|
||||
|
||||
} else if (has_subdword_integer_region_restriction(devinfo, inst) &&
|
||||
type_sz(inst->src[i].type) < 4 && byte_stride(inst->src[i]) >= 4) {
|
||||
const unsigned dst_byte_stride = MAX2(byte_stride(inst->dst),
|
||||
type_sz(inst->dst.type));
|
||||
brw_type_size_bytes(inst->src[i].type) < 4 &&
|
||||
byte_stride(inst->src[i]) >= 4) {
|
||||
const unsigned dst_byte_stride =
|
||||
MAX2(byte_stride(inst->dst), brw_type_size_bytes(inst->dst.type));
|
||||
const unsigned src_byte_stride = required_src_byte_stride(devinfo, inst, i);
|
||||
const unsigned dst_byte_offset =
|
||||
reg_offset(inst->dst) % (reg_unit(devinfo) * REG_SIZE);
|
||||
const unsigned src_byte_offset =
|
||||
reg_offset(inst->src[i]) % (reg_unit(devinfo) * REG_SIZE);
|
||||
|
||||
if (src_byte_stride > type_sz(inst->src[i].type)) {
|
||||
if (src_byte_stride > brw_type_size_bytes(inst->src[i].type)) {
|
||||
assert(src_byte_stride >= dst_byte_stride);
|
||||
/* The source is affected by the Xe2+ sub-dword integer regioning
|
||||
* restrictions. For the case of source 0 BSpec#56640 specifies a
|
||||
|
|
@ -120,7 +123,7 @@ namespace {
|
|||
const unsigned m = 64 * dst_byte_stride / src_byte_stride;
|
||||
return dst_byte_offset % m * src_byte_stride / dst_byte_stride;
|
||||
} else {
|
||||
assert(src_byte_stride == type_sz(inst->src[i].type));
|
||||
assert(src_byte_stride == brw_type_size_bytes(inst->src[i].type));
|
||||
/* A packed source is required, likely due to the stricter
|
||||
* requirements of the second source region. The source being
|
||||
* packed guarantees that the region of the original instruction
|
||||
|
|
@ -159,8 +162,8 @@ namespace {
|
|||
* lowering pass will detect the mismatch in has_invalid_src_region
|
||||
* and fix the sources of the multiply instead of the destination.
|
||||
*/
|
||||
return inst->dst.hstride * type_sz(inst->dst.type);
|
||||
} else if (type_sz(inst->dst.type) < get_exec_type_size(inst) &&
|
||||
return inst->dst.hstride * brw_type_size_bytes(inst->dst.type);
|
||||
} else if (brw_type_size_bytes(inst->dst.type) < get_exec_type_size(inst) &&
|
||||
!is_byte_raw_mov(inst)) {
|
||||
return get_exec_type_size(inst);
|
||||
} else {
|
||||
|
|
@ -168,13 +171,13 @@ namespace {
|
|||
* size across all source and destination operands we are required to
|
||||
* lower.
|
||||
*/
|
||||
unsigned max_stride = inst->dst.stride * type_sz(inst->dst.type);
|
||||
unsigned min_size = type_sz(inst->dst.type);
|
||||
unsigned max_size = type_sz(inst->dst.type);
|
||||
unsigned max_stride = inst->dst.stride * brw_type_size_bytes(inst->dst.type);
|
||||
unsigned min_size = brw_type_size_bytes(inst->dst.type);
|
||||
unsigned max_size = brw_type_size_bytes(inst->dst.type);
|
||||
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) {
|
||||
const unsigned size = type_sz(inst->src[i].type);
|
||||
const unsigned size = brw_type_size_bytes(inst->src[i].type);
|
||||
max_stride = MAX2(max_stride, inst->src[i].stride * size);
|
||||
min_size = MIN2(min_size, size);
|
||||
max_size = MAX2(max_size, size);
|
||||
|
|
@ -239,23 +242,23 @@ namespace {
|
|||
* don't support 64-bit types at all.
|
||||
*/
|
||||
if ((!devinfo->has_64bit_int ||
|
||||
intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4)
|
||||
intel_device_info_is_9lp(devinfo)) && brw_type_size_bytes(t) > 4)
|
||||
return BRW_TYPE_UD;
|
||||
else if (has_dst_aligned_region_restriction(devinfo, inst))
|
||||
return brw_int_type(type_sz(t), false);
|
||||
return brw_int_type(brw_type_size_bytes(t), false);
|
||||
else
|
||||
return t;
|
||||
|
||||
case SHADER_OPCODE_SEL_EXEC:
|
||||
if ((!has_64bit || devinfo->has_64bit_float_via_math_pipe) &&
|
||||
type_sz(t) > 4)
|
||||
brw_type_size_bytes(t) > 4)
|
||||
return BRW_TYPE_UD;
|
||||
else
|
||||
return t;
|
||||
|
||||
case SHADER_OPCODE_QUAD_SWIZZLE:
|
||||
if (has_dst_aligned_region_restriction(devinfo, inst))
|
||||
return brw_int_type(type_sz(t), false);
|
||||
return brw_int_type(brw_type_size_bytes(t), false);
|
||||
else
|
||||
return t;
|
||||
|
||||
|
|
@ -276,10 +279,10 @@ namespace {
|
|||
* support 64-bit types at all.
|
||||
*/
|
||||
if ((!has_64bit || devinfo->verx10 >= 125 ||
|
||||
intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4)
|
||||
intel_device_info_is_9lp(devinfo)) && brw_type_size_bytes(t) > 4)
|
||||
return BRW_TYPE_UD;
|
||||
else
|
||||
return brw_int_type(type_sz(t), false);
|
||||
return brw_int_type(brw_type_size_bytes(t), false);
|
||||
|
||||
default:
|
||||
return t;
|
||||
|
|
@ -336,7 +339,7 @@ namespace {
|
|||
const brw_reg_type exec_type = get_exec_type(inst);
|
||||
const unsigned dst_byte_offset = reg_offset(inst->dst) % (reg_unit(devinfo) * REG_SIZE);
|
||||
const bool is_narrowing_conversion = !is_byte_raw_mov(inst) &&
|
||||
type_sz(inst->dst.type) < type_sz(exec_type);
|
||||
brw_type_size_bytes(inst->dst.type) < brw_type_size_bytes(exec_type);
|
||||
|
||||
return (has_dst_aligned_region_restriction(devinfo, inst) &&
|
||||
(required_dst_byte_stride(inst) != byte_stride(inst->dst) ||
|
||||
|
|
@ -455,8 +458,8 @@ namespace brw {
|
|||
assert(v->devinfo->has_integer_dword_mul ||
|
||||
inst->opcode != BRW_OPCODE_MUL ||
|
||||
brw_type_is_float(get_exec_type(inst)) ||
|
||||
MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4 ||
|
||||
type_sz(inst->src[i].type) == get_exec_type_size(inst));
|
||||
MIN2(brw_type_size_bytes(inst->src[0].type), brw_type_size_bytes(inst->src[1].type)) >= 4 ||
|
||||
brw_type_size_bytes(inst->src[i].type) == get_exec_type_size(inst));
|
||||
|
||||
const fs_builder ibld(v, block, inst);
|
||||
const fs_reg tmp = ibld.vgrf(get_exec_type(inst));
|
||||
|
|
@ -488,8 +491,8 @@ namespace {
|
|||
* instructions into the program unnecessarily.
|
||||
*/
|
||||
const unsigned stride =
|
||||
type_sz(inst->dst.type) * inst->dst.stride <= type_sz(type) ? 1 :
|
||||
type_sz(inst->dst.type) * inst->dst.stride / type_sz(type);
|
||||
brw_type_size_bytes(inst->dst.type) * inst->dst.stride <= brw_type_size_bytes(type) ? 1 :
|
||||
brw_type_size_bytes(inst->dst.type) * inst->dst.stride / brw_type_size_bytes(type);
|
||||
fs_reg tmp = ibld.vgrf(type, stride);
|
||||
ibld.UNDEF(tmp);
|
||||
tmp = horiz_stride(tmp, stride);
|
||||
|
|
@ -532,7 +535,7 @@ namespace {
|
|||
const intel_device_info *devinfo = v->devinfo;
|
||||
const fs_builder ibld(v, block, inst);
|
||||
const unsigned stride = required_src_byte_stride(devinfo, inst, i) /
|
||||
type_sz(inst->src[i].type);
|
||||
brw_type_size_bytes(inst->src[i].type);
|
||||
assert(stride > 0);
|
||||
/* Calculate the size of the temporary allocation manually instead of
|
||||
* relying on the builder, since we may have to add some amount of
|
||||
|
|
@ -541,7 +544,8 @@ namespace {
|
|||
*/
|
||||
const unsigned size =
|
||||
DIV_ROUND_UP(required_src_byte_offset(v->devinfo, inst, i) +
|
||||
inst->exec_size * stride * type_sz(inst->src[i].type),
|
||||
inst->exec_size * stride *
|
||||
brw_type_size_bytes(inst->src[i].type),
|
||||
reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
|
||||
fs_reg tmp(VGRF, v->alloc.allocate(size), inst->src[i].type);
|
||||
ibld.UNDEF(tmp);
|
||||
|
|
@ -551,9 +555,9 @@ namespace {
|
|||
/* Emit a series of 32-bit integer copies with any source modifiers
|
||||
* cleaned up (because their semantics are dependent on the type).
|
||||
*/
|
||||
const brw_reg_type raw_type = brw_int_type(MIN2(type_sz(tmp.type), 4),
|
||||
const brw_reg_type raw_type = brw_int_type(MIN2(brw_type_size_bytes(tmp.type), 4),
|
||||
false);
|
||||
const unsigned n = type_sz(tmp.type) / type_sz(raw_type);
|
||||
const unsigned n = brw_type_size_bytes(tmp.type) / brw_type_size_bytes(raw_type);
|
||||
fs_reg raw_src = inst->src[i];
|
||||
raw_src.negate = false;
|
||||
raw_src.abs = false;
|
||||
|
|
@ -599,7 +603,7 @@ namespace {
|
|||
|
||||
const fs_builder ibld(v, block, inst);
|
||||
const unsigned stride = required_dst_byte_stride(inst) /
|
||||
type_sz(inst->dst.type);
|
||||
brw_type_size_bytes(inst->dst.type);
|
||||
assert(stride > 0);
|
||||
fs_reg tmp = ibld.vgrf(inst->dst.type, stride);
|
||||
ibld.UNDEF(tmp);
|
||||
|
|
@ -608,9 +612,9 @@ namespace {
|
|||
/* Emit a series of 32-bit integer copies from the temporary into the
|
||||
* original destination.
|
||||
*/
|
||||
const brw_reg_type raw_type = brw_int_type(MIN2(type_sz(tmp.type), 4),
|
||||
const brw_reg_type raw_type = brw_int_type(MIN2(brw_type_size_bytes(tmp.type), 4),
|
||||
false);
|
||||
const unsigned n = type_sz(tmp.type) / type_sz(raw_type);
|
||||
const unsigned n = brw_type_size_bytes(tmp.type) / brw_type_size_bytes(raw_type);
|
||||
|
||||
if (inst->predicate && inst->opcode != BRW_OPCODE_SEL) {
|
||||
/* Note that in general we cannot simply predicate the copies on the
|
||||
|
|
@ -656,7 +660,7 @@ namespace {
|
|||
assert(inst->dst.type == get_exec_type(inst));
|
||||
const unsigned mask = has_invalid_exec_type(v->devinfo, inst);
|
||||
const brw_reg_type raw_type = required_exec_type(v->devinfo, inst);
|
||||
const unsigned n = get_exec_type_size(inst) / type_sz(raw_type);
|
||||
const unsigned n = get_exec_type_size(inst) / brw_type_size_bytes(raw_type);
|
||||
const fs_builder ibld(v, block, inst);
|
||||
|
||||
fs_reg tmp = ibld.vgrf(inst->dst.type, inst->dst.stride);
|
||||
|
|
|
|||
|
|
@ -408,7 +408,7 @@ brw_fs_get_lowered_simd_width(const fs_visitor *shader, const fs_inst *inst)
|
|||
const unsigned swiz = inst->src[1].ud;
|
||||
return (is_uniform(inst->src[0]) ?
|
||||
get_fpu_lowered_simd_width(shader, inst) :
|
||||
devinfo->ver < 11 && type_sz(inst->src[0].type) == 4 ? 8 :
|
||||
devinfo->ver < 11 && brw_type_size_bytes(inst->src[0].type) == 4 ? 8 :
|
||||
swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ? 4 :
|
||||
get_fpu_lowered_simd_width(shader, inst));
|
||||
}
|
||||
|
|
@ -425,7 +425,7 @@ brw_fs_get_lowered_simd_width(const fs_visitor *shader, const fs_inst *inst)
|
|||
const unsigned max_size = 2 * REG_SIZE;
|
||||
/* Prior to Broadwell, we only have 8 address subregisters. */
|
||||
return MIN3(16,
|
||||
max_size / (inst->dst.stride * type_sz(inst->dst.type)),
|
||||
max_size / (inst->dst.stride * brw_type_size_bytes(inst->dst.type)),
|
||||
inst->exec_size);
|
||||
}
|
||||
|
||||
|
|
@ -440,7 +440,7 @@ brw_fs_get_lowered_simd_width(const fs_visitor *shader, const fs_inst *inst)
|
|||
*/
|
||||
assert(!inst->header_size);
|
||||
for (unsigned i = 0; i < inst->sources; i++)
|
||||
assert(type_sz(inst->dst.type) == type_sz(inst->src[i].type) ||
|
||||
assert(brw_type_size_bits(inst->dst.type) == brw_type_size_bits(inst->src[i].type) ||
|
||||
inst->src[i].file == BAD_FILE);
|
||||
|
||||
return inst->exec_size / DIV_ROUND_UP(reg_count, 2);
|
||||
|
|
@ -465,7 +465,7 @@ needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i)
|
|||
(inst->components_read(i) == 1 &&
|
||||
lbld.dispatch_width() <= inst->exec_size)) ||
|
||||
(inst->flags_written(lbld.shader->devinfo) &
|
||||
brw_fs_flag_mask(inst->src[i], type_sz(inst->src[i].type)));
|
||||
brw_fs_flag_mask(inst->src[i], brw_type_size_bytes(inst->src[i].type)));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -858,7 +858,7 @@ emit_fsign(nir_to_brw_state &ntb, const fs_builder &bld, const nir_alu_instr *in
|
|||
op[0] = offset(op[0], bld, fsign_instr->src[0].swizzle[channel]);
|
||||
}
|
||||
|
||||
if (type_sz(op[0].type) == 2) {
|
||||
if (brw_type_size_bytes(op[0].type) == 2) {
|
||||
/* AND(val, 0x8000) gives the sign bit.
|
||||
*
|
||||
* Predicated OR ORs 1.0 (0x3c00) with the sign bit if val is not zero.
|
||||
|
|
@ -878,7 +878,7 @@ emit_fsign(nir_to_brw_state &ntb, const fs_builder &bld, const nir_alu_instr *in
|
|||
}
|
||||
|
||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
} else if (type_sz(op[0].type) == 4) {
|
||||
} else if (brw_type_size_bytes(op[0].type) == 4) {
|
||||
/* AND(val, 0x80000000) gives the sign bit.
|
||||
*
|
||||
* Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
|
||||
|
|
@ -1024,7 +1024,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr,
|
|||
|
||||
default:
|
||||
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
|
||||
assert(type_sz(op[i].type) > 1);
|
||||
assert(brw_type_size_bytes(op[i].type) > 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
@ -1108,7 +1108,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr,
|
|||
if (BRW_RND_MODE_UNSPECIFIED != rnd)
|
||||
bld.exec_all().emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), brw_imm_d(rnd));
|
||||
|
||||
assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */
|
||||
assert(brw_type_size_bytes(op[0].type) < 8); /* brw_nir_lower_conversions */
|
||||
inst = bld.MOV(result, op[0]);
|
||||
break;
|
||||
}
|
||||
|
|
@ -1145,19 +1145,19 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr,
|
|||
if (result.type == BRW_TYPE_B ||
|
||||
result.type == BRW_TYPE_UB ||
|
||||
result.type == BRW_TYPE_HF)
|
||||
assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */
|
||||
assert(brw_type_size_bytes(op[0].type) < 8); /* brw_nir_lower_conversions */
|
||||
|
||||
if (op[0].type == BRW_TYPE_B ||
|
||||
op[0].type == BRW_TYPE_UB ||
|
||||
op[0].type == BRW_TYPE_HF)
|
||||
assert(type_sz(result.type) < 8); /* brw_nir_lower_conversions */
|
||||
assert(brw_type_size_bytes(result.type) < 8); /* brw_nir_lower_conversions */
|
||||
|
||||
inst = bld.MOV(result, op[0]);
|
||||
break;
|
||||
|
||||
case nir_op_i2i8:
|
||||
case nir_op_u2u8:
|
||||
assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */
|
||||
assert(brw_type_size_bytes(op[0].type) < 8); /* brw_nir_lower_conversions */
|
||||
FALLTHROUGH;
|
||||
case nir_op_i2i16:
|
||||
case nir_op_u2u16: {
|
||||
|
|
@ -1220,7 +1220,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr,
|
|||
}
|
||||
|
||||
if (op[0].type == BRW_TYPE_HF)
|
||||
assert(type_sz(result.type) < 8); /* brw_nir_lower_conversions */
|
||||
assert(brw_type_size_bytes(result.type) < 8); /* brw_nir_lower_conversions */
|
||||
|
||||
inst = bld.MOV(result, op[0]);
|
||||
break;
|
||||
|
|
@ -1468,7 +1468,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr,
|
|||
case nir_op_ine32: {
|
||||
fs_reg dest = result;
|
||||
|
||||
const uint32_t bit_size = type_sz(op[0].type) * 8;
|
||||
const uint32_t bit_size = brw_type_size_bits(op[0].type);
|
||||
if (bit_size != 32) {
|
||||
dest = bld.vgrf(op[0].type);
|
||||
bld.UNDEF(dest);
|
||||
|
|
@ -2517,7 +2517,7 @@ emit_gs_input_load(nir_to_brw_state &ntb, const fs_reg &dst,
|
|||
|
||||
fs_visitor &s = ntb.s;
|
||||
|
||||
assert(type_sz(dst.type) == 4);
|
||||
assert(brw_type_size_bytes(dst.type) == 4);
|
||||
struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(s.prog_data);
|
||||
const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8;
|
||||
|
||||
|
|
@ -3045,7 +3045,7 @@ fs_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb,
|
|||
* Also attempt to deal with gl_PointSize being in the .w component.
|
||||
*/
|
||||
if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
|
||||
assert(type_sz(dst.type) == 4);
|
||||
assert(brw_type_size_bytes(dst.type) == 4);
|
||||
inst->dst = bld.vgrf(dst.type, 4);
|
||||
inst->size_written = 4 * REG_SIZE * reg_unit(devinfo);
|
||||
bld.MOV(dst, offset(inst->dst, bld, 3));
|
||||
|
|
@ -4622,8 +4622,10 @@ static fs_reg
|
|||
brw_nir_reduction_op_identity(const fs_builder &bld,
|
||||
nir_op op, brw_reg_type type)
|
||||
{
|
||||
nir_const_value value = nir_alu_binop_identity(op, type_sz(type) * 8);
|
||||
switch (type_sz(type)) {
|
||||
nir_const_value value =
|
||||
nir_alu_binop_identity(op, brw_type_size_bits(type));
|
||||
|
||||
switch (brw_type_size_bytes(type)) {
|
||||
case 1:
|
||||
if (type == BRW_TYPE_UB) {
|
||||
return brw_imm_uw(value.u8);
|
||||
|
|
@ -6270,13 +6272,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
|||
* the type size
|
||||
*/
|
||||
unsigned base_offset = nir_intrinsic_base(instr);
|
||||
assert(base_offset % 4 == 0 || base_offset % type_sz(dest.type) == 0);
|
||||
assert(base_offset % 4 == 0 || base_offset % brw_type_size_bytes(dest.type) == 0);
|
||||
|
||||
fs_reg src(UNIFORM, base_offset / 4, dest.type);
|
||||
|
||||
if (nir_src_is_const(instr->src[0])) {
|
||||
unsigned load_offset = nir_src_as_uint(instr->src[0]);
|
||||
assert(load_offset % type_sz(dest.type) == 0);
|
||||
assert(load_offset % brw_type_size_bytes(dest.type) == 0);
|
||||
/* The base offset can only handle 32-bit units, so for 16-bit
|
||||
* data take the modulo of the offset with 4 bytes and add it to
|
||||
* the offset to read from within the source register.
|
||||
|
|
@ -6296,13 +6298,13 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
|||
* one component of the vector.
|
||||
*/
|
||||
assert(nir_intrinsic_range(instr) >=
|
||||
instr->num_components * type_sz(dest.type));
|
||||
instr->num_components * brw_type_size_bytes(dest.type));
|
||||
unsigned read_size = nir_intrinsic_range(instr) -
|
||||
(instr->num_components - 1) * type_sz(dest.type);
|
||||
(instr->num_components - 1) * brw_type_size_bytes(dest.type);
|
||||
|
||||
bool supports_64bit_indirects = !intel_device_info_is_9lp(devinfo);
|
||||
|
||||
if (type_sz(dest.type) != 8 || supports_64bit_indirects) {
|
||||
if (brw_type_size_bytes(dest.type) != 8 || supports_64bit_indirects) {
|
||||
for (unsigned j = 0; j < instr->num_components; j++) {
|
||||
bld.emit(SHADER_OPCODE_MOV_INDIRECT,
|
||||
offset(dest, bld, j), offset(src, bld, j),
|
||||
|
|
@ -6310,12 +6312,12 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
|||
}
|
||||
} else {
|
||||
const unsigned num_mov_indirects =
|
||||
type_sz(dest.type) / type_sz(BRW_TYPE_UD);
|
||||
brw_type_size_bytes(dest.type) / brw_type_size_bytes(BRW_TYPE_UD);
|
||||
/* We read a little bit less per MOV INDIRECT, as they are now
|
||||
* 32-bits ones instead of 64-bit. Fix read_size then.
|
||||
*/
|
||||
const unsigned read_size_32bit = read_size -
|
||||
(num_mov_indirects - 1) * type_sz(BRW_TYPE_UD);
|
||||
(num_mov_indirects - 1) * brw_type_size_bytes(BRW_TYPE_UD);
|
||||
for (unsigned j = 0; j < instr->num_components; j++) {
|
||||
for (unsigned i = 0; i < num_mov_indirects; i++) {
|
||||
bld.emit(SHADER_OPCODE_MOV_INDIRECT,
|
||||
|
|
@ -6344,14 +6346,14 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
|||
fs_reg base_offset = retype(get_nir_src(ntb, instr->src[1]),
|
||||
BRW_TYPE_UD);
|
||||
|
||||
const unsigned comps_per_load = type_sz(dest.type) == 8 ? 2 : 4;
|
||||
const unsigned comps_per_load = brw_type_size_bytes(dest.type) == 8 ? 2 : 4;
|
||||
|
||||
for (int i = 0; i < instr->num_components; i += comps_per_load) {
|
||||
const unsigned remaining = instr->num_components - i;
|
||||
s.VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i),
|
||||
surface, surface_handle,
|
||||
base_offset,
|
||||
i * type_sz(dest.type),
|
||||
i * brw_type_size_bytes(dest.type),
|
||||
instr->def.bit_size / 8,
|
||||
MIN2(remaining, comps_per_load));
|
||||
}
|
||||
|
|
@ -6422,7 +6424,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
|||
* we let CSE deal with duplicate loads. Here we see a vector access
|
||||
* and we have to split it if necessary.
|
||||
*/
|
||||
const unsigned type_size = type_sz(dest.type);
|
||||
const unsigned type_size = brw_type_size_bytes(dest.type);
|
||||
const unsigned load_offset = nir_src_as_uint(instr->src[1]);
|
||||
const unsigned ubo_block =
|
||||
brw_nir_ubo_surface_index_get_push_block(instr->src[0]);
|
||||
|
|
@ -7416,15 +7418,15 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
|||
bld.emit_scan(brw_op, scan, cluster_size, cond_mod);
|
||||
|
||||
dest.type = src.type;
|
||||
if (cluster_size * type_sz(src.type) >= REG_SIZE * 2) {
|
||||
if (cluster_size * brw_type_size_bytes(src.type) >= REG_SIZE * 2) {
|
||||
/* In this case, CLUSTER_BROADCAST instruction isn't needed because
|
||||
* the distance between clusters is at least 2 GRFs. In this case,
|
||||
* we don't need the weird striding of the CLUSTER_BROADCAST
|
||||
* instruction and can just do regular MOVs.
|
||||
*/
|
||||
assert((cluster_size * type_sz(src.type)) % (REG_SIZE * 2) == 0);
|
||||
assert((cluster_size * brw_type_size_bytes(src.type)) % (REG_SIZE * 2) == 0);
|
||||
const unsigned groups =
|
||||
(s.dispatch_width * type_sz(src.type)) / (REG_SIZE * 2);
|
||||
(s.dispatch_width * brw_type_size_bytes(src.type)) / (REG_SIZE * 2);
|
||||
const unsigned group_size = s.dispatch_width / groups;
|
||||
for (unsigned i = 0; i < groups; i++) {
|
||||
const unsigned cluster = (i * group_size) / cluster_size;
|
||||
|
|
@ -7855,7 +7857,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
|||
static fs_reg
|
||||
expand_to_32bit(const fs_builder &bld, const fs_reg &src)
|
||||
{
|
||||
if (type_sz(src.type) == 2) {
|
||||
if (brw_type_size_bytes(src.type) == 2) {
|
||||
fs_reg src32 = bld.vgrf(BRW_TYPE_UD);
|
||||
bld.MOV(src32, retype(src, BRW_TYPE_UW));
|
||||
return src32;
|
||||
|
|
@ -8454,23 +8456,23 @@ shuffle_src_to_dst(const fs_builder &bld,
|
|||
uint32_t first_component,
|
||||
uint32_t components)
|
||||
{
|
||||
if (type_sz(src.type) == type_sz(dst.type)) {
|
||||
if (brw_type_size_bytes(src.type) == brw_type_size_bytes(dst.type)) {
|
||||
assert(!regions_overlap(dst,
|
||||
type_sz(dst.type) * bld.dispatch_width() * components,
|
||||
brw_type_size_bytes(dst.type) * bld.dispatch_width() * components,
|
||||
offset(src, bld, first_component),
|
||||
type_sz(src.type) * bld.dispatch_width() * components));
|
||||
brw_type_size_bytes(src.type) * bld.dispatch_width() * components));
|
||||
for (unsigned i = 0; i < components; i++) {
|
||||
bld.MOV(retype(offset(dst, bld, i), src.type),
|
||||
offset(src, bld, i + first_component));
|
||||
}
|
||||
} else if (type_sz(src.type) < type_sz(dst.type)) {
|
||||
} else if (brw_type_size_bytes(src.type) < brw_type_size_bytes(dst.type)) {
|
||||
/* Source is shuffled into destination */
|
||||
unsigned size_ratio = type_sz(dst.type) / type_sz(src.type);
|
||||
unsigned size_ratio = brw_type_size_bytes(dst.type) / brw_type_size_bytes(src.type);
|
||||
assert(!regions_overlap(dst,
|
||||
type_sz(dst.type) * bld.dispatch_width() *
|
||||
brw_type_size_bytes(dst.type) * bld.dispatch_width() *
|
||||
DIV_ROUND_UP(components, size_ratio),
|
||||
offset(src, bld, first_component),
|
||||
type_sz(src.type) * bld.dispatch_width() * components));
|
||||
brw_type_size_bytes(src.type) * bld.dispatch_width() * components));
|
||||
|
||||
brw_reg_type shuffle_type =
|
||||
brw_type_with_size(BRW_TYPE_D, brw_type_size_bits(src.type));
|
||||
|
|
@ -8483,16 +8485,16 @@ shuffle_src_to_dst(const fs_builder &bld,
|
|||
}
|
||||
} else {
|
||||
/* Source is unshuffled into destination */
|
||||
unsigned size_ratio = type_sz(src.type) / type_sz(dst.type);
|
||||
unsigned size_ratio = brw_type_size_bytes(src.type) / brw_type_size_bytes(dst.type);
|
||||
assert(!regions_overlap(dst,
|
||||
type_sz(dst.type) * bld.dispatch_width() * components,
|
||||
brw_type_size_bytes(dst.type) * bld.dispatch_width() * components,
|
||||
offset(src, bld, first_component / size_ratio),
|
||||
type_sz(src.type) * bld.dispatch_width() *
|
||||
brw_type_size_bytes(src.type) * bld.dispatch_width() *
|
||||
DIV_ROUND_UP(components + (first_component % size_ratio),
|
||||
size_ratio)));
|
||||
|
||||
brw_reg_type shuffle_type =
|
||||
brw_type_with_size(BRW_TYPE_D, brw_Type_size_bits(dst.type));
|
||||
brw_type_with_size(BRW_TYPE_D, brw_type_size_bits(dst.type));
|
||||
for (unsigned i = 0; i < components; i++) {
|
||||
fs_reg shuffle_component_i =
|
||||
subscript(offset(src, bld, (first_component + i) / size_ratio),
|
||||
|
|
@ -8510,13 +8512,13 @@ shuffle_from_32bit_read(const fs_builder &bld,
|
|||
uint32_t first_component,
|
||||
uint32_t components)
|
||||
{
|
||||
assert(type_sz(src.type) == 4);
|
||||
assert(brw_type_size_bytes(src.type) == 4);
|
||||
|
||||
/* This function takes components in units of the destination type while
|
||||
* shuffle_src_to_dst takes components in units of the smallest type
|
||||
*/
|
||||
if (type_sz(dst.type) > 4) {
|
||||
assert(type_sz(dst.type) == 8);
|
||||
if (brw_type_size_bytes(dst.type) > 4) {
|
||||
assert(brw_type_size_bytes(dst.type) == 8);
|
||||
first_component *= 2;
|
||||
components *= 2;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ load_payload_sources_read_for_size(fs_inst *lp, unsigned size_read)
|
|||
unsigned i;
|
||||
unsigned size = lp->header_size * REG_SIZE;
|
||||
for (i = lp->header_size; size < size_read && i < lp->sources; i++)
|
||||
size += lp->exec_size * type_sz(lp->src[i].type);
|
||||
size += lp->exec_size * brw_type_size_bytes(lp->src[i].type);
|
||||
|
||||
/* Size read must cover exactly a subset of sources. */
|
||||
assert(size == size_read);
|
||||
|
|
@ -225,7 +225,7 @@ brw_fs_opt_zero_samples(fs_visitor &s)
|
|||
for (unsigned i = params - 1; i > first_param_idx; i--) {
|
||||
if (lp->src[i].file != BAD_FILE && !lp->src[i].is_zero())
|
||||
break;
|
||||
zero_size += lp->exec_size * type_sz(lp->src[i].type) * lp->dst.stride;
|
||||
zero_size += lp->exec_size * brw_type_size_bytes(lp->src[i].type) * lp->dst.stride;
|
||||
}
|
||||
|
||||
/* Round down to ensure to only consider full registers. */
|
||||
|
|
|
|||
|
|
@ -130,8 +130,8 @@ brw_fs_opt_algebraic(fs_visitor &s)
|
|||
* we might use the full accumulator in the MUL/MACH macro, we
|
||||
* shouldn't replace such MULs with MOVs.
|
||||
*/
|
||||
if ((brw_reg_type_to_size(inst->src[0].type) == 4 ||
|
||||
brw_reg_type_to_size(inst->src[1].type) == 4) &&
|
||||
if ((brw_type_size_bytes(inst->src[0].type) == 4 ||
|
||||
brw_type_size_bytes(inst->src[1].type) == 4) &&
|
||||
(inst->dst.is_accumulator() ||
|
||||
inst->writes_accumulator_implicitly(devinfo)))
|
||||
break;
|
||||
|
|
@ -330,7 +330,7 @@ brw_fs_opt_algebraic(fs_visitor &s)
|
|||
|
||||
fs_reg result;
|
||||
|
||||
switch (type_sz(inst->src[0].type)) {
|
||||
switch (brw_type_size_bytes(inst->src[0].type)) {
|
||||
case 2:
|
||||
result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -1061,7 +1061,8 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
|
|||
* instruction and set force_writemask_all on the spill.
|
||||
*/
|
||||
const bool per_channel =
|
||||
inst->dst.is_contiguous() && type_sz(inst->dst.type) == 4 &&
|
||||
inst->dst.is_contiguous() &&
|
||||
brw_type_size_bytes(inst->dst.type) == 4 &&
|
||||
inst->exec_size == width;
|
||||
|
||||
/* Builder used to emit the scratch messages. */
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ is_nop_mov(const fs_inst *inst)
|
|||
}
|
||||
dst.offset += (i < inst->header_size ? REG_SIZE :
|
||||
inst->exec_size * dst.stride *
|
||||
type_sz(inst->src[i].type));
|
||||
brw_type_size_bytes(inst->src[i].type));
|
||||
}
|
||||
return true;
|
||||
} else if (inst->opcode == BRW_OPCODE_MOV) {
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ namespace {
|
|||
!inst->is_control_source(i)) {
|
||||
const brw_reg_type t = inst->src[i].type;
|
||||
has_int_src |= !brw_type_is_float(t);
|
||||
has_long_src |= type_sz(t) >= 8;
|
||||
has_long_src |= brw_type_size_bytes(t) >= 8;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -120,9 +120,11 @@ namespace {
|
|||
const brw_reg_type t = get_exec_type(inst);
|
||||
const bool is_dword_multiply = !brw_type_is_float(t) &&
|
||||
((inst->opcode == BRW_OPCODE_MUL &&
|
||||
MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4) ||
|
||||
MIN2(brw_type_size_bytes(inst->src[0].type),
|
||||
brw_type_size_bytes(inst->src[1].type)) >= 4) ||
|
||||
(inst->opcode == BRW_OPCODE_MAD &&
|
||||
MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4));
|
||||
MIN2(brw_type_size_bytes(inst->src[1].type),
|
||||
brw_type_size_bytes(inst->src[2].type)) >= 4));
|
||||
|
||||
if (is_unordered(devinfo, inst))
|
||||
return TGL_PIPE_NONE;
|
||||
|
|
@ -136,13 +138,14 @@ namespace {
|
|||
return TGL_PIPE_INT;
|
||||
else if (inst->opcode == FS_OPCODE_PACK_HALF_2x16_SPLIT)
|
||||
return TGL_PIPE_FLOAT;
|
||||
else if (devinfo->ver >= 20 && type_sz(inst->dst.type) >= 8 &&
|
||||
else if (devinfo->ver >= 20 &&
|
||||
brw_type_size_bytes(inst->dst.type) >= 8 &&
|
||||
brw_type_is_float(inst->dst.type)) {
|
||||
assert(devinfo->has_64bit_float);
|
||||
return TGL_PIPE_LONG;
|
||||
} else if (devinfo->ver < 20 &&
|
||||
(type_sz(inst->dst.type) >= 8 || type_sz(t) >= 8 ||
|
||||
is_dword_multiply)) {
|
||||
(brw_type_size_bytes(inst->dst.type) >= 8 ||
|
||||
brw_type_size_bytes(t) >= 8 || is_dword_multiply)) {
|
||||
assert(devinfo->has_64bit_float || devinfo->has_64bit_int ||
|
||||
devinfo->has_integer_dword_mul);
|
||||
return TGL_PIPE_LONG;
|
||||
|
|
|
|||
|
|
@ -206,7 +206,7 @@ brw_fs_opt_peephole_sel(fs_visitor &s)
|
|||
|
||||
/* 64-bit immediates can't be placed in src1. */
|
||||
fs_reg src1(else_mov[i]->src[0]);
|
||||
if (src1.file == IMM && type_sz(src1.type) == 8) {
|
||||
if (src1.file == IMM && brw_type_size_bytes(src1.type) == 8) {
|
||||
src1 = ibld.vgrf(else_mov[i]->src[0].type);
|
||||
ibld.MOV(src1, else_mov[i]->src[0]);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -168,7 +168,7 @@ brw_fs_validate(const fs_visitor &s)
|
|||
*/
|
||||
fsv_assert_lte(inst->src[i].vstride, 1);
|
||||
|
||||
if (type_sz(inst->src[i].type) > 4)
|
||||
if (brw_type_size_bytes(inst->src[i].type) > 4)
|
||||
fsv_assert_eq(inst->src[i].vstride, 1);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1318,7 +1318,7 @@ dstoperand:
|
|||
$$.type = $4;
|
||||
$$.writemask = $3;
|
||||
$$.swizzle = BRW_SWIZZLE_NOOP;
|
||||
$$.subnr = $$.subnr * brw_reg_type_to_size($4);
|
||||
$$.subnr = $$.subnr * brw_type_size_bytes($4);
|
||||
}
|
||||
;
|
||||
|
||||
|
|
@ -1329,7 +1329,7 @@ dstoperandex:
|
|||
$$.hstride = $2;
|
||||
$$.type = $4;
|
||||
$$.writemask = $3;
|
||||
$$.subnr = $$.subnr * brw_reg_type_to_size($4);
|
||||
$$.subnr = $$.subnr * brw_type_size_bytes($4);
|
||||
}
|
||||
/* BSpec says "When the conditional modifier is present, updates
|
||||
* to the selected flag register also occur. In this case, the
|
||||
|
|
|
|||
|
|
@ -152,7 +152,7 @@ horiz_offset(const fs_reg &reg, unsigned delta)
|
|||
return reg;
|
||||
case VGRF:
|
||||
case ATTR:
|
||||
return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
|
||||
return byte_offset(reg, delta * reg.stride * brw_type_size_bytes(reg.type));
|
||||
case ARF:
|
||||
case FIXED_GRF:
|
||||
if (reg.is_null()) {
|
||||
|
|
@ -163,10 +163,10 @@ horiz_offset(const fs_reg &reg, unsigned delta)
|
|||
const unsigned width = 1 << reg.width;
|
||||
|
||||
if (delta % width == 0) {
|
||||
return byte_offset(reg, delta / width * vstride * type_sz(reg.type));
|
||||
return byte_offset(reg, delta / width * vstride * brw_type_size_bytes(reg.type));
|
||||
} else {
|
||||
assert(vstride == hstride * width);
|
||||
return byte_offset(reg, delta * hstride * type_sz(reg.type));
|
||||
return byte_offset(reg, delta * hstride * brw_type_size_bytes(reg.type));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -245,7 +245,7 @@ reg_padding(const fs_reg &r)
|
|||
const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
|
||||
r.hstride == 0 ? 0 :
|
||||
1 << (r.hstride - 1));
|
||||
return (MAX2(1, stride) - 1) * type_sz(r.type);
|
||||
return (MAX2(1, stride) - 1) * brw_type_size_bytes(r.type);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -333,29 +333,29 @@ quarter(const fs_reg &reg, unsigned idx)
|
|||
static inline fs_reg
|
||||
subscript(fs_reg reg, brw_reg_type type, unsigned i)
|
||||
{
|
||||
assert((i + 1) * type_sz(type) <= type_sz(reg.type));
|
||||
assert((i + 1) * brw_type_size_bytes(type) <= brw_type_size_bytes(reg.type));
|
||||
|
||||
if (reg.file == ARF || reg.file == FIXED_GRF) {
|
||||
/* The stride is encoded inconsistently for fixed GRF and ARF registers
|
||||
* as the log2 of the actual vertical and horizontal strides.
|
||||
*/
|
||||
const int delta = util_logbase2(type_sz(reg.type)) -
|
||||
util_logbase2(type_sz(type));
|
||||
const int delta = util_logbase2(brw_type_size_bytes(reg.type)) -
|
||||
util_logbase2(brw_type_size_bytes(type));
|
||||
reg.hstride += (reg.hstride ? delta : 0);
|
||||
reg.vstride += (reg.vstride ? delta : 0);
|
||||
|
||||
} else if (reg.file == IMM) {
|
||||
unsigned bit_size = type_sz(type) * 8;
|
||||
unsigned bit_size = brw_type_size_bits(type);
|
||||
reg.u64 >>= i * bit_size;
|
||||
reg.u64 &= BITFIELD64_MASK(bit_size);
|
||||
if (bit_size <= 16)
|
||||
reg.u64 |= reg.u64 << 16;
|
||||
return retype(reg, type);
|
||||
} else {
|
||||
reg.stride *= type_sz(reg.type) / type_sz(type);
|
||||
reg.stride *= brw_type_size_bytes(reg.type) / brw_type_size_bytes(type);
|
||||
}
|
||||
|
||||
return byte_offset(retype(reg, type), i * type_sz(type));
|
||||
return byte_offset(retype(reg, type), i * brw_type_size_bytes(type));
|
||||
}
|
||||
|
||||
static inline fs_reg
|
||||
|
|
@ -657,9 +657,9 @@ get_exec_type(const fs_inst *inst)
|
|||
if (inst->src[i].file != BAD_FILE &&
|
||||
!inst->is_control_source(i)) {
|
||||
const brw_reg_type t = get_exec_type(inst->src[i].type);
|
||||
if (type_sz(t) > type_sz(exec_type))
|
||||
if (brw_type_size_bytes(t) > brw_type_size_bytes(exec_type))
|
||||
exec_type = t;
|
||||
else if (type_sz(t) == type_sz(exec_type) &&
|
||||
else if (brw_type_size_bytes(t) == brw_type_size_bytes(exec_type) &&
|
||||
brw_type_is_float(t))
|
||||
exec_type = t;
|
||||
}
|
||||
|
|
@ -683,7 +683,7 @@ get_exec_type(const fs_inst *inst)
|
|||
* "Conversion between Integer and HF (Half Float) must be DWord aligned
|
||||
* and strided by a DWord on the destination."
|
||||
*/
|
||||
if (type_sz(exec_type) == 2 &&
|
||||
if (brw_type_size_bytes(exec_type) == 2 &&
|
||||
inst->dst.type != exec_type) {
|
||||
if (exec_type == BRW_TYPE_HF)
|
||||
exec_type = BRW_TYPE_F;
|
||||
|
|
@ -697,7 +697,7 @@ get_exec_type(const fs_inst *inst)
|
|||
static inline unsigned
|
||||
get_exec_type_size(const fs_inst *inst)
|
||||
{
|
||||
return type_sz(get_exec_type(inst));
|
||||
return brw_type_size_bytes(get_exec_type(inst));
|
||||
}
|
||||
|
||||
static inline bool
|
||||
|
|
@ -734,7 +734,7 @@ byte_stride(const fs_reg &reg)
|
|||
case IMM:
|
||||
case VGRF:
|
||||
case ATTR:
|
||||
return reg.stride * type_sz(reg.type);
|
||||
return reg.stride * brw_type_size_bytes(reg.type);
|
||||
case ARF:
|
||||
case FIXED_GRF:
|
||||
if (reg.is_null()) {
|
||||
|
|
@ -745,9 +745,9 @@ byte_stride(const fs_reg &reg)
|
|||
const unsigned width = 1 << reg.width;
|
||||
|
||||
if (width == 1) {
|
||||
return vstride * type_sz(reg.type);
|
||||
return vstride * brw_type_size_bytes(reg.type);
|
||||
} else if (hstride * width == vstride) {
|
||||
return hstride * type_sz(reg.type);
|
||||
return hstride * brw_type_size_bytes(reg.type);
|
||||
} else {
|
||||
return ~0u;
|
||||
}
|
||||
|
|
@ -783,12 +783,12 @@ has_dst_aligned_region_restriction(const intel_device_info *devinfo,
|
|||
*/
|
||||
const bool is_dword_multiply = !brw_type_is_float(exec_type) &&
|
||||
((inst->opcode == BRW_OPCODE_MUL &&
|
||||
MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4) ||
|
||||
MIN2(brw_type_size_bytes(inst->src[0].type), brw_type_size_bytes(inst->src[1].type)) >= 4) ||
|
||||
(inst->opcode == BRW_OPCODE_MAD &&
|
||||
MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4));
|
||||
MIN2(brw_type_size_bytes(inst->src[1].type), brw_type_size_bytes(inst->src[2].type)) >= 4));
|
||||
|
||||
if (type_sz(dst_type) > 4 || type_sz(exec_type) > 4 ||
|
||||
(type_sz(exec_type) == 4 && is_dword_multiply))
|
||||
if (brw_type_size_bytes(dst_type) > 4 || brw_type_size_bytes(exec_type) > 4 ||
|
||||
(brw_type_size_bytes(exec_type) == 4 && is_dword_multiply))
|
||||
return intel_device_info_is_9lp(devinfo) || devinfo->verx10 >= 125;
|
||||
|
||||
else if (brw_type_is_float(dst_type))
|
||||
|
|
@ -818,10 +818,12 @@ has_subdword_integer_region_restriction(const intel_device_info *devinfo,
|
|||
{
|
||||
if (devinfo->ver >= 20 &&
|
||||
brw_type_is_int(inst->dst.type) &&
|
||||
MAX2(byte_stride(inst->dst), type_sz(inst->dst.type)) < 4) {
|
||||
MAX2(byte_stride(inst->dst),
|
||||
brw_type_size_bytes(inst->dst.type)) < 4) {
|
||||
for (unsigned i = 0; i < num_srcs; i++) {
|
||||
if (brw_type_is_int(srcs[i].type) &&
|
||||
type_sz(srcs[i].type) < 4 && byte_stride(srcs[i]) >= 4)
|
||||
brw_type_size_bytes(srcs[i].type) < 4 &&
|
||||
byte_stride(srcs[i]) >= 4)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -136,14 +136,14 @@ namespace {
|
|||
}
|
||||
|
||||
/* Convert the execution size to GRF units. */
|
||||
sx = DIV_ROUND_UP(inst->exec_size * type_sz(tx), REG_SIZE);
|
||||
sx = DIV_ROUND_UP(inst->exec_size * brw_type_size_bytes(tx), REG_SIZE);
|
||||
|
||||
/* 32x32 integer multiplication has half the usual ALU throughput.
|
||||
* Treat it as double-precision.
|
||||
*/
|
||||
if ((inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD) &&
|
||||
!brw_type_is_float(tx) && type_sz(tx) == 4 &&
|
||||
type_sz(inst->src[0].type) == type_sz(inst->src[1].type))
|
||||
!brw_type_is_float(tx) && brw_type_size_bytes(tx) == 4 &&
|
||||
brw_type_size_bytes(inst->src[0].type) == brw_type_size_bytes(inst->src[1].type))
|
||||
tx = brw_int_type(8, tx == BRW_TYPE_D);
|
||||
|
||||
rcount = inst->opcode == BRW_OPCODE_DPAS ? inst->rcount : 0;
|
||||
|
|
@ -317,7 +317,7 @@ namespace {
|
|||
return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2,
|
||||
0, 10, 6 /* XXX */, 14, 0, 0);
|
||||
} else {
|
||||
if (type_sz(info.tx) > 4)
|
||||
if (brw_type_size_bytes(info.tx) > 4)
|
||||
return calculate_desc(info, EU_UNIT_FPU, 0, 4, 0, 0, 4,
|
||||
0, 12, 8 /* XXX */, 16 /* XXX */, 0, 0);
|
||||
else
|
||||
|
|
@ -335,7 +335,7 @@ namespace {
|
|||
return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2,
|
||||
0, 10, 6, 14, 0, 0);
|
||||
} else {
|
||||
if (type_sz(info.tx) > 4)
|
||||
if (brw_type_size_bytes(info.tx) > 4)
|
||||
return calculate_desc(info, EU_UNIT_FPU, 0, 4, 0, 0, 4,
|
||||
0, 12, 8 /* XXX */, 16 /* XXX */, 0, 0);
|
||||
else
|
||||
|
|
@ -358,7 +358,7 @@ namespace {
|
|||
return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2,
|
||||
0, 10, 6 /* XXX */, 14 /* XXX */, 0, 0);
|
||||
} else {
|
||||
if (type_sz(info.tx) > 4)
|
||||
if (brw_type_size_bytes(info.tx) > 4)
|
||||
return calculate_desc(info, EU_UNIT_FPU, 0, 4, 1, 0, 4,
|
||||
0, 12, 8 /* XXX */, 16 /* XXX */, 0, 0);
|
||||
else
|
||||
|
|
@ -854,7 +854,7 @@ namespace {
|
|||
{
|
||||
assert(inst->reads_accumulator_implicitly() ||
|
||||
inst->writes_accumulator_implicitly(devinfo));
|
||||
const unsigned offset = (inst->group + i) * type_sz(tx) *
|
||||
const unsigned offset = (inst->group + i) * brw_type_size_bytes(tx) *
|
||||
(brw_type_is_float(tx) ? 1 : 2);
|
||||
return offset / (reg_unit(devinfo) * REG_SIZE) % 2;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -200,11 +200,11 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
|
|||
const fs_reg handle = inst->src[URB_LOGICAL_SRC_HANDLE];
|
||||
const fs_reg src = inst->components_read(URB_LOGICAL_SRC_DATA) ?
|
||||
inst->src[URB_LOGICAL_SRC_DATA] : fs_reg(brw_imm_ud(0));
|
||||
assert(type_sz(src.type) == 4);
|
||||
assert(brw_type_size_bytes(src.type) == 4);
|
||||
|
||||
/* Calculate the total number of components of the payload. */
|
||||
const unsigned src_comps = MAX2(1, inst->components_read(URB_LOGICAL_SRC_DATA));
|
||||
const unsigned src_sz = type_sz(src.type);
|
||||
const unsigned src_sz = brw_type_size_bytes(src.type);
|
||||
|
||||
fs_reg payload = bld.vgrf(BRW_TYPE_UD);
|
||||
|
||||
|
|
@ -404,7 +404,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
|||
* hardware when doing a SIMD8 write depending on whether we have
|
||||
* selected the subspans for the first or second half respectively.
|
||||
*/
|
||||
assert(sample_mask.file != BAD_FILE && type_sz(sample_mask.type) == 4);
|
||||
assert(sample_mask.file != BAD_FILE &&
|
||||
brw_type_size_bytes(sample_mask.type) == 4);
|
||||
sample_mask.type = BRW_TYPE_UW;
|
||||
sample_mask.stride *= 2;
|
||||
|
||||
|
|
@ -1211,7 +1212,7 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
|
|||
*/
|
||||
for (unsigned i = 0; i < TEX_LOGICAL_NUM_SRCS; i++) {
|
||||
if (src[i].file != BAD_FILE) {
|
||||
src_type_size = brw_reg_type_to_size(src[i].type);
|
||||
src_type_size = brw_type_size_bytes(src[i].type);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -1227,7 +1228,7 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
|
|||
if (inst->opcode != SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL) {
|
||||
for (unsigned i = 0; i < TEX_LOGICAL_NUM_SRCS; i++) {
|
||||
assert(src[i].file == BAD_FILE ||
|
||||
brw_reg_type_to_size(src[i].type) == src_type_size);
|
||||
brw_type_size_bytes(src[i].type) == src_type_size);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
@ -1664,8 +1665,8 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
/* Calculate the total number of components of the payload. */
|
||||
const unsigned addr_sz = inst->components_read(SURFACE_LOGICAL_SRC_ADDRESS);
|
||||
const unsigned src_comps = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
|
||||
const unsigned src_sz = type_sz(src.type);
|
||||
const unsigned dst_sz = type_sz(inst->dst.type);
|
||||
const unsigned src_sz = brw_type_size_bytes(src.type);
|
||||
const unsigned dst_sz = brw_type_size_bytes(inst->dst.type);
|
||||
|
||||
const bool has_side_effects = inst->has_side_effects();
|
||||
|
||||
|
|
@ -1954,7 +1955,7 @@ lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
if (write) {
|
||||
const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
|
||||
data = retype(bld.move_to_vgrf(src, src_sz), BRW_TYPE_UD);
|
||||
ex_mlen = src_sz * type_sz(src.type) * inst->exec_size / REG_SIZE;
|
||||
ex_mlen = src_sz * brw_type_size_bytes(src.type) * inst->exec_size / REG_SIZE;
|
||||
}
|
||||
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
|
|
@ -1981,7 +1982,7 @@ emit_a64_oword_block_header(const fs_builder &bld, const fs_reg &addr)
|
|||
{
|
||||
const fs_builder ubld = bld.exec_all().group(8, 0);
|
||||
|
||||
assert(type_sz(addr.type) == 8 && addr.stride == 0);
|
||||
assert(brw_type_size_bytes(addr.type) == 8 && addr.stride == 0);
|
||||
|
||||
fs_reg expanded_addr = addr;
|
||||
if (addr.file == UNIFORM) {
|
||||
|
|
@ -2031,8 +2032,8 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
/* Get the logical send arguments. */
|
||||
const fs_reg addr = inst->src[A64_LOGICAL_ADDRESS];
|
||||
const fs_reg src = inst->src[A64_LOGICAL_SRC];
|
||||
const unsigned src_sz = type_sz(src.type);
|
||||
const unsigned dst_sz = type_sz(inst->dst.type);
|
||||
const unsigned src_sz = brw_type_size_bytes(src.type);
|
||||
const unsigned dst_sz = brw_type_size_bytes(inst->dst.type);
|
||||
|
||||
const unsigned src_comps = inst->components_read(1);
|
||||
assert(inst->src[A64_LOGICAL_ARG].file == IMM);
|
||||
|
|
@ -2181,13 +2182,13 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
payload = emit_a64_oword_block_header(bld, addr);
|
||||
|
||||
if (inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL) {
|
||||
ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE;
|
||||
ex_mlen = src_comps * brw_type_size_bytes(src.type) * inst->exec_size / REG_SIZE;
|
||||
payload2 = retype(bld.move_to_vgrf(src, src_comps), BRW_TYPE_UD);
|
||||
}
|
||||
} else {
|
||||
/* On Skylake and above, we have SENDS */
|
||||
mlen = 2 * (inst->exec_size / 8);
|
||||
ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE;
|
||||
ex_mlen = src_comps * brw_type_size_bytes(src.type) * inst->exec_size / REG_SIZE;
|
||||
payload = retype(bld.move_to_vgrf(addr, 1), BRW_TYPE_UD);
|
||||
payload2 = retype(bld.move_to_vgrf(src, src_comps), BRW_TYPE_UD);
|
||||
}
|
||||
|
|
@ -2243,12 +2244,12 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
if (lsc_opcode_is_atomic_float((enum lsc_opcode) arg)) {
|
||||
desc =
|
||||
brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
|
||||
type_sz(inst->dst.type) * 8,
|
||||
brw_type_size_bits(inst->dst.type),
|
||||
lsc_op_to_legacy_atomic(arg),
|
||||
!inst->dst.is_null());
|
||||
} else {
|
||||
desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size,
|
||||
type_sz(inst->dst.type) * 8,
|
||||
brw_type_size_bits(inst->dst.type),
|
||||
lsc_op_to_legacy_atomic(arg),
|
||||
!inst->dst.is_null());
|
||||
}
|
||||
|
|
@ -2583,7 +2584,8 @@ lower_btd_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
ubld.MOV(header, brw_imm_ud(0));
|
||||
switch (inst->opcode) {
|
||||
case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
|
||||
assert(type_sz(global_addr.type) == 8 && global_addr.stride == 0);
|
||||
assert(brw_type_size_bytes(global_addr.type) == 8 &&
|
||||
global_addr.stride == 0);
|
||||
global_addr.type = BRW_TYPE_UD;
|
||||
global_addr.stride = 1;
|
||||
ubld.group(2, 0).MOV(header, global_addr);
|
||||
|
|
|
|||
|
|
@ -283,13 +283,6 @@ struct brw_indirect {
|
|||
unsigned pad:18;
|
||||
};
|
||||
|
||||
|
||||
static inline unsigned
|
||||
type_sz(unsigned type)
|
||||
{
|
||||
return brw_type_size_bytes((enum brw_reg_type) type);
|
||||
}
|
||||
|
||||
static inline enum brw_reg_type
|
||||
get_exec_type(const enum brw_reg_type type)
|
||||
{
|
||||
|
|
@ -366,7 +359,7 @@ brw_reg(enum brw_reg_file file,
|
|||
reg.abs = abs;
|
||||
reg.address_mode = BRW_ADDRESS_DIRECT;
|
||||
reg.pad0 = 0;
|
||||
reg.subnr = subnr * type_sz(type);
|
||||
reg.subnr = subnr * brw_type_size_bytes(type);
|
||||
reg.nr = nr;
|
||||
|
||||
/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
|
||||
|
|
@ -531,7 +524,7 @@ byte_offset(struct brw_reg reg, unsigned bytes)
|
|||
static inline struct brw_reg
|
||||
suboffset(struct brw_reg reg, unsigned delta)
|
||||
{
|
||||
return byte_offset(reg, delta * type_sz(reg.type));
|
||||
return byte_offset(reg, delta * brw_type_size_bytes(reg.type));
|
||||
}
|
||||
|
||||
/** Construct unsigned word[16] register */
|
||||
|
|
@ -1006,11 +999,11 @@ spread(struct brw_reg reg, unsigned s)
|
|||
static inline struct brw_reg
|
||||
subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i)
|
||||
{
|
||||
unsigned scale = type_sz(reg.type) / type_sz(type);
|
||||
unsigned scale = brw_type_size_bytes(reg.type) / brw_type_size_bytes(type);
|
||||
assert(scale >= 1 && i < scale);
|
||||
|
||||
if (reg.file == IMM) {
|
||||
unsigned bit_size = type_sz(type) * 8;
|
||||
unsigned bit_size = brw_type_size_bits(type);
|
||||
reg.u64 >>= i * bit_size;
|
||||
reg.u64 &= BITFIELD64_MASK(bit_size);
|
||||
if (bit_size <= 16)
|
||||
|
|
@ -1238,17 +1231,17 @@ static inline unsigned
|
|||
element_sz(struct brw_reg reg)
|
||||
{
|
||||
if (reg.file == BRW_IMMEDIATE_VALUE || has_scalar_region(reg)) {
|
||||
return type_sz(reg.type);
|
||||
return brw_type_size_bytes(reg.type);
|
||||
|
||||
} else if (reg.width == BRW_WIDTH_1 &&
|
||||
reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
|
||||
assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
|
||||
return type_sz(reg.type) << (reg.vstride - 1);
|
||||
return brw_type_size_bytes(reg.type) << (reg.vstride - 1);
|
||||
|
||||
} else {
|
||||
assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
|
||||
assert(reg.vstride == reg.hstride + reg.width);
|
||||
return type_sz(reg.type) << (reg.hstride - 1);
|
||||
return brw_type_size_bytes(reg.type) << (reg.hstride - 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -283,15 +283,6 @@ brw_a1_hw_3src_type_to_reg_type(const struct intel_device_info *devinfo,
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the element size given a register type.
|
||||
*/
|
||||
unsigned
|
||||
brw_reg_type_to_size(enum brw_reg_type type)
|
||||
{
|
||||
return brw_type_size_bytes(type);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a BRW_TYPE_* enum to a short string (F, UD, and so on).
|
||||
*
|
||||
|
|
|
|||
|
|
@ -177,9 +177,6 @@ enum brw_reg_type
|
|||
brw_a1_hw_3src_type_to_reg_type(const struct intel_device_info *devinfo,
|
||||
unsigned hw_type, unsigned exec_type);
|
||||
|
||||
unsigned
|
||||
brw_reg_type_to_size(enum brw_reg_type type);
|
||||
|
||||
const char *
|
||||
brw_reg_type_to_letters(enum brw_reg_type type);
|
||||
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ fs_reg_saturate_immediate(fs_reg *reg)
|
|||
double df;
|
||||
} imm, sat_imm = { 0 };
|
||||
|
||||
const unsigned size = type_sz(reg->type);
|
||||
const unsigned size = brw_type_size_bytes(reg->type);
|
||||
|
||||
/* We want to either do a 32-bit or 64-bit data copy, the type is otherwise
|
||||
* irrelevant, so just check the size of the type and copy from/to an
|
||||
|
|
@ -188,7 +188,7 @@ fs_reg::is_zero() const
|
|||
if (file != IMM)
|
||||
return false;
|
||||
|
||||
assert(type_sz(type) > 1);
|
||||
assert(brw_type_size_bytes(type) > 1);
|
||||
|
||||
switch (type) {
|
||||
case BRW_TYPE_HF:
|
||||
|
|
@ -219,7 +219,7 @@ fs_reg::is_one() const
|
|||
if (file != IMM)
|
||||
return false;
|
||||
|
||||
assert(type_sz(type) > 1);
|
||||
assert(brw_type_size_bytes(type) > 1);
|
||||
|
||||
switch (type) {
|
||||
case BRW_TYPE_HF:
|
||||
|
|
@ -250,7 +250,7 @@ fs_reg::is_negative_one() const
|
|||
if (file != IMM)
|
||||
return false;
|
||||
|
||||
assert(type_sz(type) > 1);
|
||||
assert(brw_type_size_bytes(type) > 1);
|
||||
|
||||
switch (type) {
|
||||
case BRW_TYPE_HF:
|
||||
|
|
@ -302,7 +302,7 @@ fs_inst::is_commutative() const
|
|||
* commutative. The DW source must be first.
|
||||
*/
|
||||
return !brw_type_is_int(src[0].type) ||
|
||||
type_sz(src[0].type) == type_sz(src[1].type);
|
||||
brw_type_size_bits(src[0].type) == brw_type_size_bits(src[1].type);
|
||||
|
||||
case BRW_OPCODE_SEL:
|
||||
/* MIN and MAX are commutative. */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue