diff --git a/src/intel/common/mi_builder.h b/src/intel/common/mi_builder.h
index 321dfacecf8..636b628e404 100644
--- a/src/intel/common/mi_builder.h
+++ b/src/intel/common/mi_builder.h
@@ -874,6 +874,121 @@ mi_ior(struct mi_builder *b,
                         MI_ALU_STORE, MI_ALU_ACCU);
 }
 
+#if GEN_VERSIONx10 >= 125
+/* Left-shifts src0 by src1 using MI_ALU_SHL.  An immediate shift count must
+ * be zero or a power of two no greater than 32; two immediates are folded.
+ */
+static inline struct mi_value
+mi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
+{
+   if (src1.type == MI_VALUE_TYPE_IMM) {
+      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
+      assert(mi_value_to_u64(src1) <= 32);
+   }
+
+   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
+      return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1));
+
+   return mi_math_binop(b, MI_ALU_SHL, src0, src1,
+                        MI_ALU_STORE, MI_ALU_ACCU);
+}
+
+/* Logical right shift of src0 by src1 using MI_ALU_SHR.  An immediate shift
+ * count must be zero or a power of two no greater than 32.
+ */
+static inline struct mi_value
+mi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
+{
+   if (src1.type == MI_VALUE_TYPE_IMM) {
+      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
+      assert(mi_value_to_u64(src1) <= 32);
+   }
+
+   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
+      return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1));
+
+   return mi_math_binop(b, MI_ALU_SHR, src0, src1,
+                        MI_ALU_STORE, MI_ALU_ACCU);
+}
+
+/* Logical right shift of src by the constant count `shift`.  A shift of
+ * zero returns src unchanged and a shift of 64 or more yields zero.  Other
+ * shifts are decomposed into one mi_ushr() per set bit of the count.
+ */
+static inline struct mi_value
+mi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
+{
+   if (shift == 0)
+      return src;
+
+   if (shift >= 64)
+      return mi_imm(0);
+
+   if (src.type == MI_VALUE_TYPE_IMM)
+      return mi_imm(mi_value_to_u64(src) >> shift);
+
+   struct mi_value res = mi_value_to_gpr(b, src);
+
+   /* Annoyingly, we only have power-of-two shifts */
+   while (shift) {
+      int bit = u_bit_scan(&shift);
+      assert(bit <= 5);
+      res = mi_ushr(b, res, mi_imm(1 << bit));
+   }
+
+   return res;
+}
+
+/* Arithmetic right shift of src0 by src1 using MI_ALU_SAR.  An immediate
+ * shift count must be zero or a power of two no greater than 32.
+ */
+static inline struct mi_value
+mi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
+{
+   if (src1.type == MI_VALUE_TYPE_IMM) {
+      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
+      assert(mi_value_to_u64(src1) <= 32);
+   }
+
+   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
+      return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1));
+
+   return mi_math_binop(b, MI_ALU_SAR, src0, src1,
+                        MI_ALU_STORE, MI_ALU_ACCU);
+}
+
+/* Arithmetic right shift of src by the constant count `shift`.  A shift of
+ * zero returns src unchanged; shifts of 64 or more are clamped to 63.
+ * Other shifts are decomposed into one mi_ishr() per set bit of the count.
+ */
+static inline struct mi_value
+mi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
+{
+   if (shift == 0)
+      return src;
+
+   /* Shifting by 64 or more leaves only copies of the sign bit, which is
+    * what a shift by 63 produces, so clamp rather than return zero.
+    */
+   if (shift >= 64)
+      shift = 63;
+
+   if (src.type == MI_VALUE_TYPE_IMM)
+      return mi_imm((int64_t)mi_value_to_u64(src) >> shift);
+
+   struct mi_value res = mi_value_to_gpr(b, src);
+
+   /* Annoyingly, we only have power-of-two shifts */
+   while (shift) {
+      int bit = u_bit_scan(&shift);
+      assert(bit <= 5);
+      res = mi_ishr(b, res, mi_imm(1 << bit));
+   }
+
+   return res;
+}
+#endif /* if GEN_VERSIONx10 >= 125 */
+
 static inline struct mi_value
 mi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N)
 {
@@ -918,8 +1033,17 @@ mi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
 
    struct mi_value res = mi_value_to_gpr(b, src);
 
+#if GEN_VERSIONx10 >= 125
+   /* Annoyingly, we only have power-of-two shifts */
+   while (shift) {
+      int bit = u_bit_scan(&shift);
+      assert(bit <= 5);
+      res = mi_ishl(b, res, mi_imm(1 << bit));
+   }
+#else
    for (unsigned i = 0; i < shift; i++)
       res = mi_iadd(b, res, mi_value_ref(b, res));
+#endif
 
    return res;
 }
diff --git a/src/intel/common/tests/mi_builder_test.cpp b/src/intel/common/tests/mi_builder_test.cpp
index dfb0f9f9a9c..9f926387c1f 100644
--- a/src/intel/common/tests/mi_builder_test.cpp
+++ b/src/intel/common/tests/mi_builder_test.cpp
@@ -622,6 +622,119 @@ TEST_F(mi_builder_test, iand)
                             mi_imm(values[1])));
 }
 
+#if GEN_VERSIONx10 >= 125
+/* mi_ishl() on memory operands must match mi_ishl() on immediates. */
+TEST_F(mi_builder_test, ishl)
+{
+   const uint64_t value = 0x0123456789abcdef;
+   memcpy(input, &value, sizeof(value));
+
+   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
+   memcpy(input + 8, shifts, sizeof(shifts));
+
+   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
+      mi_store(&b, out_mem64(i * 8),
+               mi_ishl(&b, in_mem64(0), in_mem32(8 + i * 4)));
+   }
+
+   submit_batch();
+
+   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
+      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
+                    mi_ishl(&b, mi_imm(value), mi_imm(shifts[i])));
+   }
+}
+
+/* mi_ushr() on memory operands must match mi_ushr() on immediates. */
+TEST_F(mi_builder_test, ushr)
+{
+   const uint64_t value = 0x0123456789abcdef;
+   memcpy(input, &value, sizeof(value));
+
+   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
+   memcpy(input + 8, shifts, sizeof(shifts));
+
+   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
+      mi_store(&b, out_mem64(i * 8),
+               mi_ushr(&b, in_mem64(0), in_mem32(8 + i * 4)));
+   }
+
+   submit_batch();
+
+   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
+      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
+                    mi_ushr(&b, mi_imm(value), mi_imm(shifts[i])));
+   }
+}
+
+/* mi_ushr_imm() for every count in 0..64, memory source vs. immediate. */
+TEST_F(mi_builder_test, ushr_imm)
+{
+   const uint64_t value = 0x0123456789abcdef;
+   memcpy(input, &value, sizeof(value));
+
+   const unsigned max_shift = 64;
+
+   for (unsigned i = 0; i <= max_shift; i++)
+      mi_store(&b, out_mem64(i * 8), mi_ushr_imm(&b, in_mem64(0), i));
+
+   submit_batch();
+
+   for (unsigned i = 0; i <= max_shift; i++) {
+      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
+                    mi_ushr_imm(&b, mi_imm(value), i));
+   }
+}
+
+/* mi_ishr() with top bit clear and set, memory operands vs. immediates. */
+TEST_F(mi_builder_test, ishr)
+{
+   const uint64_t values[] = {
+      0x0123456789abcdef,
+      0xfedcba9876543210,
+   };
+   memcpy(input, values, sizeof(values));
+
+   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
+   memcpy(input + 16, shifts, sizeof(shifts));
+
+   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
+      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
+         mi_store(&b, out_mem64(i * 8 + j * 16),
+                  mi_ishr(&b, in_mem64(i * 8), in_mem32(16 + j * 4)));
+      }
+   }
+
+   submit_batch();
+
+   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
+      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
+         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8 + j * 16),
+                       mi_ishr(&b, mi_imm(values[i]), mi_imm(shifts[j])));
+      }
+   }
+}
+
+/* mi_ishr_imm() for every count in 0..64, memory source vs. immediate. */
+TEST_F(mi_builder_test, ishr_imm)
+{
+   const uint64_t value = 0x0123456789abcdef;
+   memcpy(input, &value, sizeof(value));
+
+   const unsigned max_shift = 64;
+
+   for (unsigned i = 0; i <= max_shift; i++)
+      mi_store(&b, out_mem64(i * 8), mi_ishr_imm(&b, in_mem64(0), i));
+
+   submit_batch();
+
+   for (unsigned i = 0; i <= max_shift; i++) {
+      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
+                    mi_ishr_imm(&b, mi_imm(value), i));
+   }
+}
+#endif /* if GEN_VERSIONx10 >= 125 */
+
 TEST_F(mi_builder_test, imul_imm)
 {
    uint64_t lhs[2] = {
diff --git a/src/intel/genxml/gen125.xml b/src/intel/genxml/gen125.xml
index 316390f6fc7..e5c28865a9d 100644
--- a/src/intel/genxml/gen125.xml
+++ b/src/intel/genxml/gen125.xml
@@ -659,6 +659,9 @@
+
+
+