diff --git a/src/nouveau/mme/mme_fermi_builder.c b/src/nouveau/mme/mme_fermi_builder.c
index d6040759ef3..676c0057c6e 100644
--- a/src/nouveau/mme/mme_fermi_builder.c
+++ b/src/nouveau/mme/mme_fermi_builder.c
@@ -323,6 +323,36 @@ mme_fermi_bfe_to(struct mme_builder *b, struct mme_value dst,
    mme_fermi_bfe(fb, dst, pos, x, mme_zero(), bits);
 }
 
+/* Emit a shift-and-add loop that accumulates x * y into dst.
+ *
+ * Each iteration tests the low bit of x and, when it is set, adds the
+ * current y to dst; x is then shifted right and y left by one.  dst is
+ * only ever added to, so the caller must clear it first to get a plain
+ * product.  x and y are destroyed by the loop and their registers are
+ * released before returning, as the _free_srcs suffix promises, so pass
+ * temporaries rather than values that are still needed.
+ */
+void
+mme_fermi_umul_32x32_32_to_free_srcs(struct mme_builder *b,
+                                     struct mme_value dst,
+                                     struct mme_value x,
+                                     struct mme_value y)
+{
+   mme_while (b, ine, x, mme_zero()) {
+      struct mme_value lsb = mme_and(b, x, mme_imm(1));
+      mme_if (b, ine, lsb, mme_zero()) {
+         mme_add_to(b, dst, dst, y);
+      }
+      mme_free_reg(b, lsb);
+      mme_srl_to(b, x, x, mme_imm(1u));
+      mme_sll_to(b, y, y, mme_imm(1u));
+   }
+
+   /* Both operands were consumed by the loop; hand their registers back. */
+   mme_free_reg(b, x);
+   mme_free_reg(b, y);
+}
+
 static struct mme_value
 mme_fermi_load_imm_to_reg(struct mme_builder *b, struct mme_value data)
 {
@@ -685,6 +715,16 @@ mme_fermi_alu_to(struct mme_builder *b,
          return;
       }
       break;
+   case MME_ALU_OP_MUL:
+      /* The multiply loop shifts its operands in place and frees them, so
+       * give it private copies.  dst is cleared only after the copies are
+       * made, in case it shares a register with x or y.
+       */
+      x = mme_mov(b, x);
+      y = mme_mov(b, y);
+      mme_mov_to(b, dst, mme_zero());
+      mme_fermi_umul_32x32_32_to_free_srcs(b, dst, x, y);
+      return;
    case MME_ALU_OP_SLL:
       mme_fermi_sll_to(fb, dst, x, y);
       return;
diff --git a/src/nouveau/mme/mme_fermi_builder.h b/src/nouveau/mme/mme_fermi_builder.h
index b4c15ddbccb..0e955e7f090 100644
--- a/src/nouveau/mme/mme_fermi_builder.h
+++ b/src/nouveau/mme/mme_fermi_builder.h
@@ -109,6 +109,12 @@ void mme_fermi_bfe_to(struct mme_builder *b, struct mme_value dst,
                       struct mme_value x, struct mme_value pos,
                       uint8_t bits);
 
+void
+mme_fermi_umul_32x32_32_to_free_srcs(struct mme_builder *b,
+                                     struct mme_value dst,
+                                     struct mme_value x,
+                                     struct mme_value y);
+
 void
 mme_fermi_merge_to(struct mme_builder *b, struct mme_value dst,
                    struct mme_value x, struct mme_value y,
diff --git a/src/nouveau/mme/tests/mme_builder_test.cpp b/src/nouveau/mme/tests/mme_builder_test.cpp
index 763a1893c49..0ccc55dd9e8 100644
--- a/src/nouveau/mme/tests/mme_builder_test.cpp
+++ b/src/nouveau/mme/tests/mme_builder_test.cpp
@@ -189,6 +189,50 @@ TEST_F(mme_builder_test, sub_imm)
    }
 }
 
+/* Operands for the mul test below; every ordered pair gets multiplied. */
+static const uint32_t mul_cases[] = {
+   0x00000000,
+   0x00000001,
+   0x0000005c,
+   0x00c0ffee,
+   0xffffffff,
+   0x0000ffff,
+   0x00008000,
+   0x0001ffff,
+   0xffff8000,
+   0x00010000,
+   0x00020000,
+   0xfffc0000,
+   0xfffe0000,
+};
+
+TEST_F(mme_builder_test, mul)
+{
+   for (auto sim : sims) {
+      mme_builder b;
+      mme_builder_init(&b, sim->devinfo);
+
+      mme_value x = mme_load(&b);
+      mme_value y = mme_load(&b);
+
+      sim->mme_store_data(&b, 0, mme_mul(&b, x, y));
+
+      auto macro = mme_builder_finish_vec(&b);
+
+      for (uint32_t i = 0; i < ARRAY_SIZE(mul_cases); i++) {
+         for (uint32_t j = 0; j < ARRAY_SIZE(mul_cases); j++) {
+            std::vector<uint32_t> params;
+            params.push_back(mul_cases[i]);
+            params.push_back(mul_cases[j]);
+
+            sim->run_macro(macro, params);
+            /* uint32_t multiplication wraps, so this is the low 32 bits. */
+            ASSERT_EQ(sim->data[0], mul_cases[i] * mul_cases[j]);
+         }
+      }
+   }
+}
+
 TEST_F(mme_builder_test, sll_srl)
 {
    static const uint32_t x = 0xac406fe1;