diff --git a/src/nouveau/mme/mme_builder.h b/src/nouveau/mme/mme_builder.h
index 8c6cc3222f2..1fa02e28941 100644
--- a/src/nouveau/mme/mme_builder.h
+++ b/src/nouveau/mme/mme_builder.h
@@ -633,6 +633,52 @@ MME_DEF_END_WHILE(ine, EQ, false)
    for (bool run = (mme_start_while(b), true); run; \
         run = false, mme_end_while_##cmp((b), x, y))
 
+/* Defines mme_exit_if_<op>(b, x, y), which emits a conditional exit from the
+ * macro based on comparing x with y.  OP names the MME_CMP_OP_* comparison
+ * and if_true is forwarded to the backend together with it; the
+ * instantiations below pair each comparison with both if_true values
+ * (e.g. ige is LT with if_true == false).
+ *
+ * Only Turing and later (cls_eng3d >= MME_CLS_TURING) are handled; any other
+ * class hits unreachable().
+ */
+#define MME_DEF_EXIT(op, OP, if_true)                        \
+static inline void                                           \
+mme_exit_if_##op(struct mme_builder *b,                      \
+                 struct mme_value x, struct mme_value y)     \
+{                                                            \
+   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)              \
+      mme_tu104_exit_if(b, MME_CMP_OP_##OP, if_true, x, y);  \
+   else                                                      \
+      unreachable("Unsupported GPU class");                  \
+}
+
+MME_DEF_EXIT(ilt, LT, true)
+MME_DEF_EXIT(ult, LTU, true)
+MME_DEF_EXIT(ile, LE, true)
+MME_DEF_EXIT(ule, LEU, true)
+MME_DEF_EXIT(ieq, EQ, true)
+MME_DEF_EXIT(ige, LT, false)
+MME_DEF_EXIT(uge, LTU, false)
+MME_DEF_EXIT(igt, LE, false)
+MME_DEF_EXIT(ugt, LEU, false)
+MME_DEF_EXIT(ine, EQ, false)
+
+#undef MME_DEF_EXIT
+
+/* Convenience wrapper: mme_exit_if(b, ilt, x, y) expands to
+ * mme_exit_if_ilt(b, x, y).
+ */
+#define mme_exit_if(b, cmp, x, y) \
+   mme_exit_if_##cmp(b, x, y)
+
+/* Unconditional exit, expressed as an exit-if-equal on zero == zero. */
+static inline void
+mme_exit(struct mme_builder *b)
+{
+   mme_exit_if_ieq(b, mme_zero(), mme_zero());
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/nouveau/mme/mme_tu104_builder.c b/src/nouveau/mme/mme_tu104_builder.c
index 4b00ed0ad92..6ff413bd032 100644
--- a/src/nouveau/mme/mme_tu104_builder.c
+++ b/src/nouveau/mme/mme_tu104_builder.c
@@ -688,6 +688,30 @@ mme_tu104_end_while(struct mme_builder *b,
    mme_tu104_new_inst(tb);
 }
 
+/* Emits a conditional exit from the macro for the Turing+ MME.
+ *
+ * The comparison `op` between x and y is emitted as the matching branch ALU
+ * op (see mme_cmp_to_tu104_branch_op()) with mme_zero() as the destination.
+ * if_true selects whether bit 15 of the control field is set.
+ */
+void mme_tu104_exit_if(struct mme_builder *b,
+                       enum mme_cmp_op op,
+                       bool if_true,
+                       struct mme_value x,
+                       struct mme_value y)
+{
+   struct mme_tu104_builder *tb = &b->tu104;
+
+   /* we reverse it as we want to take the branch if the condition is true */
+   uint16_t control = if_true ? BITFIELD_BIT(15) : 0;
+   /* magic offset to exit the macro */
+   control |= 0x1000;
+   build_alu_to(b, mme_zero(), mme_cmp_to_tu104_branch_op(op), x, y, control,
+                true);
+
+   mme_tu104_new_inst(tb);
+}
+
 uint32_t *
 mme_tu104_builder_finish(struct mme_tu104_builder *tb, size_t *size_out)
 {
diff --git a/src/nouveau/mme/mme_tu104_builder.h b/src/nouveau/mme/mme_tu104_builder.h
index c9f834e7d33..1cf605ba603 100644
--- a/src/nouveau/mme/mme_tu104_builder.h
+++ b/src/nouveau/mme/mme_tu104_builder.h
@@ -85,6 +85,13 @@ void mme_tu104_end_while(struct mme_builder *b,
                          struct mme_value x,
                          struct mme_value y);
 
+/* Emits an exit from the macro, conditional on comparing x with y via op. */
+void mme_tu104_exit_if(struct mme_builder *b,
+                       enum mme_cmp_op op,
+                       bool if_true,
+                       struct mme_value x,
+                       struct mme_value y);
+
 uint32_t *mme_tu104_builder_finish(struct mme_tu104_builder *b,
                                    size_t *size_out);
 
diff --git a/src/nouveau/mme/tests/mme_tu104_sim_hw_test.cpp b/src/nouveau/mme/tests/mme_tu104_sim_hw_test.cpp
index 6c81e5b8d42..6b1d06e58a7 100644
--- a/src/nouveau/mme/tests/mme_tu104_sim_hw_test.cpp
+++ b/src/nouveau/mme/tests/mme_tu104_sim_hw_test.cpp
@@ -1200,6 +1200,80 @@ TEST_F(mme_tu104_sim_test, bxx_exit)
       ASSERT_EQ(data[i], 0);
 }
 
+/* An unconditional mme_exit() must stop the macro: the stores emitted after
+ * it must not land, so every data[] slot stays at the 0 written before it.
+ */
+TEST_F(mme_tu104_sim_test, mme_exit)
+{
+   mme_builder b;
+   mme_builder_init(&b, devinfo);
+
+   mme_value vals[10];
+   for (uint32_t i = 0; i < 10; i++)
+      vals[i] = mme_mov(&b, mme_zero());
+
+   for (uint32_t i = 0; i < 10; i++)
+      mme_store_imm_addr(&b, data_addr + i * 4, mme_imm(0));
+
+   /* abort */
+   mme_exit(&b);
+
+   /* those writes won't be visible */
+   for (uint32_t i = 0; i < 10; i++)
+      vals[i] = mme_mov(&b, mme_imm(i));
+
+   for (uint32_t i = 0; i < 10; i++) {
+      mme_store_imm_addr(&b, data_addr + i * 4, vals[i]);
+   }
+
+   std::vector<uint32_t> params;
+
+   auto macro = mme_builder_finish_vec(&b);
+   test_macro(&b, macro, params);
+
+   uint32_t i;
+   for (i = 0; i < 10; i++)
+      ASSERT_EQ(data[i], 0);
+}
+
+/* mme_exit_if() must be a no-op when the condition is false (0 == 1) and
+ * must stop the macro once it is true: with vals[i] == i, "5 <= vals[i]"
+ * first holds at i == 5, so only data[0..4] receive their index.
+ */
+TEST_F(mme_tu104_sim_test, mme_exit_if)
+{
+   mme_builder b;
+   mme_builder_init(&b, devinfo);
+
+   mme_value vals[10];
+   for (uint32_t i = 0; i < 10; i++)
+      vals[i] = mme_mov(&b, mme_zero());
+
+   for (uint32_t i = 0; i < 10; i++)
+      mme_store_imm_addr(&b, data_addr + i * 4, mme_imm(0));
+
+   /* shouldn't do anything */
+   mme_exit_if(&b, ieq, mme_zero(), mme_imm(1));
+
+   for (uint32_t i = 0; i < 10; i++)
+      vals[i] = mme_mov(&b, mme_imm(i));
+
+   for (uint32_t i = 0; i < 10; i++) {
+      /* abort on reaching 5 */
+      mme_exit_if(&b, ile, mme_imm(5), vals[i]);
+      mme_store_imm_addr(&b, data_addr + i * 4, vals[i]);
+   }
+
+   std::vector<uint32_t> params;
+
+   auto macro = mme_builder_finish_vec(&b);
+   test_macro(&b, macro, params);
+
+   uint32_t i;
+   for (i = 0; i < 10; i++)
+      ASSERT_EQ(data[i], i < 5 ? i : 0);
+}
+
 static bool c_ilt(int32_t x, int32_t y) { return x < y; };
 static bool c_ult(uint32_t x, uint32_t y) { return x < y; };
 static bool c_ile(int32_t x, int32_t y) { return x <= y; };