From f1c30d65e155effa4a746b89e038fc246ae3a23e Mon Sep 17 00:00:00 2001
From: Olivia Lee
Date: Fri, 23 Jan 2026 01:10:41 -0800
Subject: [PATCH] pan/v13: implement CS udiv

This will be used for CmdDrawByteCountIndirect on v13, which requires
dividing the byte count by the vertex stride to get the number of
vertices in the draw.

Signed-off-by: Olivia Lee
Reviewed-by: Christoph Pillmayer
Part-of:
---
 src/panfrost/genxml/cs_builder.h | 67 ++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/src/panfrost/genxml/cs_builder.h b/src/panfrost/genxml/cs_builder.h
index 588f8fed03f..e67201f29ca 100644
--- a/src/panfrost/genxml/cs_builder.h
+++ b/src/panfrost/genxml/cs_builder.h
@@ -13,6 +13,7 @@
 #include "gen_macros.h"
 
 #include "util/bitset.h"
+#include "util/fast_idiv_by_const.h"
 #include "util/u_dynarray.h"
 
 #ifdef __cplusplus
@@ -2051,6 +2052,72 @@ cs_umul64(struct cs_builder *b, struct cs_index dest, struct cs_index src,
       }
    }
 }
+
+/**
+ * Emit command-stream instructions computing the unsigned 32-bit quotient
+ * dest = src / imm, where imm is an immediate known when they are emitted.
+ *
+ * A power-of-two divisor is lowered to one right shift. Every other divisor
+ * goes through the pre-shift, increment, multiply and post-shift parameters
+ * reported by util_compute_fast_udiv_info().
+ *
+ * imm must be non-zero and scratch must provide at least 4 registers: the
+ * first two stage the 64-bit multiplicand, the last two receive the product.
+ *
+ * NOTE(review): the increment is applied with cs_add_imm32(). If that add
+ * wraps at 32 bits, src == UINT32_MAX with a non-zero increment stages 0
+ * rather than 2^32 and the quotient comes out as 0 -- confirm whether the
+ * add has to be carried into the upper half of the multiplicand.
+ */
+static inline void
+cs_udiv32(struct cs_builder *b, struct cs_index dest, struct cs_index src,
+          uint32_t imm, struct cs_index scratch)
+{
+   assert(scratch.size >= 4);
+   assert(imm != 0);
+
+   if (util_is_power_of_two_nonzero(imm)) {
+      /* x / 2^k == x >> k */
+      cs_rshift_imm_u32(b, dest, src, util_logbase2(imm));
+      return;
+   }
+
+   const struct util_fast_udiv_info magic =
+      util_compute_fast_udiv_info(imm, 32, 32);
+
+   /* scratch[0..1] is the multiplicand, scratch[2..3] the product. */
+   struct cs_index numer = cs_extract64(b, scratch, 0);
+   struct cs_index numer_lo = cs_extract32(b, scratch, 0);
+   struct cs_index numer_hi = cs_extract32(b, scratch, 1);
+   struct cs_index product = cs_extract64(b, scratch, 2);
+   struct cs_index product_hi = cs_extract32(b, product, 1);
+
+   if (magic.pre_shift || magic.increment) {
+      /* Each adjustment reads the previous stage's output, starting at src. */
+      struct cs_index stage_in = src;
+
+      if (magic.pre_shift) {
+         cs_rshift_imm_u32(b, numer_lo, stage_in, magic.pre_shift);
+         stage_in = numer_lo;
+      }
+
+      if (magic.increment)
+         cs_add_imm32(b, numer_lo, stage_in, magic.increment);
+   } else {
+      /* No adjustment needed: multiply src as-is. */
+      cs_move_reg32(b, numer_lo, src);
+   }
+
+   /* Zero-extend the 32-bit numerator to the 64-bit multiplicand. */
+   cs_move32_to(b, numer_hi, 0);
+
+   cs_umul64(b, product, numer, magic.multiplier);
+
+   /* The upper register of the product is (product >> 32); only the
+    * post-shift is left to apply.
+    */
+   cs_rshift_imm_u32(b, dest, product_hi, magic.post_shift);
+}
 #endif
 
 static inline void