pco: reserve additional outputs for trilinear sampled coeffs

Sampling coeffs with trilinear filtering will output two sets of data.
Whether bilinear or trilinear filtering is in use can't be determined
without checking state words, so unconditionally reserve space for both
sets to avoid clobbering output regs.

Fixes: 7df32ba09d ("pco: initial texture/sampler compiler support")
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Frank Binns <frank.binns@imgtec.com>
Tested-by: Icenowy Zheng <zhengxingda@iscas.ac.cn>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41051>
This commit is contained in:
Simon Perretta 2026-02-23 09:12:21 +00:00 committed by Marge Bot
parent f13bec7934
commit af1669d9e2
2 changed files with 7 additions and 3 deletions

View file

@ -3011,7 +3011,8 @@ intrinsic("smp_pco", src_comp=[16, 4, 4], dest_comp=0, indices=[SMP_FLAGS_PCO, R
# smp_coeffs_pco(data, tex_state, smp_state)
# Returns the calculated sampling coefficients for the given data and state words.
intrinsic("smp_coeffs_pco", src_comp=[16, 4, 4], dest_comp=8, indices=[SMP_FLAGS_PCO, RANGE], bit_sizes=[32])
# Actually outputs 7/14 components, but NIR doesn't support those for num_components, so fake it as 16 for now.
intrinsic("smp_coeffs_pco", src_comp=[16, 4, 4], dest_comp=16, indices=[SMP_FLAGS_PCO, RANGE], bit_sizes=[32])
# smp_raw_pco(data, tex_state, smp_state)
# Returns the raw sampling data for the given data and state words.

View file

@ -1697,8 +1697,11 @@ static pco_instr *lower_smp(trans_ctx *tctx,
enum pco_sb_mode sb_mode = PCO_SB_MODE_NONE;
switch (intr->intrinsic) {
case nir_intrinsic_smp_coeffs_pco:
/* Shrink the destination to its actual size. */
*dest = pco_ref_chans(*dest, ROGUE_SMP_COEFF_COUNT);
/* Shrink the destination to its actual size.
* Trilinear filtering will produce two sets of coeffs;
* reserve both just in case so that we don't clobber output regs.
*/
*dest = pco_ref_chans(*dest, ROGUE_SMP_COEFF_COUNT * 2u);
chans = 1; /* Chans must be 1 for coeff mode. */
sb_mode = PCO_SB_MODE_COEFFS;