pan/bi: Use lower_frag_coord_to_pixel_coord

Instead of vendoring the logic. This has a side benefit of letting NIR
optimize the generated code a bit.

total instructions in shared programs: 2687284 -> 2687281 (<.01%)
instructions in affected programs: 532 -> 529 (-0.56%)
helped: 3
HURT: 1
Inconclusive result (value mean confidence interval includes 0).

total cycles in shared programs: 140711.33 -> 140711.31 (<.01%)
cycles in affected programs: 2.53 -> 2.52 (-0.62%)
helped: 1
HURT: 0

total fma in shared programs: 22059.44 -> 22059.39 (<.01%)
fma in affected programs: 2.69 -> 2.64 (-1.74%)
helped: 3
HURT: 0

total cvt in shared programs: 14659.09 -> 14659.09 (0.00%)
cvt in affected programs: 1.56 -> 1.56 (0.00%)
helped: 1
HURT: 1

total quadwords in shared programs: 1455408 -> 1455416 (<.01%)
quadwords in affected programs: 128 -> 136 (6.25%)
helped: 0
HURT: 1

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23836>
This commit is contained in:
Alyssa Rosenzweig 2023-06-23 10:47:20 -04:00 committed by Marge Bot
parent f318cab4a1
commit abe5b06a99

View file

@ -1456,30 +1456,14 @@ bi_emit_atomic_i32_to(bi_builder *b, bi_index dst, bi_index addr, bi_index arg,
}
}
/* gl_FragCoord.xy = u16_to_f32(R59.xy) + 0.5
* gl_FragCoord.z = ld_vary(fragz)
* gl_FragCoord.w = ld_vary(fragw)
*/
static void
bi_emit_load_frag_coord(bi_builder *b, nir_intrinsic_instr *instr)
bi_emit_load_frag_coord_zw(bi_builder *b, bi_index dst, unsigned channel)
{
bi_index src[4] = {};
for (unsigned i = 0; i < 2; ++i) {
src[i] = bi_fadd_f32(b, bi_u16_to_f32(b, bi_half(bi_preload(b, 59), i)),
bi_imm_f32(0.5f));
}
for (unsigned i = 0; i < 2; ++i) {
src[2 + i] = bi_ld_var_special(
b, bi_zero(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER,
BI_UPDATE_CLOBBER,
(i == 0) ? BI_VARYING_NAME_FRAG_Z : BI_VARYING_NAME_FRAG_W,
BI_VECSIZE_NONE);
}
bi_make_vec_to(b, bi_dest_index(&instr->dest), src, NULL, 4, 32);
bi_ld_var_special_to(
b, dst, bi_zero(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER,
BI_UPDATE_CLOBBER,
(channel == 2) ? BI_VARYING_NAME_FRAG_Z : BI_VARYING_NAME_FRAG_W,
BI_VECSIZE_NONE);
}
static void
@ -1657,8 +1641,13 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
bi_split_dest(b, instr->dest);
break;
case nir_intrinsic_load_frag_coord:
bi_emit_load_frag_coord(b, instr);
case nir_intrinsic_load_pixel_coord:
/* Vectorized load of the preloaded i16vec2 */
bi_mov_i32_to(b, dst, bi_preload(b, 59));
break;
case nir_intrinsic_load_frag_coord_zw:
bi_emit_load_frag_coord_zw(b, dst, nir_intrinsic_component(instr));
break;
case nir_intrinsic_load_converted_output_pan:
@ -4763,6 +4752,7 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
NIR_PASS_V(nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
NIR_PASS_V(nir, nir_lower_var_copies);
NIR_PASS_V(nir, nir_lower_alu);
NIR_PASS_V(nir, nir_lower_frag_coord_to_pixel_coord);
}
static bi_context *