From 8ffb48c90a82c73c5404269026df39475fd8b137 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Wed, 3 Aug 2022 15:17:50 -0700 Subject: [PATCH] tu: Only emit FS output regs for as many MRTs as we have (but at least 1). This seems to be what the blob does, should save a bit of CP overhead. zink drawoverhead throughput on test 7 (shader change) +0.234668% +/- 0.148818% (n=25). Part-of: --- src/freedreno/vulkan/tu_pipeline.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 079eca24c6a..256062aef24 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -1570,13 +1570,14 @@ tu6_emit_fs_outputs(struct tu_cs *cs, smask_regid = ir3_find_output_regid(fs, FRAG_RESULT_SAMPLE_MASK); stencilref_regid = ir3_find_output_regid(fs, FRAG_RESULT_STENCIL); - uint32_t fragdata_regid[8]; + int output_reg_count = MAX2(mrt_count, 1); + uint32_t fragdata_regid[output_reg_count]; if (fs->color0_mrt) { fragdata_regid[0] = ir3_find_output_regid(fs, FRAG_RESULT_COLOR); - for (uint32_t i = 1; i < ARRAY_SIZE(fragdata_regid); i++) + for (uint32_t i = 1; i < output_reg_count; i++) fragdata_regid[i] = fragdata_regid[0]; } else { - for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++) + for (uint32_t i = 0; i < output_reg_count; i++) fragdata_regid[i] = ir3_find_output_regid(fs, FRAG_RESULT_DATA0 + i); } @@ -1589,8 +1590,8 @@ tu6_emit_fs_outputs(struct tu_cs *cs, uint32_t fs_render_components = 0; - tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 8); - for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++) { + tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), output_reg_count); + for (uint32_t i = 0; i < output_reg_count; i++) { tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(fragdata_regid[i]) | (COND(fragdata_regid[i] & HALF_REG_ID, A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION)));