From 16c756e55d553673314d22931bb0a84864380ff3 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 17 Jun 2020 13:44:40 +0100 Subject: [PATCH] spirv: reverse order in matrix multiplication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will create code that is easier to combine into MADs/FMAs when the last component of the vector is 1.0. nir_opt_algebraic_late has an optimization to do something similar, but it only applies to inexact code, and only if the multiplication-by-1 optimization has run before it and the backend enables fuse_ffma. fossil-db (Navi): Totals from 4296 (3.75% of 114665) affected shaders: SGPRs: 283468 -> 283764 (+0.10%); split: -0.02%, +0.12% VGPRs: 172868 -> 172904 (+0.02%); split: -0.09%, +0.11% CodeSize: 14045312 -> 14027128 (-0.13%); split: -0.15%, +0.02% MaxWaves: 59285 -> 59282 (-0.01%); split: +0.04%, -0.05% Instrs: 2703507 -> 2683187 (-0.75%); split: -0.76%, +0.00% Signed-off-by: Rhys Perry Reviewed-by: Marek Olšák Part-of: --- src/compiler/spirv/vtn_alu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c index 9daffbc7a4f..efc7eea560f 100644 --- a/src/compiler/spirv/vtn_alu.c +++ b/src/compiler/spirv/vtn_alu.c @@ -118,9 +118,9 @@ matrix_multiply(struct vtn_builder *b, for (unsigned i = 0; i < src1_columns; i++) { /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ dest->elems[i]->def = - nir_fmul(&b->nb, src0->elems[0]->def, - nir_channel(&b->nb, src1->elems[i]->def, 0)); - for (unsigned j = 1; j < src0_columns; j++) { + nir_fmul(&b->nb, src0->elems[src0_columns - 1]->def, + nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1)); + for (int j = src0_columns - 2; j >= 0; j--) { dest->elems[i]->def = nir_fadd(&b->nb, dest->elems[i]->def, nir_fmul(&b->nb, src0->elems[j]->def,