From c23e2a662a84f7f2704c99f393bf65fc4e93a5ef Mon Sep 17 00:00:00 2001 From: Felix DeGrood Date: Wed, 19 May 2021 11:51:20 -0700 Subject: [PATCH] intel/compiler: tileY friendly LID order for CS Compute shaders that access tileY resources (textures) benefit from Y-locality accesses. The easiest way to implement this is to walk local ids in Y-major fashion, instead of X-major fashion. Y-major local ids will reduce partial writes and increase cache locality for tileY accesses, since the cachelines of tileY resources progress in the Y direction. Improves performance on TGL: Borderlands3.dxvk-g2 +1.5% Y-major can introduce a performance drop on CS that use a mixture of buffers and images. This should be fixed in the next commit. Reviewed-by: Caio Marcelo de Oliveira Filho Part-of: --- .../compiler/brw_nir_lower_cs_intrinsics.c | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c index b77d6fe49bb..fd574603cbe 100644 --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c @@ -111,14 +111,28 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state, nir_ssa_def *id_x, *id_y, *id_z; switch (state->nir->info.cs.derivative_group) { case DERIVATIVE_GROUP_NONE: - /* If not using derivatives, just set the local invocation - * index linearly, and calculate local invocation ID from that. - */ - id_x = nir_umod(b, linear, size_x); - id_y = nir_umod(b, nir_udiv(b, linear, size_x), size_y); + if (nir->info.num_images == 0 && + nir->info.num_textures == 0) { + /* X-major lid order. Optimal for linear accesses only, + * which are usually buffers. X,Y ordering will look like: + * (0,0) (1,0) (2,0) ... (size_x-1,0) (0,1) (1,1) ... + */ + id_x = nir_umod(b, linear, size_x); + id_y = nir_umod(b, nir_udiv(b, linear, size_x), size_y); + local_index = linear; + } else { + /* Y-major lid order. 
Optimal for tileY accesses only, + * which are usually images. X,Y ordering will look like: + * (0,0) (0,1) (0,2) ... (0,size_y-1) (1,0) (1,1) ... + */ + id_y = nir_umod(b, linear, size_y); + id_x = nir_umod(b, nir_udiv(b, linear, size_y), size_x); + local_index = nir_iadd(b, nir_iadd(b, id_x, + nir_imul(b, id_y, size_x)), + nir_imul(b, id_z, size_xy)); + } id_z = nir_udiv(b, linear, size_xy); local_id = nir_vec3(b, id_x, id_y, id_z); - local_index = linear; break; case DERIVATIVE_GROUP_LINEAR: /* For linear, just set the local invocation index linearly,