From c8ba2bc2f00cb0140ad4c01ad4cddce44d0dbadc Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 29 Mar 2022 13:26:40 -0700 Subject: [PATCH] nir: Pack texture LOD and array index to a single 32-bit value v2: Fix clamped_ai calculation in nir_lower_tex.c. Add nir_tex_src_combined_lod_and_array_index_intel to print_tex_instr. Suggested by Sagar. Reviewed-by: Sagar Ghuge Part-of: --- src/compiler/nir/nir.c | 1 + src/compiler/nir/nir.h | 15 ++++++ src/compiler/nir/nir_lower_tex.c | 88 ++++++++++++++++++++++++++++++++ src/compiler/nir/nir_print.c | 3 ++ 4 files changed, 107 insertions(+) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 93bba8f80b0..d3ff6c44730 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -3208,6 +3208,7 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src) case nir_tex_src_sampler_offset: case nir_tex_src_texture_handle: case nir_tex_src_sampler_handle: + case nir_tex_src_combined_lod_and_array_index_intel: return nir_type_uint; case nir_num_tex_src_types: diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index a20d9aaf383..bee72987dc2 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2269,6 +2269,15 @@ typedef enum nir_tex_src_type { /** Second backend-specific vec4 tex src argument, see nir_tex_src_backend1. */ nir_tex_src_backend2, + /** + * Backend-specific parameter that combines LOD parameter and array index. + * + * If this parameter is present, then nir_tex_src_lod and nir_tex_src_bias + * must not be present. Also vice versa. Only valid if nir_tex_instr::op + * is nir_texop_txl or nir_texop_txb and nir_tex_instr::is_array is set. 
+ */ + nir_tex_src_combined_lod_and_array_index_intel, + + nir_num_tex_src_types } nir_tex_src_type; @@ -5923,6 +5932,12 @@ typedef struct nir_lower_tex_options { */ bool lower_index_to_offset; + /** + * If true, pack either the explicit LOD or LOD bias and the array index + * into a single (32-bit) value when 32-bit texture coordinates are used. + */ + bool pack_lod_and_array_index; + /** * Payload data to be sent to callback / filter functions. */ diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index 2865fd14647..dcc67d5e490 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -155,6 +155,86 @@ project_src(nir_builder *b, nir_tex_instr *tex) return true; } +/** + * Pack either the explicit LOD or LOD bias and the array index together. + */ +static bool +pack_lod_and_array_index(nir_builder *b, nir_tex_instr *tex) +{ + /* If 32-bit texture coordinates are used, pack either the explicit LOD or + * LOD bias and the array index into a single (32-bit) value. + */ + int lod_index = nir_tex_instr_src_index(tex, nir_tex_src_lod); + if (lod_index < 0) { + lod_index = nir_tex_instr_src_index(tex, nir_tex_src_bias); + + /* The explicit LOD or LOD bias may not be found if this lowering has + * already occurred. The explicit LOD may also not be found in some + * cases where it is zero. + */ + if (lod_index < 0) + return false; + } + + assert(nir_tex_instr_src_type(tex, lod_index) == nir_type_float); + + /* Also do not perform this packing if the explicit LOD is zero. 
*/ + if (tex->op == nir_texop_txl && + nir_src_is_const(tex->src[lod_index].src) && + nir_src_as_float(tex->src[lod_index].src) == 0.0) { + return false; + } + + const int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); + assert(coord_index >= 0); + + nir_def *lod = tex->src[lod_index].src.ssa; + nir_def *coord = tex->src[coord_index].src.ssa; + + assert(nir_tex_instr_src_type(tex, coord_index) == nir_type_float); + + if (coord->bit_size < 32) + return false; + + b->cursor = nir_before_instr(&tex->instr); + + /* First, combine the two values. The packing format is a little weird. + * The explicit LOD / LOD bias is stored as float, as normal. However, the + * array index is converted to an integer and smashed into the low 9 bits. + */ + const unsigned array_index = tex->coord_components - 1; + + nir_def *clamped_ai = + nir_umin(b, + nir_f2u32(b, nir_fround_even(b, nir_swizzle(b, coord, + &array_index, 1))), + nir_imm_int(b, 511)); + + nir_def *lod_ai = + nir_ior(b, + nir_iand(b, lod, nir_imm_int(b, 0xfffffe00)), + clamped_ai); + + /* Second, replace the coordinate with a new value that has one fewer + * component (i.e., drop the array index). + */ + static const unsigned xyzw[] = { 0, 1, 2, 3 }; + + nir_def *reduced_coord = + nir_swizzle(b, coord, xyzw, tex->coord_components - 1); + + tex->coord_components--; + + /* Finally, remove the old sources and add the new. 
*/ + nir_src_rewrite(&tex->src[coord_index].src, reduced_coord); + + nir_tex_instr_remove_src(tex, lod_index); + nir_tex_instr_add_src(tex, nir_tex_src_combined_lod_and_array_index_intel, + lod_ai); + + return true; +} + static bool lower_offset(nir_builder *b, nir_tex_instr *tex) { @@ -1575,6 +1655,13 @@ nir_lower_tex_block(nir_block *block, nir_builder *b, progress = true; } + if (tex->is_array && + tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && + (tex->op == nir_texop_txl || tex->op == nir_texop_txb) && + options->pack_lod_and_array_index) { + progress = pack_lod_and_array_index(b, tex) || progress; + } + unsigned texture_index = tex->texture_index; uint32_t texture_mask = 1u << texture_index; int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); @@ -1716,6 +1803,7 @@ nir_lower_tex_block(nir_block *block, nir_builder *b, * three opcodes provides one. Provide a default LOD of 0. */ if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) && + (nir_tex_instr_src_index(tex, nir_tex_src_combined_lod_and_array_index_intel) == -1) && (tex->op == nir_texop_txf || tex->op == nir_texop_txs || tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) { b->cursor = nir_before_instr(&tex->instr); diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 36ebd5b953d..ead70332717 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -1751,6 +1751,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) case nir_tex_src_lod: fprintf(fp, "(lod)"); break; + case nir_tex_src_combined_lod_and_array_index_intel: + fprintf(fp, "(combined_lod_and_array_index_intel)"); + break; case nir_tex_src_min_lod: fprintf(fp, "(min_lod)"); break;