nir: Pack texture LOD and array index to a single 32-bit value

v2: Fix clamped_ai calculation in nir_lower_tex.c. Add
nir_tex_src_combined_lod_and_array_index_intel to
print_tex_instr. Suggested by Sagar.

Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27305>
This commit is contained in:
Ian Romanick 2022-03-29 13:26:40 -07:00 committed by Marge Bot
parent 78e7f7b377
commit c8ba2bc2f0
4 changed files with 107 additions and 0 deletions

View file

@ -3208,6 +3208,7 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
case nir_tex_src_sampler_offset:
case nir_tex_src_texture_handle:
case nir_tex_src_sampler_handle:
case nir_tex_src_combined_lod_and_array_index_intel:
return nir_type_uint;
case nir_num_tex_src_types:

View file

@ -2269,6 +2269,15 @@ typedef enum nir_tex_src_type {
/** Second backend-specific vec4 tex src argument, see nir_tex_src_backend1. */
nir_tex_src_backend2,
/**
* Backend-specific parameter that combines LOD parameter and array index.
*
* This parameter and nir_tex_src_lod / nir_tex_src_bias are mutually
* exclusive: if one is present, the others must not be. Only valid if
* nir_tex_instr::op is nir_texop_txl or nir_texop_txb and
* nir_tex_instr::is_array is set.
*/
nir_tex_src_combined_lod_and_array_index_intel,
nir_num_tex_src_types
} nir_tex_src_type;
@ -5923,6 +5932,12 @@ typedef struct nir_lower_tex_options {
*/
bool lower_index_to_offset;
/**
* If true, pack either the explicit LOD or LOD bias and the array index
* into a single (32-bit) value when 32-bit texture coordinates are used.
*/
bool pack_lod_and_array_index;
/**
* Payload data to be sent to callback / filter functions.
*/

View file

@ -155,6 +155,86 @@ project_src(nir_builder *b, nir_tex_instr *tex)
return true;
}
/**
 * Pack either the explicit LOD or LOD bias and the array index together.
 *
 * On success the nir_tex_src_lod / nir_tex_src_bias source of \p tex is
 * replaced by a single nir_tex_src_combined_lod_and_array_index_intel source,
 * and the last coordinate component (the array index) is dropped from the
 * coordinate source.
 *
 * \param b    Builder used to emit the packing instructions immediately
 *             before \p tex.
 * \param tex  Texture instruction to rewrite in place.
 *
 * \return true if \p tex was rewritten; false if it was left untouched
 *         because it has neither an explicit LOD nor a LOD bias source, its
 *         explicit LOD is a constant zero, or its coordinate is narrower
 *         than 32 bits.
 */
static bool
pack_lod_and_array_index(nir_builder *b, nir_tex_instr *tex)
{
   /* Prefer the explicit LOD; fall back to the LOD bias.  Neither may be
    * found if this lowering has already occurred.  The explicit LOD may also
    * not be found in some cases where it is zero.
    */
   int lod_index = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (lod_index < 0)
      lod_index = nir_tex_instr_src_index(tex, nir_tex_src_bias);

   if (lod_index < 0)
      return false;

   assert(nir_tex_instr_src_type(tex, lod_index) == nir_type_float);

   /* Also do not perform this packing if the explicit LOD is zero. */
   if (tex->op == nir_texop_txl &&
       nir_src_is_const(tex->src[lod_index].src) &&
       nir_src_as_float(tex->src[lod_index].src) == 0.0) {
      return false;
   }

   const int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);
   assert(nir_tex_instr_src_type(tex, coord_index) == nir_type_float);

   nir_def *lod = tex->src[lod_index].src.ssa;
   nir_def *coord = tex->src[coord_index].src.ssa;

   /* The packing is only performed for 32-bit texture coordinates.
    *
    * NOTE(review): the LOD / bias is masked with a 32-bit immediate below;
    * this presumably relies on it also being 32 bits wide -- confirm.
    */
   if (coord->bit_size < 32)
      return false;

   b->cursor = nir_before_instr(&tex->instr);

   /* First, combine the two values.  The packing format is a little weird.
    * The explicit LOD / LOD bias is stored as float, as normal.  However, the
    * array index is converted to an integer and smashed into the low 9 bits.
    *
    * Every builder call gets its own statement: C leaves the evaluation
    * order of sibling function arguments unspecified, so nesting several
    * instruction-emitting calls as arguments of one call would let the
    * emitted instruction order vary from one compiler to another.
    */
   const unsigned array_index = tex->coord_components - 1;
   const unsigned ai_mask = 0x1ff; /* low 9 bits; also the largest index */

   nir_def *ai_float = nir_swizzle(b, coord, &array_index, 1);
   nir_def *ai_rounded = nir_fround_even(b, ai_float);
   nir_def *ai_uint = nir_f2u32(b, ai_rounded);
   nir_def *ai_max = nir_imm_int(b, ai_mask);
   nir_def *clamped_ai = nir_umin(b, ai_uint, ai_max);

   /* Clear the low 9 bits of the LOD's bit pattern to make room for the
    * clamped array index.
    */
   nir_def *lod_keep_mask = nir_imm_int(b, ~ai_mask);
   nir_def *lod_high_bits = nir_iand(b, lod, lod_keep_mask);
   nir_def *lod_ai = nir_ior(b, lod_high_bits, clamped_ai);

   /* Second, replace the coordinate with a new value that has one fewer
    * component (i.e., drop the array index).  array_index equals the new
    * component count.
    */
   static const unsigned xyzw[] = { 0, 1, 2, 3 };
   nir_def *reduced_coord = nir_swizzle(b, coord, xyzw, array_index);
   tex->coord_components--;

   /* Finally, remove the old sources and add the new.  The coordinate is
    * rewritten first, while coord_index is still known to be valid;
    * presumably removing a source may renumber the ones after it.
    */
   nir_src_rewrite(&tex->src[coord_index].src, reduced_coord);
   nir_tex_instr_remove_src(tex, lod_index);
   nir_tex_instr_add_src(tex, nir_tex_src_combined_lod_and_array_index_intel,
                         lod_ai);

   return true;
}
static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
@ -1575,6 +1655,13 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
progress = true;
}
if (tex->is_array &&
tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
(tex->op == nir_texop_txl || tex->op == nir_texop_txb) &&
options->pack_lod_and_array_index) {
progress = pack_lod_and_array_index(b, tex) || progress;
}
unsigned texture_index = tex->texture_index;
uint32_t texture_mask = 1u << texture_index;
int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
@ -1716,6 +1803,7 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
* three opcodes provides one. Provide a default LOD of 0.
*/
if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
(nir_tex_instr_src_index(tex, nir_tex_src_combined_lod_and_array_index_intel) == -1) &&
(tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) {
b->cursor = nir_before_instr(&tex->instr);

View file

@ -1751,6 +1751,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
case nir_tex_src_lod:
fprintf(fp, "(lod)");
break;
case nir_tex_src_combined_lod_and_array_index_intel:
fprintf(fp, "(combined_lod_and_array_index_intel)");
break;
case nir_tex_src_min_lod:
fprintf(fp, "(min_lod)");
break;