diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 75a4a5f3f81..63bdc6fcf38 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2126,6 +2126,21 @@ typedef enum {
    /** Plane index for multi-plane YCbCr textures */
    nir_tex_src_plane,
 
+   /**
+    * Backend-specific vec4 tex src argument.
+    *
+    * Can be used to have NIR optimization (copy propagation, lower_vec_to_movs)
+    * apply to the packing of the tex srcs.  This lowering must only happen
+    * after nir_lower_tex().
+    *
+    * The nir_tex_instr_src_type() of this argument is float, so no lowering
+    * will happen if nir_lower_int_to_float is used.
+    */
+   nir_tex_src_backend1,
+
+   /** Second backend-specific vec4 tex src argument, see nir_tex_src_backend1. */
+   nir_tex_src_backend2,
+
    nir_num_tex_src_types
 } nir_tex_src_type;
 
@@ -2438,6 +2453,8 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
    case nir_tex_src_min_lod:
    case nir_tex_src_ddx:
    case nir_tex_src_ddy:
+   case nir_tex_src_backend1:
+   case nir_tex_src_backend2:
       return nir_type_float;
 
    case nir_tex_src_offset:
@@ -2496,6 +2513,10 @@ nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
       return instr->coord_components;
    }
 
+   if (instr->src[src].src_type == nir_tex_src_backend1 ||
+       instr->src[src].src_type == nir_tex_src_backend2)
+      return nir_src_num_components(instr->src[src].src);
+
    return 1;
 }
 
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 47661d688e1..b42a21b09bb 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -1123,6 +1123,12 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
       fprintf(fp, " ");
 
       switch(instr->src[i].src_type) {
+      case nir_tex_src_backend1:
+         fprintf(fp, "(backend1)");
+         break;
+      case nir_tex_src_backend2:
+         fprintf(fp, "(backend2)");
+         break;
       case nir_tex_src_coord:
          fprintf(fp, "(coord)");
          break;
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index 38d2db77a49..c5ce7d13da1 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -1864,8 +1864,6 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr)
 struct ntt_tex_operand_state {
    struct ureg_src srcs[4];
    unsigned i;
-   unsigned chan;
-   bool is_temp[4];
 };
 
 static void
@@ -1878,44 +1876,7 @@ ntt_push_tex_arg(struct ntt_compile *c,
    if (tex_src < 0)
       return;
 
-   struct ureg_src src = ntt_get_src(c, instr->src[tex_src].src);
-   int num_components = nir_tex_instr_src_size(instr, tex_src);
-
-   /* Find which src in the tex args we'll fit in. */
-   if (s->chan + num_components > 4) {
-      s->chan = 0;
-      s->i++;
-   }
-
-   /* Would need to fix up swizzling up to the writemask channel here. */
-   assert(num_components == 1 || s->chan == 0);
-   if (num_components == 1)
-      src = ureg_scalar(src, 0);
-
-   if (ureg_src_is_undef(s->srcs[s->i])) {
-      /* First emit of a tex operand's components, no need for a mov. */
-      s->srcs[s->i] = src;
-   } else {
-      /* Otherwise, we need to have a temporary for all the components that go
-       * in this operand.
-       */
-      if (!s->is_temp[s->i]) {
-         struct ureg_src prev_src = s->srcs[s->i];
-         s->srcs[s->i] = ureg_src(ureg_DECL_temporary(c->ureg));
-         s->is_temp[s->i] = true;
-
-         ureg_MOV(c->ureg,
-                  ureg_writemask(ureg_dst(s->srcs[s->i]),
-                                 BITFIELD_MASK(s->chan)), prev_src);
-      }
-
-      ureg_MOV(c->ureg,
-               ureg_writemask(ureg_dst(s->srcs[s->i]),
-                              BITFIELD_RANGE(s->chan, num_components)),
-               src);
-   }
-
-   s->chan += num_components;
+   s->srcs[s->i++] = ntt_get_src(c, instr->src[tex_src].src);
 }
 
 static void
@@ -1978,20 +1939,11 @@ ntt_emit_texture(struct ntt_compile *c, nir_tex_instr *instr)
    }
 
    struct ntt_tex_operand_state s = { .i = 0 };
-   ntt_push_tex_arg(c, instr, nir_tex_src_coord, &s);
-   /* We always have at least two slots for the coordinate, even on 1D. */
-   s.chan = MAX2(s.chan, 2);
+   ntt_push_tex_arg(c, instr, nir_tex_src_backend1, &s);
+   ntt_push_tex_arg(c, instr, nir_tex_src_backend2, &s);
 
-   ntt_push_tex_arg(c, instr, nir_tex_src_comparator, &s);
-   s.chan = MAX2(s.chan, 3);
-
-   ntt_push_tex_arg(c, instr, nir_tex_src_bias, &s);
-   if (tex_opcode != TGSI_OPCODE_TXF_LZ)
-      ntt_push_tex_arg(c, instr, nir_tex_src_lod, &s);
-
-   /* End of packed src setup, everything that follows gets its own operand. */
-   if (s.chan)
-      s.i++;
+   /* non-coord arg for TXQ */
+   ntt_push_tex_arg(c, instr, nir_tex_src_lod, &s);
 
    switch (instr->sampler_dim) {
    case GLSL_SAMPLER_DIM_1D:
@@ -2143,11 +2095,6 @@ ntt_emit_texture(struct ntt_compile *c, nir_tex_instr *instr)
       ureg_MOV(c->ureg, dst, ureg_scalar(ureg_src(tex_dst), 3));
       ureg_release_temporary(c->ureg, tex_dst);
    }
-
-   for (int i = 0; i < s.i; i++) {
-      if (s.is_temp[i])
-         ureg_release_temporary(c->ureg, ureg_dst(s.srcs[i]));
-   }
 }
 
 static void
@@ -2702,6 +2649,94 @@ nir_to_tgsi_lower_64bit_to_vec2(nir_shader *s)
                                        NULL);
 }
 
+struct ntt_lower_tex_state {
+   nir_ssa_def *channels[8];
+   unsigned i;
+};
+
+static void
+nir_to_tgsi_lower_tex_instr_arg(nir_builder *b,
+                                nir_tex_instr *instr,
+                                nir_tex_src_type tex_src_type,
+                                struct ntt_lower_tex_state *s)
+{
+   int tex_src = nir_tex_instr_src_index(instr, tex_src_type);
+   if (tex_src < 0)
+      return;
+
+   assert(instr->src[tex_src].src.is_ssa);
+
+   nir_ssa_def *def = instr->src[tex_src].src.ssa;
+   for (int i = 0; i < def->num_components; i++) {
+      s->channels[s->i++] = nir_channel(b, def, i);
+   }
+
+   nir_tex_instr_remove_src(instr, tex_src);
+}
+
+/**
+ * Merges together a vec4 of tex coordinate/compare/bias/lod into a backend tex
+ * src.  This lets NIR handle the coalescing of the vec4 rather than trying to
+ * manage it on our own, and may lead to more vectorization.
+ */
+static bool
+nir_to_tgsi_lower_tex_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+   if (instr->type != nir_instr_type_tex)
+      return false;
+
+   nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+   if (nir_tex_instr_src_index(tex, nir_tex_src_coord) < 0)
+      return false;
+
+   b->cursor = nir_before_instr(instr);
+
+   struct ntt_lower_tex_state s = {0};
+
+   nir_to_tgsi_lower_tex_instr_arg(b, tex, nir_tex_src_coord, &s);
+   /* We always have at least two slots for the coordinate, even on 1D. */
+   s.i = MAX2(s.i, 2);
+
+   nir_to_tgsi_lower_tex_instr_arg(b, tex, nir_tex_src_comparator, &s);
+   s.i = MAX2(s.i, 3);
+
+   nir_to_tgsi_lower_tex_instr_arg(b, tex, nir_tex_src_bias, &s);
+
+   /* XXX: LZ */
+   nir_to_tgsi_lower_tex_instr_arg(b, tex, nir_tex_src_lod, &s);
+
+   /* No need to pack undefs in unused channels of the tex instr */
+   while (!s.channels[s.i - 1])
+      s.i--;
+
+   /* Instead of putting undefs in the unused slots of the vecs, just put in
+    * another used channel.  Otherwise, we'll get unnecessary moves into
+    * registers.
+    */
+   assert(s.channels[0] != NULL);
+   for (int i = 1; i < s.i; i++) {
+      if (!s.channels[i])
+         s.channels[i] = s.channels[0];
+   }
+
+   nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(nir_vec(b, s.channels, MIN2(s.i, 4))));
+   if (s.i > 4)
+      nir_tex_instr_add_src(tex, nir_tex_src_backend2, nir_src_for_ssa(nir_vec(b, &s.channels[4], s.i - 4)));
+
+   return true;
+}
+
+static bool
+nir_to_tgsi_lower_tex(nir_shader *s)
+{
+   return nir_shader_instructions_pass(s,
+                                       nir_to_tgsi_lower_tex_instr,
+                                       nir_metadata_block_index |
+                                       nir_metadata_dominance,
+                                       NULL);
+}
+
 static void
 ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
 {
@@ -2772,6 +2807,7 @@ nir_to_tgsi(struct nir_shader *s,
       .lower_txp = ~0,
    };
    NIR_PASS_V(s, nir_lower_tex, &lower_tex_options);
+   NIR_PASS_V(s, nir_to_tgsi_lower_tex);
 
    if (!original_options->lower_uniforms_to_ubo) {
       NIR_PASS_V(s, nir_lower_uniforms_to_ubo,
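
For illustration, a minimal sketch of how a backend that opts into these packed sources might consume them at emit time. struct my_backend_ctx and emit_packed_tex_operand() are hypothetical placeholders rather than Mesa API; only nir_tex_instr_src_index(), nir_tex_instr_src_size(), and ARRAY_SIZE() (the util/macros.h helper) come from the existing tree or the patch above. As an example of the layout the pass produces, a 2D shadow sample with bias ends up with coord.xy in backend1.xy, the comparator in backend1.z, and the bias in backend1.w, following the MAX2() padding in nir_to_tgsi_lower_tex_instr().

/* Hypothetical consumer of the packed tex srcs; assumes the lowering pass
 * above has already folded coord/comparator/bias/lod into backend1/backend2. */
static void
emit_backend_tex_srcs(struct my_backend_ctx *ctx, nir_tex_instr *tex)
{
   static const nir_tex_src_type packed[] = {
      nir_tex_src_backend1,
      nir_tex_src_backend2,
   };

   for (unsigned i = 0; i < ARRAY_SIZE(packed); i++) {
      int idx = nir_tex_instr_src_index(tex, packed[i]);
      if (idx < 0)
         continue;

      /* nir_tex_instr_src_size() now reports however many channels the pass
       * packed into this argument (see the nir.h hunk above). */
      emit_packed_tex_operand(ctx, &tex->src[idx].src,
                              nir_tex_instr_src_size(tex, idx));
   }
}

The payoff of routing the packing through a regular NIR source, as the nir.h comment notes, is that copy propagation and vec coalescing clean up the vec4 construction before the backend ever sees it, instead of the backend emitting its own MOVs into temporaries as the deleted ntt_push_tex_arg() code did.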