mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 17:30:12 +01:00
ir3,tu,freedreno: Allow more tex coord interpolations for prefetch
FS tex prefetch reads tex coords from r0.x, and it doesn't care which interpolation they use. Thus we can allow every interpolation that HLSQ_CONTROL_3_REG/HLSQ_CONTROL_4_REG support, i.e. (pixel, centroid, sample) x (nopersp, persp); all modes except FLAT are supported. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34422>
This commit is contained in:
parent
c4c7482a90
commit
cc7aa31b30
8 changed files with 91 additions and 29 deletions
|
|
@ -51,10 +51,10 @@ traces:
|
|||
freedreno-a630:
|
||||
checksum: e0e18dcc50ab2e23cead650d64469178
|
||||
zink-a618:
|
||||
checksum: b589b5d9ddd3026cbde08f0abe840ea7
|
||||
checksum: b7e0cdb0db74ea9a31fb7a75ae0d76fc
|
||||
zink-a630:
|
||||
label: [skip, flakes]
|
||||
checksum: b589b5d9ddd3026cbde08f0abe840ea7
|
||||
checksum: b7e0cdb0db74ea9a31fb7a75ae0d76fc
|
||||
text: seems to trigger oomkilling recently
|
||||
|
||||
valve/counterstrike-source-v2.trace:
|
||||
|
|
|
|||
|
|
@ -3854,7 +3854,7 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex)
|
|||
ir3_builder_at(ir3_before_terminator(ctx->in_block));
|
||||
sam = ir3_SAM(&build, opc, type, MASK(ncomp), 0, NULL,
|
||||
get_barycentric(ctx, IJ_PERSP_PIXEL), 0);
|
||||
sam->prefetch.input_offset = ir3_nir_coord_offset(tex->src[idx].src.ssa);
|
||||
sam->prefetch.input_offset = ir3_nir_coord_offset(tex->src[idx].src.ssa, NULL);
|
||||
/* make sure not to add irrelevant flags like S2EN */
|
||||
sam->flags = flags | (info.flags & IR3_INSTR_B);
|
||||
sam->prefetch.tex = info.tex_idx;
|
||||
|
|
@ -5810,7 +5810,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
int idx = 0;
|
||||
|
||||
foreach_input (instr, ir) {
|
||||
if (instr->input.sysval != SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL)
|
||||
if (instr->input.sysval !=
|
||||
(SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + so->prefetch_bary_type))
|
||||
continue;
|
||||
|
||||
assert(idx < 2);
|
||||
|
|
|
|||
|
|
@ -109,8 +109,9 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
|
|||
/* Enable the texture pre-fetch feature only a4xx onwards. But
|
||||
* only enable it on generations that have been tested:
|
||||
*/
|
||||
if ((so->type == MESA_SHADER_FRAGMENT) && compiler->has_fs_tex_prefetch)
|
||||
NIR_PASS(_, ctx->s, ir3_nir_lower_tex_prefetch);
|
||||
if ((so->type == MESA_SHADER_FRAGMENT) && compiler->has_fs_tex_prefetch) {
|
||||
NIR_PASS(_, ctx->s, ir3_nir_lower_tex_prefetch, &so->prefetch_bary_type);
|
||||
}
|
||||
|
||||
bool vectorized = false;
|
||||
NIR_PASS(vectorized, ctx->s, nir_opt_vectorize, ir3_nir_vectorize_filter,
|
||||
|
|
|
|||
|
|
@ -27,8 +27,9 @@ bool ir3_nir_lower_push_consts_to_preamble(nir_shader *nir,
|
|||
bool ir3_nir_lower_driver_params_to_ubo(nir_shader *nir,
|
||||
struct ir3_shader_variant *v);
|
||||
bool ir3_nir_move_varying_inputs(nir_shader *shader);
|
||||
int ir3_nir_coord_offset(nir_def *ssa);
|
||||
bool ir3_nir_lower_tex_prefetch(nir_shader *shader);
|
||||
int ir3_nir_coord_offset(nir_def *ssa, gl_system_value *bary_type);
|
||||
bool ir3_nir_lower_tex_prefetch(nir_shader *shader,
|
||||
enum ir3_bary *prefetch_bary_type);
|
||||
bool ir3_nir_lower_layer_id(nir_shader *shader);
|
||||
bool ir3_nir_lower_frag_shading_rate(nir_shader *shader);
|
||||
bool ir3_nir_lower_primitive_shading_rate(nir_shader *shader);
|
||||
|
|
|
|||
|
|
@ -5,13 +5,25 @@
|
|||
|
||||
#include "ir3_nir.h"
|
||||
|
||||
#include "util/u_vector.h"
|
||||
|
||||
/**
|
||||
* A pass which detects tex instructions which are candidates to be executed
|
||||
* prior to FS shader start, and changes them to nir_texop_tex_prefetch.
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
nir_tex_instr *tex;
|
||||
enum ir3_bary bary;
|
||||
} tex_prefetch_candidate;
|
||||
|
||||
typedef struct {
|
||||
struct u_vector candidates;
|
||||
uint32_t per_bary_candidates[IJ_COUNT];
|
||||
} ir3_prefetch_state;
|
||||
|
||||
static int
|
||||
coord_offset(nir_def *ssa)
|
||||
coord_offset(nir_def *ssa, gl_system_value *bary_type)
|
||||
{
|
||||
nir_instr *parent_instr = ssa->parent_instr;
|
||||
|
||||
|
|
@ -27,7 +39,7 @@ coord_offset(nir_def *ssa)
|
|||
if (alu->op != nir_op_vec2)
|
||||
return -1;
|
||||
|
||||
int base_src_offset = coord_offset(alu->src[0].src.ssa);
|
||||
int base_src_offset = coord_offset(alu->src[0].src.ssa, bary_type);
|
||||
if (base_src_offset < 0)
|
||||
return -1;
|
||||
|
||||
|
|
@ -35,7 +47,7 @@ coord_offset(nir_def *ssa)
|
|||
|
||||
/* NOTE it might be possible to support more than 2D? */
|
||||
for (int i = 1; i < 2; i++) {
|
||||
int nth_src_offset = coord_offset(alu->src[i].src.ssa);
|
||||
int nth_src_offset = coord_offset(alu->src[i].src.ssa, bary_type);
|
||||
if (nth_src_offset < 0)
|
||||
return -1;
|
||||
int nth_offset = nth_src_offset + alu->src[i].swizzle[0];
|
||||
|
|
@ -62,20 +74,26 @@ coord_offset(nir_def *ssa)
|
|||
nir_intrinsic_instr *interp =
|
||||
nir_instr_as_intrinsic(input->src[0].ssa->parent_instr);
|
||||
|
||||
if (interp->intrinsic != nir_intrinsic_load_barycentric_pixel)
|
||||
if (interp->intrinsic != nir_intrinsic_load_barycentric_pixel &&
|
||||
interp->intrinsic != nir_intrinsic_load_barycentric_sample &&
|
||||
interp->intrinsic != nir_intrinsic_load_barycentric_centroid)
|
||||
return -1;
|
||||
|
||||
/* interpolation modes such as noperspective aren't covered by the other
|
||||
/* interpolation modes such as flat aren't covered by the other
|
||||
* test, we need to explicitly check for them here.
|
||||
*/
|
||||
unsigned interp_mode = nir_intrinsic_interp_mode(interp);
|
||||
if (interp_mode != INTERP_MODE_NONE && interp_mode != INTERP_MODE_SMOOTH)
|
||||
if (interp_mode != INTERP_MODE_NONE && interp_mode != INTERP_MODE_SMOOTH &&
|
||||
interp_mode != INTERP_MODE_NOPERSPECTIVE)
|
||||
return -1;
|
||||
|
||||
/* we also need a const input offset: */
|
||||
if (!nir_src_is_const(input->src[1]))
|
||||
return -1;
|
||||
|
||||
if (bary_type)
|
||||
*bary_type = ir3_nir_intrinsic_barycentric_sysval(interp);
|
||||
|
||||
unsigned base = nir_src_as_uint(input->src[1]) + nir_intrinsic_base(input);
|
||||
unsigned comp = nir_intrinsic_component(input);
|
||||
|
||||
|
|
@ -83,11 +101,13 @@ coord_offset(nir_def *ssa)
|
|||
}
|
||||
|
||||
int
|
||||
ir3_nir_coord_offset(nir_def *ssa)
|
||||
ir3_nir_coord_offset(nir_def *ssa, gl_system_value *bary_type)
|
||||
{
|
||||
|
||||
assert(ssa->num_components == 2);
|
||||
return coord_offset(ssa);
|
||||
if (bary_type)
|
||||
*bary_type = SYSTEM_VALUE_MAX;
|
||||
return coord_offset(ssa, bary_type);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -136,7 +156,7 @@ ok_tex_samp(nir_tex_instr *tex)
|
|||
}
|
||||
|
||||
static bool
|
||||
lower_tex_prefetch_block(nir_block *block)
|
||||
lower_tex_prefetch_block(nir_block *block, ir3_prefetch_state *state)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
|
|
@ -168,8 +188,14 @@ lower_tex_prefetch_block(nir_block *block)
|
|||
/* First source should be the sampling coordinate. */
|
||||
nir_tex_src *coord = &tex->src[idx];
|
||||
|
||||
if (ir3_nir_coord_offset(coord->src.ssa) >= 0) {
|
||||
tex->op = nir_texop_tex_prefetch;
|
||||
gl_system_value bary_type;
|
||||
if (ir3_nir_coord_offset(coord->src.ssa, &bary_type) >= 0) {
|
||||
enum ir3_bary bary = bary_type - SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL;
|
||||
state->per_bary_candidates[bary]++;
|
||||
|
||||
tex_prefetch_candidate *candidate = u_vector_add(&state->candidates);
|
||||
candidate->tex = tex;
|
||||
candidate->bary = bary;
|
||||
|
||||
progress |= true;
|
||||
}
|
||||
|
|
@ -179,7 +205,7 @@ lower_tex_prefetch_block(nir_block *block)
|
|||
}
|
||||
|
||||
static bool
|
||||
lower_tex_prefetch_func(nir_function_impl *impl)
|
||||
lower_tex_prefetch_func(nir_function_impl *impl, ir3_prefetch_state *state)
|
||||
{
|
||||
/* Only instructions in the outer-most block are considered eligible for
|
||||
* pre-dispatch, because they need to be move-able to the beginning of the
|
||||
|
|
@ -201,18 +227,22 @@ lower_tex_prefetch_func(nir_function_impl *impl)
|
|||
}
|
||||
}
|
||||
|
||||
bool progress = lower_tex_prefetch_block(block);
|
||||
bool progress = lower_tex_prefetch_block(block, state);
|
||||
|
||||
return nir_progress(progress, impl, nir_metadata_control_flow);
|
||||
}
|
||||
|
||||
bool
|
||||
ir3_nir_lower_tex_prefetch(nir_shader *shader)
|
||||
ir3_nir_lower_tex_prefetch(nir_shader *shader,
|
||||
enum ir3_bary *prefetch_bary_type)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
|
||||
|
||||
ir3_prefetch_state state = {};
|
||||
u_vector_init(&state.candidates, 4, sizeof(tex_prefetch_candidate));
|
||||
|
||||
nir_foreach_function (function, shader) {
|
||||
/* Only texture sampling instructions inside the main function
|
||||
* are eligible for pre-dispatch.
|
||||
|
|
@ -220,8 +250,36 @@ ir3_nir_lower_tex_prefetch(nir_shader *shader)
|
|||
if (!function->impl || !function->is_entrypoint)
|
||||
continue;
|
||||
|
||||
progress |= lower_tex_prefetch_func(function->impl);
|
||||
progress |= lower_tex_prefetch_func(function->impl, &state);
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
/* We cannot prefetch tex ops that use different interpolation modes,
|
||||
* so we have to choose a single mode to prefetch. We select the
|
||||
* interpolation mode that would allow us to prefetch the most tex ops.
|
||||
*/
|
||||
uint32_t max_tex_with_bary = 0;
|
||||
uint32_t chosen_bary = 0;
|
||||
for (int i = 0; i < IJ_COUNT; i++) {
|
||||
if (state.per_bary_candidates[i] > max_tex_with_bary) {
|
||||
max_tex_with_bary = state.per_bary_candidates[i];
|
||||
chosen_bary = i;
|
||||
}
|
||||
}
|
||||
|
||||
tex_prefetch_candidate *candidate;
|
||||
u_vector_foreach(candidate, &state.candidates) {
|
||||
if (candidate->bary == chosen_bary) {
|
||||
candidate->tex->op = nir_texop_tex_prefetch;
|
||||
}
|
||||
}
|
||||
|
||||
*prefetch_bary_type = chosen_bary;
|
||||
} else {
|
||||
*prefetch_bary_type = IJ_COUNT;
|
||||
}
|
||||
|
||||
u_vector_finish(&state.candidates);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -879,6 +879,7 @@ struct ir3_shader_variant {
|
|||
/* texture sampler pre-dispatches */
|
||||
uint32_t num_sampler_prefetch;
|
||||
struct ir3_sampler_prefetch sampler_prefetch[IR3_MAX_SAMPLER_PREFETCH];
|
||||
enum ir3_bary prefetch_bary_type;
|
||||
|
||||
/* If true, the last use of helper invocations is the texture prefetch and
|
||||
* they should be disabled for the actual shader. Equivalent to adding
|
||||
|
|
|
|||
|
|
@ -1685,9 +1685,9 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
|
|||
ij_regid[i] = ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);
|
||||
|
||||
if (fs->num_sampler_prefetch > 0) {
|
||||
/* It seems like ij_pix is *required* to be r0.x */
|
||||
assert(!VALIDREG(ij_regid[IJ_PERSP_PIXEL]) ||
|
||||
ij_regid[IJ_PERSP_PIXEL] == regid(0, 0));
|
||||
/* FS prefetch reads coordinates from r0.x */
|
||||
assert(!VALIDREG(ij_regid[fs->prefetch_bary_type]) ||
|
||||
ij_regid[fs->prefetch_bary_type] == regid(0, 0));
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch);
|
||||
|
|
|
|||
|
|
@ -939,9 +939,9 @@ emit_fs_inputs(struct fd_ringbuffer *ring, const struct program_builder *b)
|
|||
ij_regid[i] = ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);
|
||||
|
||||
if (fs->num_sampler_prefetch > 0) {
|
||||
/* It seems like ij_pix is *required* to be r0.x */
|
||||
assert(!VALIDREG(ij_regid[IJ_PERSP_PIXEL]) ||
|
||||
ij_regid[IJ_PERSP_PIXEL] == regid(0, 0));
|
||||
/* FS prefetch reads coordinates from r0.x */
|
||||
assert(!VALIDREG(ij_regid[fs->prefetch_bary_type]) ||
|
||||
ij_regid[fs->prefetch_bary_type] == regid(0, 0));
|
||||
}
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue