/*
 * Copyright © 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/set.h"
#include "nir.h"
#include "nir_builder.h"

/* This pass provides a way to move computations that are always the same for
 * an entire draw/compute dispatch into a "preamble" that runs before the main
 * entrypoint.
 *
 * We also expose a separate API to get or construct the preamble of a shader
 * in case backends want to insert their own code.
 */

nir_function_impl *
nir_shader_get_preamble(nir_shader *shader)
{
   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
   if (entrypoint->preamble) {
      return entrypoint->preamble->impl;
   } else {
      nir_function *preamble = nir_function_create(shader, "@preamble");
      preamble->is_preamble = true;
      nir_function_impl *impl = nir_function_impl_create(preamble);
      entrypoint->preamble = preamble;
      return impl;
   }
}

typedef struct {
   bool can_move;
   bool candidate;
   bool must_stay;
   bool replace;

   unsigned can_move_users;

   unsigned size, align;

   unsigned offset;

   /* Average the cost of a value among its users, to try to account for
    * values that have multiple can_move uses.
    */
   float value;

   /* Overall benefit, i.e. the value minus any cost of inserting
    * load_preamble.
    */
   float benefit;
} def_state;

typedef struct {
   /* Per-definition array of states */
   def_state *states;

   /* Number of levels of non-uniform control flow we're in. We don't
    * reconstruct loops, so loops count as non-uniform conservatively. If-else
    * is counted if the condition is not marked can_move.
    */
   unsigned nonuniform_cf_nesting;

   /* Set of nir_if's that must be reconstructed in the preamble. Note an if
    * may need reconstruction even when not entirely moved. This does not
    * account for nesting: the parent CF nodes of ifs in this set must be
    * reconstructed but may not be in this set, even if the parent is another
    * if.
    */
   struct set *reconstructed_ifs;

   /* Set of definitions that must be reconstructed in the preamble. This is a
    * subset of can_move instructions, determined after replacement.
    */
   BITSET_WORD *reconstructed_defs;

   /* The def whose sources are currently being visited by update_src_value */
   nir_def *def;

   const nir_opt_preamble_options *options;
} opt_preamble_ctx;
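
/* In general, it is unsafe to speculatively hoist a conditionally executed
 * load into the preamble. Consider a shader that guards a dereference:
 *
 *    if (ptr_is_valid) {
 *       foo(*ptr);
 *    }
 *
 * Dereferencing ptr in the preamble without the guard could fault. Some
 * hardware can suppress such faults, making speculation safe; backends
 * signal this per-load with ACCESS_CAN_SPECULATE, which we check here.
 */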
static bool
instr_can_speculate(nir_instr *instr)
{
   /* Intrinsics with an ACCESS index can only be speculated if they are
    * explicitly CAN_SPECULATE.
    */
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (nir_intrinsic_has_access(intr))
         return nir_intrinsic_access(intr) & ACCESS_CAN_SPECULATE;
   }

   /* For now, everything else can be speculated. TODO: Bindless textures. */
   return true;
}

static float
get_instr_cost(nir_instr *instr, const nir_opt_preamble_options *options)
{
   /* No backend will want to hoist load_const or undef by itself, so handle
    * this for them.
    */
   if (instr->type == nir_instr_type_load_const ||
       instr->type == nir_instr_type_undef)
      return 0;

   return options->instr_cost_cb(instr, options->cb_data);
}
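
/* An illustrative cost callback a backend might plug into
 * nir_opt_preamble_options::instr_cost_cb; the weights are hypothetical, as
 * real backends derive them from their own instruction timings:
 *
 *    static float
 *    example_instr_cost(nir_instr *instr, const void *data)
 *    {
 *       if (instr->type == nir_instr_type_intrinsic)
 *          return 10.0f;   // e.g. treat loads as expensive, worth hoisting
 *       if (instr->type == nir_instr_type_alu)
 *          return 1.0f;    // ALU still accumulates value along a chain
 *       return 0.0f;
 *    }
 */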

static bool
can_move_src(nir_src *src, void *state)
{
   opt_preamble_ctx *ctx = state;

   return ctx->states[src->ssa->index].can_move;
}

static bool
can_move_srcs(nir_instr *instr, opt_preamble_ctx *ctx)
{
   return nir_foreach_src(instr, can_move_src, ctx);
}

static bool
can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
{
   switch (instr->intrinsic) {
   /* Intrinsics which can always be moved */
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_ray_launch_size:
   case nir_intrinsic_load_sbt_base_amd:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_x_offset:
   case nir_intrinsic_load_viewport_y_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
   case nir_intrinsic_load_line_width:
   case nir_intrinsic_load_aa_line_width:
   case nir_intrinsic_load_fb_layers_v3d:
   case nir_intrinsic_load_fep_w_v3d:
   case nir_intrinsic_load_tcs_num_patches_amd:
   case nir_intrinsic_load_sample_positions_pan:
   case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
   case nir_intrinsic_load_prim_gen_query_enabled_amd:
   case nir_intrinsic_load_prim_xfb_query_enabled_amd:
   case nir_intrinsic_load_clamp_vertex_color_amd:
   case nir_intrinsic_load_cull_front_face_enabled_amd:
   case nir_intrinsic_load_cull_back_face_enabled_amd:
   case nir_intrinsic_load_cull_ccw_amd:
   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
   case nir_intrinsic_load_cull_any_enabled_amd:
   case nir_intrinsic_load_cull_small_prim_precision_amd:
   case nir_intrinsic_load_vbo_base_agx:
      return true;

   /* Intrinsics which can be moved depending on hardware */
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
      return ctx->options->drawid_uniform;

   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_num_subgroups:
      return ctx->options->subgroup_size_uniform;

   case nir_intrinsic_load_workgroup_size:
      return ctx->options->load_workgroup_size_allowed;

   /* Intrinsics which can be moved if the sources can */
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ubo_vec4:
   case nir_intrinsic_get_ubo_size:
   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_preamble:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_image_levels:
   case nir_intrinsic_image_deref_levels:
   case nir_intrinsic_bindless_image_levels:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
   case nir_intrinsic_load_ssbo_address:
   case nir_intrinsic_bindless_resource_ir3:
   case nir_intrinsic_load_const_ir3:
   case nir_intrinsic_load_constant_agx:
      return can_move_srcs(&instr->instr, ctx);

   /* Image/SSBO loads can be moved if they are CAN_REORDER and their
    * sources can be moved.
    */
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_samples_identical:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_ssbo_ir3:
      return (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER) &&
             can_move_srcs(&instr->instr, ctx);

   default:
      return false;
   }
}

static bool
can_move_instr(nir_instr *instr, opt_preamble_ctx *ctx)
{
   /* If we are only contained within uniform control flow, no speculation is
    * needed since the control flow will be reconstructed in the preamble. But
    * if we are not, we must be able to speculate instructions to move them.
    */
   if (ctx->nonuniform_cf_nesting > 0 && !instr_can_speculate(instr))
      return false;

   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      /* See note below about derivatives. We have special code to convert tex
       * to txd, though, because it's a common case.
       */
      if (nir_tex_instr_has_implicit_derivative(tex) &&
          tex->op != nir_texop_tex) {
         return false;
      }
      return can_move_srcs(instr, ctx);
   }
   case nir_instr_type_alu:
      return can_move_srcs(instr, ctx);

   case nir_instr_type_intrinsic:
      return can_move_intrinsic(nir_instr_as_intrinsic(instr), ctx);

   case nir_instr_type_load_const:
   case nir_instr_type_undef:
      return true;

   case nir_instr_type_deref: {
      nir_deref_instr *deref = nir_instr_as_deref(instr);
      if (deref->deref_type == nir_deref_type_var) {
         switch (deref->modes) {
         case nir_var_uniform:
         case nir_var_mem_ubo:
            return true;
         default:
            return false;
         }
      } else {
         return can_move_srcs(instr, ctx);
      }
   }

   /* We can only move phis if all of their sources are movable, and it is a
    * phi for an if-else that is itself movable.
    */
   case nir_instr_type_phi: {
      nir_cf_node *prev_node = nir_cf_node_prev(&instr->block->cf_node);
      if (!prev_node)
         return false;

      if (prev_node->type != nir_cf_node_if) {
         assert(prev_node->type == nir_cf_node_loop);
         return false;
      }

      nir_if *nif = nir_cf_node_as_if(prev_node);
      if (!can_move_src(&nif->condition, ctx))
         return false;

      return can_move_srcs(instr, ctx);
   }

   default:
      return false;
   }
}

/* True if we should avoid making this a candidate. This is only called on
 * instructions we already determined we can move; it just makes it so that
 * uses of this instruction cannot be rewritten. Typically this happens
 * because of static constraints on the IR, for example some deref chains
 * cannot be broken.
 */
static bool
avoid_instr(nir_instr *instr, const nir_opt_preamble_options *options)
{
   if (instr->type == nir_instr_type_deref)
      return true;

   return options->avoid_instr_cb(instr, options->cb_data);
}

static bool
update_src_value(nir_src *src, void *data)
{
   opt_preamble_ctx *ctx = data;

   def_state *state = &ctx->states[ctx->def->index];
   def_state *src_state = &ctx->states[src->ssa->index];

   assert(src_state->can_move);

   /* If an instruction has can_move and non-can_move users, it becomes a
    * candidate and its value shouldn't propagate downwards. For example,
    * imagine a chain like this:
    *
    *         -- F (cannot move)
    *        /
    *  A <-- B <-- C <-- D <-- E (cannot move)
    *
    * B and D are marked candidates. Picking B removes A and B, picking D
    * removes C and D, and picking both removes all 4. Therefore B and D are
    * independent and B's value shouldn't flow into D.
    *
    * A similar argument holds for must_stay values.
    */
   if (!src_state->must_stay && !src_state->candidate)
      state->value += src_state->value;

   return true;
}
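
/* Concretely (with illustrative numbers): a non-candidate can_move def of
 * cost 4 feeding two can_move users has its accumulated value divided by
 * can_move_users in step 3 below, so each user is credited 2 rather than
 * the full 4 being counted twice.
 */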

static int
candidate_sort(const void *data1, const void *data2)
{
   const def_state *state1 = *(def_state **)data1;
   const def_state *state2 = *(def_state **)data2;

   float value1 = state1->value / state1->size;
   float value2 = state2->value / state2->size;
   if (value1 < value2)
      return 1;
   else if (value1 > value2)
      return -1;
   else
      return 0;
}
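
/* This orders candidates by descending value density (value / size), the
 * classic greedy heuristic for 0-1 knapsack: e.g. a candidate of value 8
 * and size 2 (density 4) sorts before one of value 9 and size 3 (density 3),
 * since it earns more per unit of limited preamble storage.
 */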

static bool
calculate_can_move_for_block(opt_preamble_ctx *ctx, nir_block *block)
{
   bool all_can_move = true;

   nir_foreach_instr(instr, block) {
      nir_def *def = nir_instr_def(instr);
      if (!def)
         continue;

      def_state *state = &ctx->states[def->index];
      state->can_move = can_move_instr(instr, ctx);
      all_can_move &= state->can_move;
   }

   return all_can_move;
}

static bool
calculate_can_move_for_cf_list(opt_preamble_ctx *ctx, struct exec_list *list)
{
   bool all_can_move = true;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         all_can_move &=
            calculate_can_move_for_block(ctx, nir_cf_node_as_block(node));
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(node);
         bool uniform = can_move_src(&nif->condition, ctx);

         if (!uniform)
            ctx->nonuniform_cf_nesting++;

         bool if_can_move = uniform;
         if_can_move &= calculate_can_move_for_cf_list(ctx, &nif->then_list);
         if_can_move &= calculate_can_move_for_cf_list(ctx, &nif->else_list);

         if (!uniform)
            ctx->nonuniform_cf_nesting--;

         all_can_move &= if_can_move;
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(node);

         /* Conservatively treat loops like conditional control flow, since an
          * instruction might be conditionally unreachable due to an earlier
          * break in a loop that executes only one iteration.
          */
         ctx->nonuniform_cf_nesting++;
         calculate_can_move_for_cf_list(ctx, &loop->body);
         ctx->nonuniform_cf_nesting--;
         all_can_move = false;
         break;
      }

      default:
         unreachable("Unexpected CF node type");
      }
   }

   return all_can_move;
}
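
/* For instance, when walking
 *
 *    if (data-dependent cond) {   // condition not can_move: nesting 1
 *       if (uniform cond) {       // can_move condition: nesting stays 1
 *          x = load(...);         // must be speculatable to move
 *       }
 *    }
 *
 * only the outer if bumps nonuniform_cf_nesting, so x is moved only when
 * instr_can_speculate() allows it.
 */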

static void
replace_for_block(nir_builder *b, opt_preamble_ctx *ctx,
                  struct hash_table *remap_table, nir_block *block)
{
   nir_foreach_instr(instr, block) {
      nir_def *def = nir_instr_def(instr);
      if (!def)
         continue;

      /* Only replace what we actually need. This is a micro-optimization for
       * compile-time performance of regular instructions, but it's required
       * for correctness with phi nodes, since we might not reconstruct the
       * corresponding if.
       */
      if (!BITSET_TEST(ctx->reconstructed_defs, def->index))
         continue;

      def_state *state = &ctx->states[def->index];
      assert(state->can_move && "reconstructed => can_move");

      nir_instr *clone;

      if (instr->type == nir_instr_type_phi) {
         /* Phis are special since they can't be cloned with nir_instr_clone */
         nir_phi_instr *phi = nir_instr_as_phi(instr);

         nir_cf_node *nif_cf = nir_cf_node_prev(&block->cf_node);
         assert(nif_cf->type == nir_cf_node_if && "only if's are moveable");
         nir_if *nif = nir_cf_node_as_if(nif_cf);

         nir_block *then_block = nir_if_last_then_block(nif);
         nir_block *else_block = nir_if_last_else_block(nif);

         nir_def *then_def = NULL, *else_def = NULL;

         nir_foreach_phi_src(phi_src, phi) {
            if (phi_src->pred == then_block) {
               assert(then_def == NULL);
               then_def = phi_src->src.ssa;
            } else if (phi_src->pred == else_block) {
               assert(else_def == NULL);
               else_def = phi_src->src.ssa;
            } else {
               unreachable("Invalid predecessor for phi of if");
            }
         }

         assert(exec_list_length(&phi->srcs) == 2 && "only if's are movable");
         assert(then_def && else_def && "all sources seen");

         /* Remap */
         then_def = _mesa_hash_table_search(remap_table, then_def)->data;
         else_def = _mesa_hash_table_search(remap_table, else_def)->data;

         b->cursor =
            nir_before_block_after_phis(nir_cursor_current_block(b->cursor));

         nir_def *repl = nir_if_phi(b, then_def, else_def);
         clone = repl->parent_instr;

         _mesa_hash_table_insert(remap_table, &phi->def, repl);
      } else {
         clone = nir_instr_clone_deep(b->shader, instr, remap_table);
         nir_builder_instr_insert(b, clone);
      }

      if (clone->type == nir_instr_type_tex) {
         nir_tex_instr *tex = nir_instr_as_tex(clone);
         if (tex->op == nir_texop_tex) {
            /* For maximum compatibility, replace normal textures with
             * textureGrad with a gradient of 0.
             * TODO: Handle txb somehow.
             */
            b->cursor = nir_before_instr(clone);

            nir_def *zero =
               nir_imm_zero(b, tex->coord_components - tex->is_array, 32);
            nir_tex_instr_add_src(tex, nir_tex_src_ddx, zero);
            nir_tex_instr_add_src(tex, nir_tex_src_ddy, zero);
            tex->op = nir_texop_txd;

            b->cursor = nir_after_instr(clone);
         }
      }

      if (state->replace) {
         nir_def *clone_def = nir_instr_def(clone);
         nir_store_preamble(b, clone_def, .base = state->offset);
      }
   }
}

static void
replace_for_cf_list(nir_builder *b, opt_preamble_ctx *ctx,
                    struct hash_table *remap_table, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block: {
         replace_for_block(b, ctx, remap_table, nir_cf_node_as_block(node));
         break;
      }

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(node);

         /* If we moved something that requires reconstructing the if, do so */
         if (_mesa_set_search(ctx->reconstructed_ifs, nif)) {
            assert(can_move_src(&nif->condition, ctx));

            struct hash_entry *entry =
               _mesa_hash_table_search(remap_table, nif->condition.ssa);
            assert(entry != NULL && "can_move condition, def dominates use");
            nir_def *remap_cond = entry->data;

            nir_if *reconstructed_nif = NULL;
            reconstructed_nif = nir_push_if(b, remap_cond);

            b->cursor = nir_before_cf_list(&reconstructed_nif->then_list);
            replace_for_cf_list(b, ctx, remap_table, &nif->then_list);

            b->cursor = nir_before_cf_list(&reconstructed_nif->else_list);
            replace_for_cf_list(b, ctx, remap_table, &nif->else_list);

            nir_pop_if(b, reconstructed_nif);
            b->cursor = nir_after_cf_node(&reconstructed_nif->cf_node);
         } else {
            replace_for_cf_list(b, ctx, remap_table, &nif->then_list);
            replace_for_cf_list(b, ctx, remap_table, &nif->else_list);
         }

         break;
      }

      case nir_cf_node_loop: {
         /* We don't try to reconstruct loops */
         nir_loop *loop = nir_cf_node_as_loop(node);
         replace_for_cf_list(b, ctx, remap_table, &loop->body);
         break;
      }

      default:
         unreachable("Unexpected CF node type");
      }
   }
}

/*
 * If an if-statement contains an instruction that cannot be speculated, the
 * if-statement must be reconstructed so we avoid the speculation. This applies
 * even for nested if-statements. Determine which if-statements must be
 * reconstructed for this reason by walking the program forward and looking
 * inside uniform if's.
 *
 * Returns whether the CF list contains a reconstructed instruction that would
 * otherwise be speculated, updating the reconstructed_ifs set. This depends on
 * reconstructed_defs being correctly set by analyze_reconstructed.
 */
static bool
analyze_speculation_for_cf_list(opt_preamble_ctx *ctx, struct exec_list *list)
{
   bool reconstruct_cf_list = false;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block: {
         nir_foreach_instr(instr, nir_cf_node_as_block(node)) {
            nir_def *def = nir_instr_def(instr);
            if (!def)
               continue;

            if (!BITSET_TEST(ctx->reconstructed_defs, def->index))
               continue;

            if (!instr_can_speculate(instr)) {
               reconstruct_cf_list = true;
               break;
            }
         }

         break;
      }

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(node);

         /* If we can move the if, we might need to reconstruct */
         if (can_move_src(&nif->condition, ctx)) {
            bool any = false;
            any |= analyze_speculation_for_cf_list(ctx, &nif->then_list);
            any |= analyze_speculation_for_cf_list(ctx, &nif->else_list);

            if (any)
               _mesa_set_add(ctx->reconstructed_ifs, nif);

            reconstruct_cf_list |= any;
         }

         break;
      }

      /* We don't reconstruct loops */
      default:
         break;
      }
   }

   return reconstruct_cf_list;
}

static bool
mark_reconstructed(nir_src *src, void *state)
{
   BITSET_WORD *reconstructed_defs = state;
   BITSET_SET(reconstructed_defs, src->ssa->index);
   return true;
}

/*
 * If a phi is moved into the preamble, then the if it depends on must also be
 * moved. However, it is not necessary to consider any nested control flow. As
 * an example, if we have a shader:
 *
 *    if (not moveable condition) {
 *       if (moveable condition) {
 *          x = moveable
 *       }
 *       y = phi x, moveable
 *       z = floor y
 *    }
 *
 * Then if 'z' is in the replace set, we need to reconstruct the inner if, but
 * not the outer if, unless there's also speculation to worry about.
 *
 * We do this by marking defs that need to be reconstructed, with a backwards
 * sweep of the program (compatible with reverse dominance), and marking the
 * if's preceding reconstructed phis.
 */
static void
analyze_reconstructed(opt_preamble_ctx *ctx, nir_function_impl *impl)
{
   nir_foreach_block_reverse(block, impl) {
      /* If an if-statement is reconstructed, its condition must be as well */
      nir_if *nif = nir_block_get_following_if(block);
      if (nif && _mesa_set_search(ctx->reconstructed_ifs, nif))
         BITSET_SET(ctx->reconstructed_defs, nif->condition.ssa->index);

      nir_foreach_instr_reverse(instr, block) {
         nir_def *def = nir_instr_def(instr);
         if (!def)
            continue;

         def_state *state = &ctx->states[def->index];

         /* Anything that's replaced must be reconstructed */
         if (state->replace)
            BITSET_SET(ctx->reconstructed_defs, def->index);
         else if (!BITSET_TEST(ctx->reconstructed_defs, def->index))
            continue;

         /* If it must be reconstructed, it better be moveable */
         assert(state->can_move);

         /* Anything that depends on something reconstructed is reconstructed */
         nir_foreach_src(instr, mark_reconstructed, ctx->reconstructed_defs);

         /* Reconstructed phis need their ifs reconstructed */
         if (instr->type == nir_instr_type_phi) {
            nir_cf_node *prev_node = nir_cf_node_prev(&instr->block->cf_node);

            /* Invariants guaranteed by can_move_instr */
            assert(prev_node != NULL);
            assert(prev_node->type == nir_cf_node_if);

            nir_if *nif = nir_cf_node_as_if(prev_node);
            assert(can_move_src(&nif->condition, ctx));

            /* Mark the if for reconstruction */
            _mesa_set_add(ctx->reconstructed_ifs, nif);
         }
      }
   }
}

bool
nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
                 unsigned *size)
{
   opt_preamble_ctx ctx = {
      .options = options,
   };

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   ctx.states = calloc(impl->ssa_alloc, sizeof(*ctx.states));

   /* Step 1: Calculate can_move */
   calculate_can_move_for_cf_list(&ctx, &impl->body);

   /* Step 2: Calculate is_candidate. This is complicated by the presence of
    * non-candidate instructions like derefs whose users cannot be rewritten.
    * If a deref chain is used at all by a non-can_move thing, then any offset
    * sources anywhere along the chain should be considered candidates because
    * the entire deref chain will never be deleted, but if it's only used by
    * can_move things then it becomes subsumed by its users and none of the
    * offset sources should be considered candidates as they will be removed
    * when the users of the deref chain are moved. We need to replace "are
    * there any non-can_move users" with "are there any non-can_move users,
    * *recursing through non-candidate users*". We do this by walking backward
    * and marking when a non-candidate instruction must stay in the final
    * program because it has a non-can_move user, including recursively.
    */
   unsigned num_candidates = 0;
   nir_foreach_block_reverse(block, impl) {
      nir_foreach_instr_reverse(instr, block) {
         nir_def *def = nir_instr_def(instr);
         if (!def)
            continue;

         def_state *state = &ctx.states[def->index];
         if (!state->can_move)
            continue;

         state->value = get_instr_cost(instr, options);
         bool is_candidate = !avoid_instr(instr, options);
         state->candidate = false;
         state->must_stay = false;
         nir_foreach_use_including_if(use, def) {
            bool is_can_move_user;

            if (nir_src_is_if(use)) {
               is_can_move_user = false;
            } else {
               nir_def *use_def = nir_instr_def(nir_src_parent_instr(use));
               is_can_move_user = use_def != NULL &&
                                  ctx.states[use_def->index].can_move &&
                                  !ctx.states[use_def->index].must_stay;
            }

            if (is_can_move_user) {
               state->can_move_users++;
            } else {
               if (is_candidate)
                  state->candidate = true;
               else
                  state->must_stay = true;
            }
         }

         if (state->candidate)
            num_candidates++;
      }
   }

   if (num_candidates == 0) {
      free(ctx.states);
      return false;
   }

   def_state **candidates = malloc(sizeof(*candidates) * num_candidates);
   unsigned candidate_idx = 0;
   unsigned total_size = 0;

   /* Step 3: Calculate value of candidates by propagating downwards. We try
    * to share the value amongst can_move uses, in case there are multiple.
    * This won't always find an optimal solution, but is hopefully a good
    * heuristic.
    *
    * Note that we use the can_move adjusted in the last pass, because if a
    * can_move instruction cannot be moved because it's not a candidate and it
    * has a non-can_move source then we don't want to count it as a use.
    *
    * While we're here, also collect an array of candidates.
    */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         nir_def *def = nir_instr_def(instr);
         if (!def)
            continue;

         def_state *state = &ctx.states[def->index];
         if (!state->can_move || state->must_stay)
            continue;

         ctx.def = def;
         nir_foreach_src(instr, update_src_value, &ctx);

         /* If this instruction is a candidate, its value shouldn't be
          * propagated so we skip dividing it.
          *
          * Note: if it's can_move but not a candidate, then all its users
          * must be can_move, so if there are no users then it must be dead.
          */
         if (!state->candidate && !state->must_stay) {
            if (state->can_move_users > 0)
               state->value /= state->can_move_users;
            else
               state->value = 0;
         }

         if (state->candidate) {
            state->benefit = state->value -
                             options->rewrite_cost_cb(def, options->cb_data);

            if (state->benefit > 0) {
               options->def_size(def, &state->size, &state->align);
               total_size = ALIGN_POT(total_size, state->align);
               total_size += state->size;
               candidates[candidate_idx++] = state;
            }
         }
      }
   }

   assert(candidate_idx <= num_candidates);
   num_candidates = candidate_idx;

   if (num_candidates == 0) {
      free(ctx.states);
      free(candidates);
      return false;
   }

   /* Step 4: Figure out which candidates we're going to replace and assign an
    * offset. Assuming there is no expression sharing, this is similar to the
    * 0-1 knapsack problem, except when there is a gap introduced by
    * alignment. We use a well-known greedy approximation, sorting by value
    * divided by size.
    */

   if (((*size) + total_size) > options->preamble_storage_size) {
      qsort(candidates, num_candidates, sizeof(*candidates), candidate_sort);
   }

   unsigned offset = *size;
   for (unsigned i = 0; i < num_candidates; i++) {
      def_state *state = candidates[i];
      offset = ALIGN_POT(offset, state->align);

      if (offset + state->size > options->preamble_storage_size)
         break;

      state->replace = true;
      state->offset = offset;

      offset += state->size;
   }

   *size = offset;

   free(candidates);

   /* Determine which if's need to be reconstructed, based on the replacements
    * we did.
    */
   ctx.reconstructed_ifs = _mesa_pointer_set_create(NULL);
   ctx.reconstructed_defs = calloc(BITSET_WORDS(impl->ssa_alloc),
                                   sizeof(BITSET_WORD));
   analyze_reconstructed(&ctx, impl);

   /* If we make progress analyzing speculation, we need to re-analyze
    * reconstructed defs to get the if-conditions in there.
    */
   if (analyze_speculation_for_cf_list(&ctx, &impl->body))
      analyze_reconstructed(&ctx, impl);

   /* Step 5: Actually do the replacement. */
   struct hash_table *remap_table =
      _mesa_pointer_hash_table_create(NULL);
   nir_function_impl *preamble =
      nir_shader_get_preamble(impl->function->shader);
   nir_builder preamble_builder = nir_builder_at(nir_before_impl(preamble));
   nir_builder *b = &preamble_builder;

   replace_for_cf_list(b, &ctx, remap_table, &impl->body);

   nir_builder builder = nir_builder_create(impl);
   b = &builder;

   unsigned max_index = impl->ssa_alloc;
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         nir_def *def = nir_instr_def(instr);
         if (!def)
            continue;

         /* Ignore new load_preamble instructions */
         if (def->index >= max_index)
            continue;

         def_state *state = &ctx.states[def->index];
         if (!state->replace)
            continue;

         b->cursor = nir_after_instr_and_phis(instr);

         nir_def *new_def =
            nir_load_preamble(b, def->num_components, def->bit_size,
                              .base = state->offset);

         nir_def_rewrite_uses(def, new_def);
         nir_instr_free_and_dce(instr);
      }
   }

   nir_metadata_preserve(impl, nir_metadata_control_flow);

   ralloc_free(remap_table);
   free(ctx.states);
   free(ctx.reconstructed_defs);
   _mesa_set_destroy(ctx.reconstructed_ifs, NULL);
   return true;
}
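
/* A minimal sketch of driving this pass from a backend; every name prefixed
 * with example_ is a hypothetical backend-supplied callback, and the storage
 * size is illustrative:
 *
 *    nir_opt_preamble_options opts = {
 *       .drawid_uniform = true,
 *       .subgroup_size_uniform = true,
 *       .def_size = example_def_size,
 *       .instr_cost_cb = example_instr_cost,
 *       .rewrite_cost_cb = example_rewrite_cost,
 *       .avoid_instr_cb = example_avoid_instr,
 *       .preamble_storage_size = 512,
 *    };
 *    unsigned size = 0;
 *    NIR_PASS(_, shader, nir_opt_preamble, &opts, &size);
 *
 * On return, size holds the amount of preamble storage actually assigned,
 * which the backend can use to size its uniform/preamble buffer.
 */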