ir3, tu: Plumb through support for per-shader robustness

We need to pass through the robust_modes flag to nir_opt_vectorize based
on a flag set when compiling the shader, not globally in the compiler,
for VK_EXT_pipeline_robustness. Refactor the ir3 compiler interface
to add an ir3_shader_nir_options struct that can be passed around to
the appropriate places, and wire it up in turnip to the shader key. The
shader key replaces the old mechanism of hashing in the compiler
options.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31687>
This commit is contained in:
Connor Abbott 2024-10-15 13:03:08 -04:00 committed by Marge Bot
parent 3d066e5ef1
commit c323848b0b
14 changed files with 123 additions and 68 deletions

View file

@ -24,11 +24,6 @@ struct ir3_ra_reg_set;
struct ir3_shader;
struct ir3_compiler_options {
/* If true, UBO/SSBO accesses are assumed to be bounds-checked as defined by
* VK_EXT_robustness2 and optimizations may have to be more conservative.
*/
bool robust_buffer_access2;
/* If true, promote UBOs (except for constant data) to constants using ldc.k
* in the preamble. The driver should ignore everything in ubo_state except
* for the constant data UBO, which is excluded because the command pushing
@ -340,9 +335,6 @@ enum ir3_shader_debug {
/* MESA_DEBUG-only options: */
IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20),
IR3_DBG_RAMSGS = BITFIELD_BIT(21),
/* Only used for the disk-caching logic: */
IR3_DBG_ROBUST_UBO_ACCESS = BITFIELD_BIT(30),
};
extern enum ir3_shader_debug ir3_shader_debug;

View file

@ -66,7 +66,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
*/
ctx->s = nir_shader_clone(ctx, shader->nir);
ir3_nir_lower_variant(so, ctx->s);
ir3_nir_lower_variant(so, &shader->options.nir_options, ctx->s);
bool progress = false;
bool needs_late_alg = false;

View file

@ -45,8 +45,6 @@ ir3_disk_cache_init(struct ir3_compiler *compiler)
_mesa_sha1_format(timestamp, id_sha1);
uint64_t driver_flags = ir3_shader_debug;
if (compiler->options.robust_buffer_access2)
driver_flags |= IR3_DBG_ROBUST_UBO_ACCESS;
compiler->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
}
@ -76,6 +74,8 @@ ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
sizeof(shader->options.api_wavesize));
_mesa_sha1_update(&ctx, &shader->options.real_wavesize,
sizeof(shader->options.real_wavesize));
_mesa_sha1_update(&ctx, &shader->options.nir_options,
sizeof(shader->options.nir_options));
/* Note that on some gens stream-out is lowered in ir3 to stg. For later
* gens we maybe don't need to include stream-out in the cache key.

View file

@ -248,7 +248,9 @@ ir3_get_variable_size_align_bytes(const glsl_type *type, unsigned *size, unsigne
#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
bool
ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s)
ir3_optimize_loop(struct ir3_compiler *compiler,
const struct ir3_shader_nir_options *options,
nir_shader *s)
{
MESA_TRACE_FUNC();
@ -324,8 +326,7 @@ ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s)
nir_load_store_vectorize_options vectorize_opts = {
.modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform,
.callback = ir3_nir_should_vectorize_mem,
.robust_modes = compiler->options.robust_buffer_access2 ?
nir_var_mem_ubo | nir_var_mem_ssbo : 0,
.robust_modes = options->robust_modes,
.cb_data = compiler,
};
progress |= OPT(s, nir_opt_load_store_vectorize, &vectorize_opts);
@ -495,7 +496,9 @@ ir3_nir_lower_array_sampler(nir_shader *shader)
}
void
ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
ir3_finalize_nir(struct ir3_compiler *compiler,
const struct ir3_shader_nir_options *options,
nir_shader *s)
{
MESA_TRACE_FUNC();
@ -536,7 +539,7 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
OPT_V(s, nir_lower_is_helper_invocation);
ir3_optimize_loop(compiler, s);
ir3_optimize_loop(compiler, options, s);
/* do idiv lowering after first opt loop to get a chance to propagate
* constants for divide by immed power-of-two:
@ -548,7 +551,7 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
idiv_progress |= OPT(s, nir_lower_idiv, &idiv_options);
if (idiv_progress)
ir3_optimize_loop(compiler, s);
ir3_optimize_loop(compiler, options, s);
OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
@ -856,7 +859,7 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
if (compiler->gen >= 6)
OPT_V(s, ir3_nir_lower_ssbo_size, compiler->options.storage_16bit ? 1 : 2);
ir3_optimize_loop(compiler, s);
ir3_optimize_loop(compiler, &shader->options.nir_options, s);
}
static bool
@ -939,7 +942,9 @@ ir3_mem_access_size_align(nir_intrinsic_op intrin, uint8_t bytes,
}
void
ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
ir3_nir_lower_variant(struct ir3_shader_variant *so,
const struct ir3_shader_nir_options *options,
nir_shader *s)
{
MESA_TRACE_FUNC();
@ -1103,17 +1108,17 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
progress |= OPT(s, ir3_nir_lower_io_offsets);
if (progress)
ir3_optimize_loop(so->compiler, s);
ir3_optimize_loop(so->compiler, options, s);
/* verify that progress is always set */
assert(!ir3_optimize_loop(so->compiler, s));
assert(!ir3_optimize_loop(so->compiler, options, s));
/* Fixup indirect load_const_ir3's which end up with a const base offset
* which is too large to encode. Do this late(ish) so we actually
* can differentiate indirect vs non-indirect.
*/
if (OPT(s, ir3_nir_fixup_load_const_ir3))
ir3_optimize_loop(so->compiler, s);
ir3_optimize_loop(so->compiler, options, s);
/* Do late algebraic optimization to turn add(a, neg(b)) back into
* subs, then the mandatory cleanup after algebraic. Note that it may

View file

@ -58,11 +58,17 @@ nir_mem_access_size_align ir3_mem_access_size_align(
uint32_t align_offset, bool offset_is_const, const void *cb_data);
bool ir3_nir_opt_branch_and_or_not(nir_shader *nir);
bool ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s);
bool ir3_optimize_loop(struct ir3_compiler *compiler,
const struct ir3_shader_nir_options *options,
nir_shader *s);
void ir3_nir_lower_io_to_temporaries(nir_shader *s);
void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s);
void ir3_finalize_nir(struct ir3_compiler *compiler,
const struct ir3_shader_nir_options *options,
nir_shader *s);
void ir3_nir_post_finalize(struct ir3_shader *shader);
void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s);
void ir3_nir_lower_variant(struct ir3_shader_variant *so,
const struct ir3_shader_nir_options *options,
nir_shader *s);
void ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
struct ir3_const_state *const_state);

View file

@ -493,10 +493,10 @@ ir3_shader_passthrough_tcs(struct ir3_shader *vs, unsigned patch_vertices)
nir_shader_gather_info(tcs, nir_shader_get_entrypoint(tcs));
ir3_finalize_nir(vs->compiler, tcs);
struct ir3_shader_options ir3_options = {};
ir3_finalize_nir(vs->compiler, &ir3_options.nir_options, tcs);
vs->vs.passthrough_tcs[n] =
ir3_shader_from_nir(vs->compiler, tcs, &ir3_options, NULL);

View file

@ -507,6 +507,17 @@ struct ir3_disasm_info {
/* Represents half register in regid */
#define HALF_REG_ID 0x100
/* Options for common NIR optimization passes done in ir3. This is used for both
 * finalize and post-finalize (where it has to be in the shader). Because these
 * options affect the generated code, the whole struct is hashed into the
 * disk-cache shader key (see ir3_disk_cache_init_shader_key), replacing the
 * old mechanism of hashing compiler-global robustness flags.
 */
struct ir3_shader_nir_options {
/* For the modes specified, accesses are assumed to be bounds-checked as
 * defined by VK_EXT_robustness2 and optimizations may have to be more
 * conservative. Passed through to nir_opt_load_store_vectorize's
 * robust_modes so it won't merge accesses across a bounds check.
 */
nir_variable_mode robust_modes;
};
struct ir3_shader_options {
unsigned num_reserved_user_consts;
/* What API-visible wavesizes are allowed. Even if only double wavesize is
@ -522,6 +533,8 @@ struct ir3_shader_options {
uint32_t push_consts_base;
uint32_t push_consts_dwords;
struct ir3_shader_nir_options nir_options;
};
/**

View file

@ -799,13 +799,14 @@ compile_shader(struct tu_device *dev, struct nir_shader *nir,
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage);
ir3_finalize_nir(dev->compiler, nir);
const struct ir3_shader_options options = {
.num_reserved_user_consts = align(consts, 8),
.api_wavesize = IR3_SINGLE_OR_DOUBLE,
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
};
ir3_finalize_nir(dev->compiler, &options.nir_options, nir);
struct ir3_shader *sh =
ir3_shader_from_nir(dev->compiler, nir, &options, NULL);

View file

@ -2363,7 +2363,6 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
{
struct ir3_compiler_options ir3_options = {
.robust_buffer_access2 = device->vk.enabled_features.robustBufferAccess2,
.push_ubo_with_preamble = true,
.disable_cache = true,
.bindless_fb_read_descriptor = -1,

View file

@ -1461,17 +1461,6 @@ tu_hash_stage(struct mesa_sha1 *ctx,
_mesa_sha1_update(ctx, key, sizeof(*key));
}
/* Hash flags which can affect ir3 shader compilation which aren't known until
* logical device creation.
*/
static void
tu_hash_compiler(struct mesa_sha1 *ctx, const struct ir3_compiler *compiler)
{
_mesa_sha1_update(ctx, &compiler->options.robust_buffer_access2,
sizeof(compiler->options.robust_buffer_access2));
_mesa_sha1_update(ctx, &ir3_shader_debug, sizeof(ir3_shader_debug));
}
static void
tu_hash_shaders(unsigned char *hash,
VkPipelineCreateFlags2KHR pipeline_flags,
@ -1479,8 +1468,7 @@ tu_hash_shaders(unsigned char *hash,
nir_shader *const *nir,
const struct tu_pipeline_layout *layout,
const struct tu_shader_key *keys,
VkGraphicsPipelineLibraryFlagsEXT state,
const struct ir3_compiler *compiler)
VkGraphicsPipelineLibraryFlagsEXT state)
{
struct mesa_sha1 ctx;
@ -1495,7 +1483,6 @@ tu_hash_shaders(unsigned char *hash,
}
}
_mesa_sha1_update(&ctx, &state, sizeof(state));
tu_hash_compiler(&ctx, compiler);
_mesa_sha1_final(&ctx, hash);
}
@ -1504,8 +1491,7 @@ tu_hash_compute(unsigned char *hash,
VkPipelineCreateFlags2KHR pipeline_flags,
const VkPipelineShaderStageCreateInfo *stage,
const struct tu_pipeline_layout *layout,
const struct tu_shader_key *key,
const struct ir3_compiler *compiler)
const struct tu_shader_key *key)
{
struct mesa_sha1 ctx;
@ -1516,7 +1502,6 @@ tu_hash_compute(unsigned char *hash,
tu_hash_stage(&ctx, pipeline_flags, stage, NULL, key);
tu_hash_compiler(&ctx, compiler);
_mesa_sha1_final(&ctx, hash);
}
@ -1662,7 +1647,6 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
struct tu_pipeline *pipeline)
{
VkResult result = VK_SUCCESS;
const struct ir3_compiler *compiler = builder->device->compiler;
const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
NULL
};
@ -1720,6 +1704,14 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
tu_shader_key_subgroup_size(&keys[stage], allow_varying_subgroup_size,
require_full_subgroups, subgroup_info,
builder->device);
if (stage_infos[stage]) {
struct vk_pipeline_robustness_state rs;
vk_pipeline_robustness_state_fill(&builder->device->vk, &rs,
builder->create_info->pNext,
stage_infos[stage]->pNext);
tu_shader_key_robustness(&keys[stage], &rs);
}
}
if ((builder->state &
@ -1831,7 +1823,7 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
unsigned char pipeline_sha1[20];
tu_hash_shaders(pipeline_sha1, builder->create_flags, stage_infos, nir,
&builder->layout, keys, builder->state, compiler);
&builder->layout, keys, builder->state);
unsigned char nir_sha1[21];
memcpy(nir_sha1, pipeline_sha1, sizeof(pipeline_sha1));
@ -4312,10 +4304,16 @@ tu_compute_pipeline_create(VkDevice device,
require_full_subgroups, subgroup_info,
dev);
struct vk_pipeline_robustness_state rs;
vk_pipeline_robustness_state_fill(&dev->vk, &rs,
pCreateInfo->pNext,
stage_info->pNext);
tu_shader_key_robustness(&key, &rs);
void *pipeline_mem_ctx = ralloc_context(NULL);
unsigned char pipeline_sha1[20];
tu_hash_compute(pipeline_sha1, flags, stage_info, layout, &key, dev->compiler);
tu_hash_compute(pipeline_sha1, flags, stage_info, layout, &key);
struct tu_shader *shader = NULL;
@ -4347,7 +4345,7 @@ tu_compute_pipeline_create(VkDevice device,
struct ir3_shader_key ir3_key = {};
nir_shader *nir = tu_spirv_to_nir(dev, pipeline_mem_ctx, flags,
stage_info, MESA_SHADER_COMPUTE);
stage_info, &key, MESA_SHADER_COMPUTE);
nir_initial_disasm = executable_info ?
nir_shader_as_str(nir, pipeline->base.executables_mem_ctx) : NULL;

View file

@ -24,11 +24,23 @@
#include <initializer_list>
static void
init_ir3_nir_options(struct ir3_shader_nir_options *options,
                     const struct tu_shader_key *key)
{
   /* Translate the per-stage robustness flags from the turnip shader key
    * into the set of NIR variable modes that ir3's NIR passes must treat
    * as bounds-checked (VK_EXT_robustness2 semantics).
    */
   unsigned modes = 0;
   if (key->robust_storage_access2)
      modes |= nir_var_mem_ssbo;
   if (key->robust_uniform_access2)
      modes |= nir_var_mem_ubo;

   *options = {};
   options->robust_modes = (nir_variable_mode)modes;
}
nir_shader *
tu_spirv_to_nir(struct tu_device *dev,
void *mem_ctx,
VkPipelineCreateFlags2KHR pipeline_flags,
const VkPipelineShaderStageCreateInfo *stage_info,
const struct tu_shader_key *key,
gl_shader_stage stage)
{
/* TODO these are made-up */
@ -106,7 +118,9 @@ tu_spirv_to_nir(struct tu_device *dev,
NIR_PASS_V(nir, nir_lower_system_values);
NIR_PASS_V(nir, nir_lower_is_helper_invocation);
ir3_optimize_loop(dev->compiler, nir);
struct ir3_shader_nir_options options;
init_ir3_nir_options(&options, key);
ir3_optimize_loop(dev->compiler, &options, nir);
NIR_PASS_V(nir, nir_opt_conditional_discard);
@ -2517,7 +2531,10 @@ tu_shader_create(struct tu_device *dev,
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
ir3_finalize_nir(dev->compiler, nir);
struct ir3_shader_nir_options nir_options;
init_ir3_nir_options(&nir_options, key);
ir3_finalize_nir(dev->compiler, &nir_options, nir);
const struct ir3_shader_options options = {
.num_reserved_user_consts = reserved_consts_vec4,
@ -2526,6 +2543,7 @@ tu_shader_create(struct tu_device *dev,
.push_consts_type = shader->const_state.push_consts.type,
.push_consts_base = shader->const_state.push_consts.lo,
.push_consts_dwords = shader->const_state.push_consts.dwords,
.nir_options = nir_options,
};
struct ir3_shader *ir3_shader =
@ -2716,7 +2734,7 @@ tu_compile_shaders(struct tu_device *device,
int64_t stage_start = os_time_get_nano();
nir[stage] = tu_spirv_to_nir(device, mem_ctx, pipeline_flags,
stage_info, stage);
stage_info, &keys[stage], stage);
if (!nir[stage]) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
@ -2887,6 +2905,16 @@ tu_shader_key_subgroup_size(struct tu_shader_key *key,
key->real_wavesize = real_wavesize;
}
/* Record in the shader key whether VK_EXT_pipeline_robustness requested
 * robustBufferAccess2 behavior for storage and uniform buffers on this
 * stage; the key is later translated into ir3_shader_nir_options.
 */
void
tu_shader_key_robustness(struct tu_shader_key *key,
                         const struct vk_pipeline_robustness_state *rs)
{
   const auto wants_robust2 = [](auto behavior) {
      return behavior ==
             VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT;
   };

   key->robust_storage_access2 = wants_robust2(rs->storage_buffers);
   key->robust_uniform_access2 = wants_robust2(rs->uniform_buffers);
}
static VkResult
tu_empty_shader_create(struct tu_device *dev,
struct tu_shader **shader_out,

View file

@ -109,6 +109,8 @@ struct tu_shader_key {
bool fragment_density_map;
bool dynamic_renderpass;
uint8_t unscaled_input_fragcoord;
bool robust_storage_access2;
bool robust_uniform_access2;
enum ir3_wavesize_option api_wavesize, real_wavesize;
};
@ -121,6 +123,7 @@ tu_spirv_to_nir(struct tu_device *dev,
void *mem_ctx,
VkPipelineCreateFlags2KHR pipeline_flags,
const VkPipelineShaderStageCreateInfo *stage_info,
const struct tu_shader_key *key,
gl_shader_stage stage);
void
@ -169,6 +172,10 @@ tu_shader_key_subgroup_size(struct tu_shader_key *key,
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_info,
struct tu_device *dev);
void
tu_shader_key_robustness(struct tu_shader_key *key,
const struct vk_pipeline_robustness_state *rs);
VkResult
tu_compile_shaders(struct tu_device *device,
VkPipelineCreateFlags2KHR pipeline_flags,

View file

@ -397,8 +397,10 @@ main(int argc, char **argv)
return -1;
}
const struct ir3_shader_nir_options options = {};
ir3_nir_lower_io_to_temporaries(nir);
ir3_finalize_nir(compiler, nir);
ir3_finalize_nir(compiler, &options, nir);
struct ir3_shader *shader = rzalloc_size(NULL, sizeof(*shader));
shader->compiler = compiler;
@ -416,7 +418,7 @@ main(int argc, char **argv)
shader->variants = v;
shader->variant_count = 1;
ir3_nir_lower_variant(v, nir);
ir3_nir_lower_variant(v, &options, nir);
info = "NIR compiler";
ret = ir3_compile_shader_nir(compiler, shader, v);

View file

@ -275,6 +275,17 @@ ir3_shader_compute_state_create(struct pipe_context *pctx,
return NULL;
}
enum ir3_wavesize_option api_wavesize = IR3_SINGLE_OR_DOUBLE;
enum ir3_wavesize_option real_wavesize = IR3_SINGLE_OR_DOUBLE;
const struct ir3_shader_options ir3_options = {
/* TODO: force to single on a6xx with legacy ballot extension that uses
* 64-bit masks
*/
.api_wavesize = api_wavesize,
.real_wavesize = real_wavesize,
};
struct ir3_compiler *compiler = ctx->screen->compiler;
nir_shader *nir;
@ -290,7 +301,7 @@ ir3_shader_compute_state_create(struct pipe_context *pctx,
blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
nir = nir_deserialize(NULL, options, &reader);
ir3_finalize_nir(compiler, nir);
ir3_finalize_nir(compiler, &ir3_options.nir_options, nir);
} else {
assert(cso->ir_type == PIPE_SHADER_IR_TGSI);
if (ir3_shader_debug & IR3_DBG_DISASM) {
@ -302,22 +313,13 @@ ir3_shader_compute_state_create(struct pipe_context *pctx,
if (ctx->screen->gen >= 6)
ir3_nir_lower_io_to_bindless(nir);
enum ir3_wavesize_option api_wavesize = IR3_SINGLE_OR_DOUBLE;
enum ir3_wavesize_option real_wavesize = IR3_SINGLE_OR_DOUBLE;
if (ctx->screen->gen >= 6 && !ctx->screen->info->a6xx.supports_double_threadsize) {
api_wavesize = IR3_SINGLE_ONLY;
real_wavesize = IR3_SINGLE_ONLY;
}
struct ir3_shader *shader =
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){
/* TODO: force to single on a6xx with legacy
* ballot extension that uses 64-bit masks
*/
.api_wavesize = api_wavesize,
.real_wavesize = real_wavesize,
}, NULL);
ir3_shader_from_nir(compiler, nir, &ir3_options, NULL);
shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4; /* byte->dword */
shader->cs.req_local_mem = cso->static_shared_mem;
@ -504,10 +506,12 @@ ir3_screen_finalize_nir(struct pipe_screen *pscreen, void *nir)
{
struct fd_screen *screen = fd_screen(pscreen);
const struct ir3_shader_nir_options options = {};
MESA_TRACE_FUNC();
ir3_nir_lower_io_to_temporaries(nir);
ir3_finalize_nir(screen->compiler, nir);
ir3_finalize_nir(screen->compiler, &options, nir);
return NULL;
}