mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-21 07:30:33 +01:00
It was already computed in brw_shader::assign_curb_setup() so we can use it in brw_assign_urb_setup(). There was a mismatch between assign_curb_setup() and brw_assign_urb_setup() when push_sizes were not multiple of REG_SIZE, the first one was aligning every push_sizes before sum it, while brw_assign_urb_setup() was only aligning the sum of all push_size. By luck the only places that did not had a push_size aligned to REG_SIZE only had one push_size, so this was not an issue. So here also fixing this mismatch and adding an assert to caught any future mismatch. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: José Roberto de Souza <jose.souza@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39817>
360 lines
12 KiB
C
360 lines
12 KiB
C
/*
|
|
* Copyright © 2022 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "anv_private.h"
|
|
|
|
#include "compiler/intel_nir.h"
|
|
#include "compiler/brw/brw_compiler.h"
|
|
#include "compiler/brw/brw_nir.h"
|
|
#include "compiler/nir/nir.h"
|
|
#include "compiler/nir/nir_builder.h"
|
|
#include "dev/intel_debug.h"
|
|
#include "intel/compiler/intel_nir.h"
|
|
#include "util/macros.h"
|
|
|
|
#include "vk_nir.h"
|
|
|
|
#include "anv_internal_kernels.h"
|
|
|
|
static bool
|
|
lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
|
|
UNUSED void *data)
|
|
{
|
|
if (intrin->intrinsic != nir_intrinsic_load_base_workgroup_id)
|
|
return false;
|
|
|
|
b->cursor = nir_instr_remove(&intrin->instr);
|
|
nir_def_rewrite_uses(&intrin->def, nir_imm_zero(b, 3, 32));
|
|
return true;
|
|
}
|
|
|
|
static void
|
|
check_sends(struct genisa_stats *stats, unsigned send_count)
|
|
{
|
|
assert(stats->spills == 0);
|
|
assert(stats->fills == 0);
|
|
assert(stats->sends == send_count);
|
|
}
|
|
|
|
static struct anv_shader_internal *
|
|
compile_shader(struct anv_device *device,
|
|
enum anv_internal_kernel_name shader_name,
|
|
mesa_shader_stage stage,
|
|
const char *name,
|
|
const void *hash_key,
|
|
uint32_t hash_key_size,
|
|
uint32_t sends_count_expectation)
|
|
{
|
|
const nir_shader_compiler_options *nir_options =
|
|
&device->physical->compiler->nir_options[stage];
|
|
|
|
nir_builder b = nir_builder_init_simple_shader(stage, nir_options,
|
|
"%s", name);
|
|
|
|
uint32_t uniform_size =
|
|
anv_genX(device->info, call_internal_shader)(&b, shader_name);
|
|
|
|
nir_shader *nir = b.shader;
|
|
|
|
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
|
|
NIR_PASS(_, nir, nir_opt_cse);
|
|
NIR_PASS(_, nir, nir_opt_gcm, true);
|
|
|
|
nir_opt_peephole_select_options peephole_select_options = {
|
|
.limit = 1,
|
|
};
|
|
NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
|
|
|
|
NIR_PASS(_, nir, nir_lower_variable_initializers, ~0);
|
|
|
|
NIR_PASS(_, nir, nir_split_var_copies);
|
|
NIR_PASS(_, nir, nir_split_per_member_structs);
|
|
|
|
if (stage == MESA_SHADER_COMPUTE) {
|
|
nir->info.workgroup_size[0] = 16;
|
|
nir->info.workgroup_size[1] = 1;
|
|
nir->info.workgroup_size[2] = 1;
|
|
}
|
|
|
|
struct brw_compiler *compiler = device->physical->compiler;
|
|
struct brw_nir_compiler_opts opts = {};
|
|
brw_preprocess_nir(compiler, nir, &opts);
|
|
|
|
NIR_PASS(_, nir, nir_propagate_invariant, false);
|
|
|
|
if (stage == MESA_SHADER_FRAGMENT) {
|
|
NIR_PASS(_, nir, nir_lower_input_attachments,
|
|
&(nir_input_attachment_options) { });
|
|
} else {
|
|
nir_lower_compute_system_values_options options = {
|
|
.has_base_workgroup_id = true,
|
|
.lower_cs_local_id_to_index = true,
|
|
.lower_workgroup_id_to_index = mesa_shader_stage_is_mesh(stage),
|
|
};
|
|
NIR_PASS(_, nir, nir_lower_compute_system_values, &options);
|
|
NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_base_workgroup_id,
|
|
nir_metadata_control_flow, NULL);
|
|
}
|
|
|
|
/* Reset sizes before gathering information */
|
|
nir->global_mem_size = 0;
|
|
nir->scratch_size = 0;
|
|
nir->info.shared_size = 0;
|
|
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
|
|
|
NIR_PASS(_, nir, nir_opt_copy_prop);
|
|
NIR_PASS(_, nir, nir_opt_constant_folding);
|
|
NIR_PASS(_, nir, nir_opt_dce);
|
|
|
|
union brw_any_prog_key key;
|
|
memset(&key, 0, sizeof(key));
|
|
|
|
union brw_any_prog_data prog_data;
|
|
memset(&prog_data, 0, sizeof(prog_data));
|
|
|
|
if (stage == MESA_SHADER_COMPUTE) {
|
|
/* Pick SIMD16, it shouldn't spill prior Xe2 and it's the native size
|
|
* after.
|
|
*/
|
|
nir->info.min_subgroup_size = nir->info.max_subgroup_size = 16;
|
|
|
|
NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics,
|
|
device->info, &prog_data.cs);
|
|
}
|
|
|
|
/* Do vectorizing here. For some reason when trying to do it in the back
|
|
* this just isn't working.
|
|
*/
|
|
nir_load_store_vectorize_options options = {
|
|
.modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global,
|
|
.callback = brw_nir_should_vectorize_mem,
|
|
.robust_modes = (nir_variable_mode)0,
|
|
};
|
|
NIR_PASS(_, nir, nir_opt_load_store_vectorize, &options);
|
|
|
|
prog_data.base.push_sizes[0] = uniform_size;
|
|
|
|
void *temp_ctx = ralloc_context(NULL);
|
|
|
|
const unsigned *program;
|
|
if (stage == MESA_SHADER_FRAGMENT) {
|
|
struct genisa_stats stats[3];
|
|
struct brw_compile_fs_params params = {
|
|
.base = {
|
|
.nir = nir,
|
|
.log_data = device,
|
|
.debug_flag = DEBUG_WM,
|
|
.stats = stats,
|
|
.mem_ctx = temp_ctx,
|
|
},
|
|
.key = &key.fs,
|
|
.prog_data = &prog_data.fs,
|
|
};
|
|
prog_data.base.push_sizes[0] = align(prog_data.base.push_sizes[0], REG_SIZE);
|
|
program = brw_compile_fs(compiler, ¶ms);
|
|
|
|
if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
|
|
unsigned stat_idx = 0;
|
|
if (prog_data.fs.dispatch_8) {
|
|
check_sends(&stats[stat_idx++], sends_count_expectation);
|
|
}
|
|
if (prog_data.fs.dispatch_16) {
|
|
check_sends(&stats[stat_idx++], sends_count_expectation);
|
|
}
|
|
if (prog_data.fs.dispatch_32) {
|
|
check_sends(&stats[stat_idx++], sends_count_expectation *
|
|
(device->info->ver < 20 ? 2 : 1));
|
|
}
|
|
}
|
|
} else {
|
|
brw_cs_fill_push_const_info(device->info, &prog_data.cs, -1);
|
|
prog_data.base.push_sizes[0] = align(prog_data.base.push_sizes[0], REG_SIZE);
|
|
|
|
struct genisa_stats stats;
|
|
struct brw_compile_cs_params params = {
|
|
.base = {
|
|
.nir = nir,
|
|
.stats = &stats,
|
|
.log_data = device,
|
|
.debug_flag = DEBUG_CS,
|
|
.mem_ctx = temp_ctx,
|
|
},
|
|
.key = &key.cs,
|
|
.prog_data = &prog_data.cs,
|
|
};
|
|
program = brw_compile_cs(compiler, ¶ms);
|
|
|
|
if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
|
|
check_sends(&stats, sends_count_expectation);
|
|
}
|
|
}
|
|
|
|
assert(prog_data.base.total_scratch == 0);
|
|
assert(program != NULL);
|
|
struct anv_shader_internal *kernel = NULL;
|
|
if (program == NULL)
|
|
goto exit;
|
|
|
|
struct anv_pipeline_bind_map empty_bind_map = {};
|
|
struct anv_push_descriptor_info empty_push_desc_info = {};
|
|
struct anv_shader_upload_params upload_params = {
|
|
.stage = nir->info.stage,
|
|
.key_data = hash_key,
|
|
.key_size = hash_key_size,
|
|
.kernel_data = program,
|
|
.kernel_size = prog_data.base.program_size,
|
|
.prog_data = &prog_data.base,
|
|
.prog_data_size = sizeof(prog_data),
|
|
.bind_map = &empty_bind_map,
|
|
.push_desc_info = &empty_push_desc_info,
|
|
};
|
|
|
|
kernel = anv_device_upload_kernel(device, device->internal_cache, &upload_params);
|
|
|
|
exit:
|
|
ralloc_free(temp_ctx);
|
|
ralloc_free(nir);
|
|
|
|
return kernel;
|
|
}
|
|
|
|
VkResult
|
|
anv_device_get_internal_shader(struct anv_device *device,
|
|
enum anv_internal_kernel_name name,
|
|
struct anv_shader_internal **out_bin)
|
|
{
|
|
const struct {
|
|
struct {
|
|
char name[40];
|
|
} key;
|
|
|
|
mesa_shader_stage stage;
|
|
|
|
uint32_t send_count;
|
|
} internal_kernels[] = {
|
|
[ANV_INTERNAL_KERNEL_GENERATED_DRAWS] = {
|
|
.key = {
|
|
.name = "anv-generated-indirect-draws",
|
|
},
|
|
.stage = MESA_SHADER_FRAGMENT,
|
|
.send_count = (device->info->ver == 9 ?
|
|
/* 1 load +
|
|
* 4 stores +
|
|
* 2 * (2 loads + 2 stores) +
|
|
* 3 stores
|
|
*/
|
|
16 :
|
|
/* 1 load +
|
|
* 2 * (2 loads + 3 stores) +
|
|
* 3 stores
|
|
*/
|
|
14) +
|
|
/* 3 loads + 3 stores */
|
|
(intel_needs_workaround(device->info, 16011107343) ? 6 : 0) +
|
|
/* 3 loads + 3 stores */
|
|
(intel_needs_workaround(device->info, 22018402687) ? 6 : 0),
|
|
},
|
|
[ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE] = {
|
|
.key = {
|
|
.name = "anv-copy-query-compute",
|
|
},
|
|
.stage = MESA_SHADER_COMPUTE,
|
|
.send_count = device->info->verx10 >= 125 ?
|
|
9 /* 4 loads + 4 stores + 1 EOT */ :
|
|
8 /* 3 loads + 4 stores + 1 EOT */,
|
|
},
|
|
[ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT] = {
|
|
.key = {
|
|
.name = "anv-copy-query-fragment",
|
|
},
|
|
.stage = MESA_SHADER_FRAGMENT,
|
|
.send_count = 8 /* 3 loads + 4 stores + 1 EOT */,
|
|
},
|
|
[ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE] = {
|
|
.key = {
|
|
.name = "anv-memcpy-compute",
|
|
},
|
|
.stage = MESA_SHADER_COMPUTE,
|
|
.send_count = device->info->verx10 >= 125 ?
|
|
10 /* 5 loads (1 pull constants) + 4 stores + 1 EOT */ :
|
|
9 /* 4 loads + 4 stores + 1 EOT */,
|
|
},
|
|
};
|
|
|
|
struct anv_shader_internal *bin =
|
|
p_atomic_read(&device->internal_kernels[name]);
|
|
if (bin != NULL) {
|
|
*out_bin = bin;
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
bin =
|
|
anv_device_search_for_kernel(device,
|
|
device->internal_cache,
|
|
&internal_kernels[name].key,
|
|
sizeof(internal_kernels[name].key),
|
|
NULL);
|
|
if (bin != NULL) {
|
|
p_atomic_set(&device->internal_kernels[name], bin);
|
|
*out_bin = bin;
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
bin = compile_shader(device,
|
|
name,
|
|
internal_kernels[name].stage,
|
|
internal_kernels[name].key.name,
|
|
&internal_kernels[name].key,
|
|
sizeof(internal_kernels[name].key),
|
|
internal_kernels[name].send_count);
|
|
if (bin == NULL)
|
|
return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY,
|
|
"Unable to compiler internal kernel");
|
|
|
|
/* The cache already has a reference and it's not going anywhere so
|
|
* there is no need to hold a second reference.
|
|
*/
|
|
anv_shader_internal_unref(device, bin);
|
|
|
|
p_atomic_set(&device->internal_kernels[name], bin);
|
|
|
|
*out_bin = bin;
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
VkResult
|
|
anv_device_init_internal_kernels(struct anv_device *device)
|
|
{
|
|
const struct intel_l3_weights w =
|
|
intel_get_default_l3_weights(device->info,
|
|
true /* wants_dc_cache */,
|
|
false /* needs_slm */);
|
|
device->internal_kernels_l3_config = intel_get_l3_config(device->info, w);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
void
|
|
anv_device_finish_internal_kernels(struct anv_device *device)
|
|
{
|
|
}
|