mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 15:50:11 +01:00
189 lines
7.1 KiB
C++
189 lines
7.1 KiB
C++
|
|
/*
|
||
|
|
* Copyright © 2013 Intel Corporation
|
||
|
|
* SPDX-License-Identifier: MIT
|
||
|
|
*/
|
||
|
|
|
||
|
|
#include "intel_nir.h"
|
||
|
|
#include "brw_nir.h"
|
||
|
|
#include "brw_vec4_tcs.h"
|
||
|
|
#include "brw_fs.h"
|
||
|
|
#include "brw_private.h"
|
||
|
|
#include "dev/intel_debug.h"
|
||
|
|
|
||
|
|
/**
 * Return the number of patches to accumulate before a MULTI_PATCH mode thread
 * is launched.  In cases with a large number of input control points and a
 * large amount of VS outputs, the VS URB space needed to store an entire 8
 * patches worth of data can be prohibitive, so it can be beneficial to launch
 * threads early.
 *
 * See the 3DSTATE_HS::Patch Count Threshold documentation for the recommended
 * values.  Note that 0 means to "disable" early dispatch, meaning to wait for
 * a full 8 patches as normal.
 *
 * Mapping implemented below (input control points -> threshold):
 *
 *      <= 4  -> 0 (early dispatch disabled)
 *    5 ..  6 -> 5
 *    7 ..  8 -> 4
 *    9 .. 10 -> 3
 *   11 .. 14 -> 2
 *      >= 15 -> 1
 *
 * \param input_control_points  number of control points per input patch;
 *                              any value <= 4 (including 0 or a negative
 *                              value) yields 0.
 * \return the patch count threshold, in the range [0, 5].
 */
static int
get_patch_count_threshold(int input_control_points)
{
   if (input_control_points <= 4)
      return 0;
   else if (input_control_points <= 6)
      return 5;
   else if (input_control_points <= 8)
      return 4;
   else if (input_control_points <= 10)
      return 3;
   else if (input_control_points <= 14)
      return 2;

   /* Return patch count 1 for PATCHLIST_15 - PATCHLIST_32 */
   return 1;
}
|
||
|
|
|
||
|
|
extern "C" const unsigned *
|
||
|
|
brw_compile_tcs(const struct brw_compiler *compiler,
|
||
|
|
struct brw_compile_tcs_params *params)
|
||
|
|
{
|
||
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
||
|
|
nir_shader *nir = params->base.nir;
|
||
|
|
const struct brw_tcs_prog_key *key = params->key;
|
||
|
|
struct brw_tcs_prog_data *prog_data = params->prog_data;
|
||
|
|
struct brw_vue_prog_data *vue_prog_data = &prog_data->base;
|
||
|
|
|
||
|
|
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL];
|
||
|
|
const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TCS);
|
||
|
|
const unsigned *assembly;
|
||
|
|
|
||
|
|
vue_prog_data->base.stage = MESA_SHADER_TESS_CTRL;
|
||
|
|
prog_data->base.base.ray_queries = nir->info.ray_queries;
|
||
|
|
prog_data->base.base.total_scratch = 0;
|
||
|
|
|
||
|
|
nir->info.outputs_written = key->outputs_written;
|
||
|
|
nir->info.patch_outputs_written = key->patch_outputs_written;
|
||
|
|
|
||
|
|
struct intel_vue_map input_vue_map;
|
||
|
|
brw_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read,
|
||
|
|
nir->info.separate_shader, 1);
|
||
|
|
brw_compute_tess_vue_map(&vue_prog_data->vue_map,
|
||
|
|
nir->info.outputs_written,
|
||
|
|
nir->info.patch_outputs_written);
|
||
|
|
|
||
|
|
brw_nir_apply_key(nir, compiler, &key->base, 8);
|
||
|
|
brw_nir_lower_vue_inputs(nir, &input_vue_map);
|
||
|
|
brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map,
|
||
|
|
key->_tes_primitive_mode);
|
||
|
|
if (key->quads_workaround)
|
||
|
|
intel_nir_apply_tcs_quads_workaround(nir);
|
||
|
|
if (key->input_vertices > 0)
|
||
|
|
intel_nir_lower_patch_vertices_in(nir, key->input_vertices);
|
||
|
|
|
||
|
|
brw_postprocess_nir(nir, compiler, debug_enabled,
|
||
|
|
key->base.robust_flags);
|
||
|
|
|
||
|
|
bool has_primitive_id =
|
||
|
|
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
|
||
|
|
|
||
|
|
prog_data->patch_count_threshold = get_patch_count_threshold(key->input_vertices);
|
||
|
|
|
||
|
|
if (compiler->use_tcs_multi_patch) {
|
||
|
|
vue_prog_data->dispatch_mode = INTEL_DISPATCH_MODE_TCS_MULTI_PATCH;
|
||
|
|
prog_data->instances = nir->info.tess.tcs_vertices_out;
|
||
|
|
prog_data->include_primitive_id = has_primitive_id;
|
||
|
|
} else {
|
||
|
|
unsigned verts_per_thread = is_scalar ? 8 : 2;
|
||
|
|
vue_prog_data->dispatch_mode = INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH;
|
||
|
|
prog_data->instances =
|
||
|
|
DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, verts_per_thread);
|
||
|
|
}
|
||
|
|
|
||
|
|
/* Compute URB entry size. The maximum allowed URB entry size is 32k.
|
||
|
|
* That divides up as follows:
|
||
|
|
*
|
||
|
|
* 32 bytes for the patch header (tessellation factors)
|
||
|
|
* 480 bytes for per-patch varyings (a varying component is 4 bytes and
|
||
|
|
* gl_MaxTessPatchComponents = 120)
|
||
|
|
* 16384 bytes for per-vertex varyings (a varying component is 4 bytes,
|
||
|
|
* gl_MaxPatchVertices = 32 and
|
||
|
|
* gl_MaxTessControlOutputComponents = 128)
|
||
|
|
*
|
||
|
|
* 15808 bytes left for varying packing overhead
|
||
|
|
*/
|
||
|
|
const int num_per_patch_slots = vue_prog_data->vue_map.num_per_patch_slots;
|
||
|
|
const int num_per_vertex_slots = vue_prog_data->vue_map.num_per_vertex_slots;
|
||
|
|
unsigned output_size_bytes = 0;
|
||
|
|
/* Note that the patch header is counted in num_per_patch_slots. */
|
||
|
|
output_size_bytes += num_per_patch_slots * 16;
|
||
|
|
output_size_bytes += nir->info.tess.tcs_vertices_out *
|
||
|
|
num_per_vertex_slots * 16;
|
||
|
|
|
||
|
|
assert(output_size_bytes >= 1);
|
||
|
|
if (output_size_bytes > GFX7_MAX_HS_URB_ENTRY_SIZE_BYTES)
|
||
|
|
return NULL;
|
||
|
|
|
||
|
|
/* URB entry sizes are stored as a multiple of 64 bytes. */
|
||
|
|
vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
||
|
|
|
||
|
|
/* HS does not use the usual payload pushing from URB to GRFs,
|
||
|
|
* because we don't have enough registers for a full-size payload, and
|
||
|
|
* the hardware is broken on Haswell anyway.
|
||
|
|
*/
|
||
|
|
vue_prog_data->urb_read_length = 0;
|
||
|
|
|
||
|
|
if (unlikely(debug_enabled)) {
|
||
|
|
fprintf(stderr, "TCS Input ");
|
||
|
|
brw_print_vue_map(stderr, &input_vue_map, MESA_SHADER_TESS_CTRL);
|
||
|
|
fprintf(stderr, "TCS Output ");
|
||
|
|
brw_print_vue_map(stderr, &vue_prog_data->vue_map, MESA_SHADER_TESS_CTRL);
|
||
|
|
}
|
||
|
|
|
||
|
|
if (is_scalar) {
|
||
|
|
const unsigned dispatch_width = devinfo->ver >= 20 ? 16 : 8;
|
||
|
|
fs_visitor v(compiler, ¶ms->base, &key->base,
|
||
|
|
&prog_data->base.base, nir, dispatch_width,
|
||
|
|
params->base.stats != NULL, debug_enabled);
|
||
|
|
if (!v.run_tcs()) {
|
||
|
|
params->base.error_str =
|
||
|
|
ralloc_strdup(params->base.mem_ctx, v.fail_msg);
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
assert(v.payload().num_regs % reg_unit(devinfo) == 0);
|
||
|
|
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo);
|
||
|
|
|
||
|
|
fs_generator g(compiler, ¶ms->base,
|
||
|
|
&prog_data->base.base, false, MESA_SHADER_TESS_CTRL);
|
||
|
|
if (unlikely(debug_enabled)) {
|
||
|
|
g.enable_debug(ralloc_asprintf(params->base.mem_ctx,
|
||
|
|
"%s tessellation control shader %s",
|
||
|
|
nir->info.label ? nir->info.label
|
||
|
|
: "unnamed",
|
||
|
|
nir->info.name));
|
||
|
|
}
|
||
|
|
|
||
|
|
g.generate_code(v.cfg, dispatch_width, v.shader_stats,
|
||
|
|
v.performance_analysis.require(), params->base.stats);
|
||
|
|
|
||
|
|
g.add_const_data(nir->constant_data, nir->constant_data_size);
|
||
|
|
|
||
|
|
assembly = g.get_assembly();
|
||
|
|
} else {
|
||
|
|
brw::vec4_tcs_visitor v(compiler, ¶ms->base, key, prog_data,
|
||
|
|
nir, debug_enabled);
|
||
|
|
if (!v.run()) {
|
||
|
|
params->base.error_str =
|
||
|
|
ralloc_strdup(params->base.mem_ctx, v.fail_msg);
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (INTEL_DEBUG(DEBUG_TCS))
|
||
|
|
v.dump_instructions();
|
||
|
|
|
||
|
|
|
||
|
|
assembly = brw_vec4_generate_assembly(compiler, ¶ms->base, nir,
|
||
|
|
&prog_data->base, v.cfg,
|
||
|
|
v.performance_analysis.require(),
|
||
|
|
debug_enabled);
|
||
|
|
}
|
||
|
|
|
||
|
|
return assembly;
|
||
|
|
}
|