Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2025-12-20)
brw: add support for separate tessellation shader compilation
Tessellation factors have to be written dynamically (based on the next
shader's primitive topology) and the builtins read using a dynamic offset
(based on the preceding shader's VUE). Anv is updated to use this new
infrastructure for dynamic patch_control_points.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34872>
parent a18835a9ca
commit a91e0e0d61
17 changed files with 483 additions and 139 deletions
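Editorial overview (not part of the commit): the series replaces the single dynamic tcs_input_vertices push constant with a packed 32-bit tess config word, declared in intel_shader_enums.h below. A tessellation shader compiled without a known patch control point count (or, with VK_EXT_shader_object, without a known domain) decodes the fields it needs at run time. For instance, gl_PatchVerticesIn in a dynamic TCS is lowered to, in NIR builder terms:

   /* Illustration of the lowered form; the actual pass is
    * intel_nir_lower_patch_vertices_in in the diff below. */
   nir_def *cfg = nir_load_tess_config_intel(b);
   nir_def *patch_vertices_in =
      nir_iadd_imm(b, nir_ubitfield_extract_imm(b, cfg, 0, 5), 1);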
@@ -204,14 +204,13 @@ brw_compile_tcs(const struct brw_compiler *compiler,
    brw_compute_tess_vue_map(&vue_prog_data->vue_map,
                             nir->info.outputs_written,
                             nir->info.patch_outputs_written,
-                            nir->info.separate_shader);
+                            key->separate_tess_vue_layout);
 
    brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
    brw_nir_lower_vue_inputs(nir, &input_vue_map);
-   brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map,
+   brw_nir_lower_tcs_outputs(nir, devinfo, &vue_prog_data->vue_map,
                              key->_tes_primitive_mode);
-   if (key->input_vertices > 0)
-      intel_nir_lower_patch_vertices_in(nir, key->input_vertices, NULL, NULL);
+   intel_nir_lower_patch_vertices_in(nir, key->input_vertices);
 
    brw_postprocess_nir(nir, compiler, debug_enabled,
                        key->base.robust_flags);
@@ -9,6 +9,7 @@
 #include "brw_generator.h"
 #include "brw_nir.h"
 #include "brw_private.h"
+#include "intel_nir.h"
 #include "dev/intel_debug.h"
 #include "util/macros.h"
@@ -65,7 +66,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
    const struct intel_device_info *devinfo = compiler->devinfo;
    nir_shader *nir = params->base.nir;
    const struct brw_tes_prog_key *key = params->key;
-   const struct intel_vue_map *input_vue_map = params->input_vue_map;
+   struct intel_vue_map input_vue_map;
    struct brw_tes_prog_data *prog_data = params->prog_data;
    const unsigned dispatch_width = brw_geometry_stage_dispatch_width(compiler->devinfo);
@@ -73,12 +74,23 @@ brw_compile_tes(const struct brw_compiler *compiler,
    brw_prog_data_init(&prog_data->base.base, &params->base);
 
-   nir->info.inputs_read = key->inputs_read;
-   nir->info.patch_inputs_read = key->patch_inputs_read;
+   if (params->input_vue_map != NULL) {
+      assert(!key->separate_tess_vue_layout);
+      nir->info.inputs_read = key->inputs_read;
+      nir->info.patch_inputs_read = key->patch_inputs_read;
+      memcpy(&input_vue_map, params->input_vue_map,
+             sizeof(input_vue_map));
+   } else {
+      brw_compute_tess_vue_map(&input_vue_map,
+                               nir->info.inputs_read,
+                               nir->info.patch_inputs_read,
+                               key->separate_tess_vue_layout);
+   }
 
    brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
-   brw_nir_lower_tes_inputs(nir, input_vue_map);
+   brw_nir_lower_tes_inputs(nir, &input_vue_map);
    brw_nir_lower_vue_outputs(nir);
+   NIR_PASS(_, nir, intel_nir_lower_patch_vertices_tes);
    brw_postprocess_nir(nir, compiler, debug_enabled,
                        key->base.robust_flags);
@@ -155,7 +167,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
    if (unlikely(debug_enabled)) {
       fprintf(stderr, "TES Input ");
-      brw_print_vue_map(stderr, input_vue_map, MESA_SHADER_TESS_EVAL);
+      brw_print_vue_map(stderr, &input_vue_map, MESA_SHADER_TESS_EVAL);
       fprintf(stderr, "TES Output ");
      brw_print_vue_map(stderr, &prog_data->base.vue_map,
                        MESA_SHADER_TESS_EVAL);
@@ -319,7 +319,10 @@ struct brw_tcs_prog_key
    /** A bitfield of per-patch outputs written. */
    uint32_t patch_outputs_written;
 
-   uint32_t padding;
+   /** Tessellation VUE layout */
+   bool separate_tess_vue_layout:1;
+
+   uint32_t padding:31;
 };
 
 #define BRW_MAX_TCS_INPUT_VERTICES (32)
@@ -342,7 +345,10 @@ struct brw_tes_prog_key
    /** A bitfield of per-patch inputs read. */
    uint32_t patch_inputs_read;
 
-   uint32_t padding;
+   /** Tessellation VUE layout */
+   bool separate_tess_vue_layout:1;
+
+   uint32_t padding:31;
 };
 
 /** The program key for Geometry Shaders. */
@@ -1199,11 +1205,23 @@ struct brw_tcs_prog_data
    /** Should the non-SINGLE_PATCH payload provide primitive ID? */
    bool include_primitive_id;
 
+   /** Whether the tessellation domain is unknown at compile time
+    *
+    * Used with VK_EXT_shader_object
+    */
+   bool dynamic_domain;
+
    /** Number vertices in output patch */
    int instances;
 
    /** Track patch count threshold */
    int patch_count_threshold;
 
+   /**
+    * Push constant location of intel_tess_config (dynamic configuration of
+    * the tessellation shaders).
+    */
+   unsigned tess_config_param;
 };
@@ -1215,6 +1233,12 @@ struct brw_tes_prog_data
    enum intel_tess_output_topology output_topology;
    enum intel_tess_domain domain;
    bool include_primitive_id;
+
+   /**
+    * Push constant location of intel_tess_config (dynamic configuration of
+    * the tessellation shaders).
+    */
+   unsigned tess_config_param;
 };
 
 struct brw_gs_prog_data
@@ -3321,6 +3321,11 @@ brw_from_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb,
       break;
    }
 
+   case nir_intrinsic_load_tess_config_intel:
+      bld.MOV(retype(dst, BRW_TYPE_UD),
+              brw_uniform_reg(tcs_prog_data->tess_config_param, BRW_TYPE_UD));
+      break;
+
    default:
       brw_from_nir_emit_intrinsic(ntb, bld, instr);
       break;
@@ -3429,6 +3434,12 @@ brw_from_nir_emit_tes_intrinsic(nir_to_brw_state &ntb,
       }
       break;
    }
 
+   case nir_intrinsic_load_tess_config_intel:
+      bld.MOV(retype(dest, BRW_TYPE_UD),
+              brw_uniform_reg(tes_prog_data->tess_config_param, BRW_TYPE_UD));
+      break;
+
    default:
       brw_from_nir_emit_intrinsic(ntb, bld, instr);
       break;
@@ -142,23 +142,53 @@ type_size_dvec4(const struct glsl_type *type, bool bindless)
 }
 
 static bool
-remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
-                  enum tess_primitive_mode _primitive_mode)
+is_input(nir_intrinsic_instr *intrin)
 {
-   const int location = nir_intrinsic_base(intr);
+   return intrin->intrinsic == nir_intrinsic_load_input ||
+          intrin->intrinsic == nir_intrinsic_load_per_primitive_input ||
+          intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
+          intrin->intrinsic == nir_intrinsic_load_interpolated_input;
+}
+
+static bool
+is_output(nir_intrinsic_instr *intrin)
+{
+   return intrin->intrinsic == nir_intrinsic_load_output ||
+          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
+          intrin->intrinsic == nir_intrinsic_load_per_view_output ||
+          intrin->intrinsic == nir_intrinsic_store_output ||
+          intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
+          intrin->intrinsic == nir_intrinsic_store_per_view_output;
+}
+
+static bool
+remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   if (!(b->shader->info.stage == MESA_SHADER_TESS_CTRL && is_output(intr)) &&
+       !(b->shader->info.stage == MESA_SHADER_TESS_EVAL && is_input(intr)))
+      return false;
+
+   /* Handled in a different pass */
+   nir_io_semantics io_sem = nir_intrinsic_io_semantics(intr);
+   if (io_sem.location != VARYING_SLOT_TESS_LEVEL_INNER &&
+       io_sem.location != VARYING_SLOT_TESS_LEVEL_OUTER)
+      return false;
+
    const unsigned component = nir_intrinsic_component(intr);
    bool out_of_bounds = false;
    bool write = !nir_intrinsic_infos[intr->intrinsic].has_dest;
    unsigned mask = write ? nir_intrinsic_write_mask(intr) : 0;
    nir_def *src = NULL, *dest = NULL;
 
+   enum tess_primitive_mode _primitive_mode = (uintptr_t)data;
+
    if (write) {
       assert(intr->num_components == intr->src[0].ssa->num_components);
    } else {
      assert(intr->num_components == intr->def.num_components);
    }
 
-   if (location == VARYING_SLOT_TESS_LEVEL_INNER) {
+   if (io_sem.location == VARYING_SLOT_TESS_LEVEL_INNER) {
       b->cursor = write ? nir_before_instr(&intr->instr)
                         : nir_after_instr(&intr->instr);
@@ -201,7 +231,7 @@ remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
       default:
          UNREACHABLE("Bogus tessellation domain");
       }
-   } else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) {
+   } else {
       b->cursor = write ? nir_before_instr(&intr->instr)
                         : nir_after_instr(&intr->instr);
@@ -253,8 +283,6 @@ remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
       default:
         UNREACHABLE("Bogus tessellation domain");
      }
-   } else {
-      return false;
    }
 
    if (out_of_bounds) {
@@ -275,74 +303,192 @@ remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
 }
 
 static bool
-is_input(nir_intrinsic_instr *intrin)
+remap_tess_header_values(nir_shader *nir, enum tess_primitive_mode _primitive_mode)
 {
-   return intrin->intrinsic == nir_intrinsic_load_input ||
-          intrin->intrinsic == nir_intrinsic_load_per_primitive_input ||
-          intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
-          intrin->intrinsic == nir_intrinsic_load_interpolated_input;
-}
-
-static bool
-is_output(nir_intrinsic_instr *intrin)
-{
-   return intrin->intrinsic == nir_intrinsic_load_output ||
-          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
-          intrin->intrinsic == nir_intrinsic_load_per_view_output ||
-          intrin->intrinsic == nir_intrinsic_store_output ||
-          intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
-          intrin->intrinsic == nir_intrinsic_store_per_view_output;
+   return nir_shader_intrinsics_pass(nir, remap_tess_levels,
+                                     nir_metadata_control_flow,
+                                     (void *)(uintptr_t)_primitive_mode);
 }
 
-struct remap_patch_urb_offset_state {
-   const struct intel_vue_map *vue_map;
-   enum tess_primitive_mode tes_primitive_mode;
+struct tess_levels_temporary_state {
+   nir_variable *inner_factors_var;
+   nir_variable *outer_factors_var;
 };
 
 static bool
-remap_patch_urb_offsets(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
+remap_tess_levels_to_temporary(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
 {
-   struct remap_patch_urb_offset_state *state = data;
+   if (!(b->shader->info.stage == MESA_SHADER_TESS_CTRL && is_output(intrin)))
+      return false;
+
+   /* Handled in a different pass */
+   nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
+   if (io_sem.location != VARYING_SLOT_TESS_LEVEL_INNER &&
+       io_sem.location != VARYING_SLOT_TESS_LEVEL_OUTER)
+      return false;
+
+   struct tess_levels_temporary_state *state = data;
+
+   nir_variable *var = io_sem.location == VARYING_SLOT_TESS_LEVEL_INNER ?
+      state->inner_factors_var : state->outer_factors_var;
+   if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
+      b->cursor = nir_after_instr(&intrin->instr);
+      nir_def *new_val =
+         nir_load_array_var(b, var,
+                            nir_iadd_imm(b, nir_get_io_offset_src(intrin)->ssa,
+                                         nir_intrinsic_component(intrin)));
+      nir_def_replace(&intrin->def, new_val);
+   } else {
+      b->cursor = nir_instr_remove(&intrin->instr);
+      nir_store_array_var(b, var,
+                          nir_iadd_imm(b, nir_get_io_offset_src(intrin)->ssa,
+                                       nir_intrinsic_component(intrin)),
+                          intrin->src[0].ssa,
+                          nir_intrinsic_write_mask(intrin));
+   }
+
+   return true;
+}
+
+static bool
+remap_tess_header_values_dynamic(nir_shader *nir, const struct intel_device_info *devinfo)
+{
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+
+   struct tess_levels_temporary_state state = {
+      .inner_factors_var = nir_local_variable_create(
+         impl, glsl_array_type(glsl_uint_type(), 2, 0),
+         "__temp_inner_factors"),
+      .outer_factors_var = nir_local_variable_create(
+         impl, glsl_array_type(glsl_uint_type(), 4, 0),
+         "__temp_outer_factors"),
+   };
+
+   nir_shader_intrinsics_pass(nir, remap_tess_levels_to_temporary,
+                              nir_metadata_control_flow, &state);
+
+   nir_builder _b = nir_builder_at(nir_after_impl(impl)), *b = &_b;
+
+   nir_def *tess_config = nir_load_tess_config_intel(b);
+   nir_def *is_quad =
+      nir_test_mask(b, tess_config, INTEL_TESS_CONFIG_QUADS);
+   nir_def *is_tri =
+      nir_test_mask(b, tess_config, INTEL_TESS_CONFIG_TRIANGLES);
+   nir_def *is_quad_tri =
+      nir_test_mask(b, tess_config, (INTEL_TESS_CONFIG_QUADS |
+                                     INTEL_TESS_CONFIG_TRIANGLES));
+   nir_def *zero = nir_imm_int(b, 0);
+
+   /* Format below is described in the SKL PRMs, Volume 7: 3D-Media-GPGPU,
+    * Patch URB Entry (Patch Record) Output, Patch Header DW0-7
+    *
+    * Based on topology we use one of those :
+    * - Patch Header: QUAD Domain / LEGACY Patch Header Layout
+    * - Patch Header: TRI Domain / LEGACY Patch Header Layout
+    * - Patch Header: ISOLINE Domain / LEGACY Patch Header Layout
+    *
+    * There are more convenient layouts in more recent generations but they're
+    * not available on all platforms.
+    */
+   nir_def *values[8] = {
+      zero,
+      zero,
+      nir_bcsel(b, is_quad_tri, nir_load_array_var_imm(b, state.inner_factors_var, 1), zero),
+      nir_bcsel(b, is_quad_tri, nir_load_array_var_imm(b, state.inner_factors_var, 0), zero),
+
+      nir_bcsel(b, is_quad, nir_load_array_var_imm(b, state.outer_factors_var, 3),
+                nir_bcsel(b, is_tri, nir_load_array_var_imm(b, state.inner_factors_var, 0),
+                          zero)),
+      nir_bcsel(b, is_quad_tri, nir_load_array_var_imm(b, state.outer_factors_var, 2), zero),
+      nir_bcsel(b, is_quad_tri, nir_load_array_var_imm(b, state.outer_factors_var, 1),
+                nir_load_array_var_imm(b, state.outer_factors_var, 0)),
+      nir_bcsel(b, is_quad_tri, nir_load_array_var_imm(b, state.outer_factors_var, 0),
+                nir_load_array_var_imm(b, state.outer_factors_var, 1)),
+   };
+
+   nir_store_output(b, nir_vec(b, &values[0], 4), zero, .base = 0,
+                    .io_semantics.location = VARYING_SLOT_TESS_LEVEL_INNER);
+   nir_store_output(b, nir_vec(b, &values[4], 4), zero, .base = 1,
+                    .io_semantics.location = VARYING_SLOT_TESS_LEVEL_OUTER);
+
+   nir_progress(true, impl, nir_metadata_none);
+
+   return true;
+}
+
+static bool
+remap_patch_urb_offsets_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
+{
    if (!(b->shader->info.stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) &&
       !(b->shader->info.stage == MESA_SHADER_TESS_EVAL && is_input(intrin)))
      return false;
 
-   if (remap_tess_levels(b, intrin, state->tes_primitive_mode))
+   /* Handled in a different pass */
+   nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
+   if (io_sem.location == VARYING_SLOT_TESS_LEVEL_INNER ||
+       io_sem.location == VARYING_SLOT_TESS_LEVEL_OUTER)
      return false;
 
-   int vue_slot = state->vue_map->varying_to_slot[intrin->const_index[0]];
+   gl_varying_slot varying = intrin->const_index[0];
+
+   const struct intel_vue_map *vue_map = data;
+   int vue_slot = vue_map->varying_to_slot[varying];
    assert(vue_slot != -1);
    intrin->const_index[0] = vue_slot;
 
    nir_src *vertex = nir_get_io_arrayed_index_src(intrin);
    if (vertex) {
      if (nir_src_is_const(*vertex)) {
         intrin->const_index[0] += nir_src_as_uint(*vertex) *
-                                   state->vue_map->num_per_vertex_slots;
+                                   vue_map->num_per_vertex_slots;
      } else {
         b->cursor = nir_before_instr(&intrin->instr);
 
-         /* Multiply by the number of per-vertex slots. */
-         nir_def *vertex_offset =
-            nir_imul(b,
-                     vertex->ssa,
-                     nir_imm_int(b,
-                                 state->vue_map->num_per_vertex_slots));
+         bool dyn_tess_config =
+            b->shader->info.stage == MESA_SHADER_TESS_EVAL &&
+            vue_map->layout != INTEL_VUE_LAYOUT_FIXED;
+         nir_def *num_per_vertex_slots =
+            dyn_tess_config ? intel_nir_tess_field(b, PER_VERTEX_SLOTS) :
+                              nir_imm_int(b, vue_map->num_per_vertex_slots);
 
-         /* Add it to the existing offset */
-         nir_src *offset = nir_get_io_offset_src(intrin);
-         nir_def *total_offset =
-            nir_iadd(b, vertex_offset,
-                     offset->ssa);
+         /* Multiply by the number of per-vertex slots. */
+         nir_def *vertex_offset = nir_imul(b, vertex->ssa, num_per_vertex_slots);
 
-         nir_src_rewrite(offset, total_offset);
+         /* Add it to the existing offset */
+         nir_src *offset = nir_get_io_offset_src(intrin);
+         nir_def *total_offset = nir_iadd(b, vertex_offset, offset->ssa);
+
+         /* In the Tessellation evaluation shader, reposition the offset of
+          * builtins when using separate layout.
+          */
+         if (dyn_tess_config) {
+            if (varying < VARYING_SLOT_VAR0) {
+               nir_def *builtins_offset = intel_nir_tess_field(b, BUILTINS);
+               nir_def *builtins_base_offset = nir_iadd_imm(
+                  b, builtins_offset,
+                  vue_map->varying_to_slot[varying] - vue_map->builtins_slot_offset);
+
+               total_offset = nir_iadd(b, total_offset, builtins_base_offset);
+            } else {
+               nir_def *vertices_offset = intel_nir_tess_field(b, PER_PATCH_SLOTS);
+               nir_def *vertices_base_offset = nir_iadd_imm(
+                  b, vertices_offset,
+                  vue_map->varying_to_slot[varying] - vue_map->num_per_patch_slots);
+
+               total_offset = nir_iadd(b, total_offset, vertices_base_offset);
+            }
+            nir_intrinsic_set_base(intrin, 0);
+         }
+
+         nir_src_rewrite(offset, total_offset);
      }
    }
 
    return true;
 }
 
+static bool
+remap_non_header_patch_urb_offsets(nir_shader *nir, const struct intel_vue_map *vue_map)
+{
+   return nir_shader_intrinsics_pass(nir, remap_patch_urb_offsets_instr,
+                                     nir_metadata_control_flow, (void *)vue_map);
+}
+
 /* Replace store_per_view_output to plain store_output, mapping the view index
  * to IO offset. Because we only use per-view outputs for position, the offset
  * pitch is always 1. */
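Editorial note on remap_tess_header_values_dynamic() above: unwinding the nir_bcsel chains, the eight patch-header DWords written at the end of the shader resolve, per runtime domain, to the following (derived from the code above; the layouts themselves come from the SKL PRM section cited in its comment):

   /*          DW0  DW1  DW2       DW3       DW4       DW5       DW6       DW7      */
   /* QUAD:    0    0    inner[1]  inner[0]  outer[3]  outer[2]  outer[1]  outer[0] */
   /* TRI:     0    0    inner[1]  inner[0]  inner[0]  outer[2]  outer[1]  outer[0] */
   /* ISOLINE: 0    0    0         0         0         0         outer[0]  outer[1] */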
@@ -596,17 +742,22 @@ brw_nir_lower_tes_inputs(nir_shader *nir, const struct intel_vue_map *vue_map)
    NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4,
             nir_lower_io_lower_64bit_to_32);
 
-   /* This pass needs actual constants */
+   /* Run add_const_offset_to_base to allow update base/io_semantic::location
+    * for the remapping pass to look into the VUE mapping.
+    */
    NIR_PASS(_, nir, nir_opt_constant_folding);
+   NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in);
+
+   NIR_PASS(_, nir, remap_non_header_patch_urb_offsets, vue_map);
+   NIR_PASS(_, nir, remap_tess_header_values, nir->info.tess._primitive_mode);
+
+   /* remap_non_header_patch_urb_offsets can add constant math into the
+    * shader, just fold it for the backend.
+    */
+   NIR_PASS(_, nir, nir_opt_algebraic);
+   NIR_PASS(_, nir, nir_opt_constant_folding);
 
-   NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in);
-
-   NIR_PASS(_, nir, nir_shader_intrinsics_pass, remap_patch_urb_offsets,
-            nir_metadata_control_flow,
-            &(struct remap_patch_urb_offset_state) {
-               .vue_map = vue_map,
-               .tes_primitive_mode = nir->info.tess._primitive_mode,
-            });
 }
 
 static bool
@@ -874,7 +1025,9 @@ brw_nir_lower_vue_outputs(nir_shader *nir)
 }
 
 void
-brw_nir_lower_tcs_outputs(nir_shader *nir, const struct intel_vue_map *vue_map,
+brw_nir_lower_tcs_outputs(nir_shader *nir,
+                          const struct intel_device_info *devinfo,
+                          const struct intel_vue_map *vue_map,
                           enum tess_primitive_mode tes_primitive_mode)
 {
    nir_foreach_shader_out_variable(var, nir) {
@@ -884,17 +1037,23 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, const struct intel_vue_map *vue_map,
    NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4,
             nir_lower_io_lower_64bit_to_32);
 
-   /* This pass needs actual constants */
+   /* Run add_const_offset_to_base to allow update base/io_semantic::location
+    * for the remapping pass to look into the VUE mapping.
+    */
    NIR_PASS(_, nir, nir_opt_constant_folding);
    NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_out);
 
-   NIR_PASS(_, nir, nir_shader_intrinsics_pass, remap_patch_urb_offsets,
-            nir_metadata_control_flow,
-            &(struct remap_patch_urb_offset_state) {
-               .vue_map = vue_map,
-               .tes_primitive_mode = tes_primitive_mode,
-            });
+   NIR_PASS(_, nir, remap_non_header_patch_urb_offsets, vue_map);
+   if (tes_primitive_mode != TESS_PRIMITIVE_UNSPECIFIED)
+      NIR_PASS(_, nir, remap_tess_header_values, tes_primitive_mode);
+   else
+      NIR_PASS(_, nir, remap_tess_header_values_dynamic, devinfo);
+
+   /* remap_non_header_patch_urb_offsets can add constant math into the
+    * shader, just fold it for the backend.
+    */
+   NIR_PASS(_, nir, nir_opt_constant_folding);
+   NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_out);
 }
 
 void
@@ -187,7 +187,9 @@ void brw_nir_lower_fs_inputs(nir_shader *nir,
                              const struct intel_device_info *devinfo,
                              const struct brw_wm_prog_key *key);
 void brw_nir_lower_vue_outputs(nir_shader *nir);
-void brw_nir_lower_tcs_outputs(nir_shader *nir, const struct intel_vue_map *vue,
+void brw_nir_lower_tcs_outputs(nir_shader *nir,
+                               const struct intel_device_info *devinfo,
+                               const struct intel_vue_map *vue,
                                enum tess_primitive_mode tes_primitive_mode);
 void brw_nir_lower_fs_outputs(nir_shader *nir);
@@ -263,6 +263,7 @@ brw_compute_vue_map(const struct intel_device_info *devinfo,
     * recompute state when TF changes, so we just always include it.
     */
    if (layout != INTEL_VUE_LAYOUT_SEPARATE_MESH) {
+      vue_map->builtins_slot_offset = slot;
       const uint64_t builtins = slots_valid & BITFIELD64_MASK(VARYING_SLOT_VAR0);
       u_foreach_bit64(varying, builtins) {
         /* Already assigned above? */
@@ -273,7 +274,11 @@ brw_compute_vue_map(const struct intel_device_info *devinfo,
    }
 
    const int first_generic_slot = slot;
-   const uint64_t generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0);
+   const uint64_t generics =
+      (layout != INTEL_VUE_LAYOUT_FIXED ?
+       BITFIELD64_MASK(util_last_bit64(slots_valid)) :
+       slots_valid) &
+      ~BITFIELD64_MASK(VARYING_SLOT_VAR0);
    u_foreach_bit64(varying, generics) {
      if (layout != INTEL_VUE_LAYOUT_FIXED) {
         slot = first_generic_slot + varying - VARYING_SLOT_VAR0;
@@ -282,6 +287,7 @@ brw_compute_vue_map(const struct intel_device_info *devinfo,
    }
 
    if (layout == INTEL_VUE_LAYOUT_SEPARATE_MESH) {
+      vue_map->builtins_slot_offset = slot;
       const uint64_t builtins = slots_valid & BITFIELD64_MASK(VARYING_SLOT_VAR0);
       u_foreach_bit64(varying, builtins) {
         /* Already assigned above? */
@@ -314,8 +320,11 @@ brw_compute_tess_vue_map(struct intel_vue_map *vue_map,
    vertex_slots &= ~(VARYING_BIT_TESS_LEVEL_OUTER |
                      VARYING_BIT_TESS_LEVEL_INNER);
 
-   if (separate)
+   if (separate) {
       vertex_slots |= VARYING_BIT_POS;
+      vertex_slots |= VARYING_BIT_CLIP_DIST0;
+      vertex_slots |= VARYING_BIT_CLIP_DIST1;
+   }
 
    /* Make sure that the values we store in vue_map->varying_to_slot and
     * vue_map->slot_to_varying won't overflow the signed chars that are used
@@ -381,9 +390,11 @@ brw_compute_tess_vue_map(struct intel_vue_map *vue_map,
          slot = first_generics_slot + varying - VARYING_SLOT_VAR0;
          assign_vue_slot(vue_map, varying, slot++);
       }
+      vue_map->builtins_slot_offset = slot;
       u_foreach_bit64(varying, builtins)
          assign_vue_slot(vue_map, varying, slot++);
    } else {
+      vue_map->builtins_slot_offset = slot;
       u_foreach_bit64(varying, builtins) {
         assign_vue_slot(vue_map, varying, slot++);
      }
@@ -386,8 +386,7 @@ elk_compile_tcs(const struct elk_compiler *compiler,
                              key->_tes_primitive_mode);
    if (key->quads_workaround)
       intel_nir_apply_tcs_quads_workaround(nir);
-   if (key->input_vertices > 0)
-      intel_nir_lower_patch_vertices_in(nir, key->input_vertices, NULL, NULL);
+   intel_nir_lower_patch_vertices_in(nir, key->input_vertices);
 
    elk_postprocess_nir(nir, compiler, debug_enabled,
                        key->base.robust_flags);
@@ -13,7 +13,10 @@ extern "C" {
 
 struct intel_device_info;
 
+#define intel_nir_tess_field(b, field) \
+   nir_ubitfield_extract_imm(b, nir_load_tess_config_intel(b), \
+                             INTEL_TESS_CONFIG_##field##_OFFSET, \
+                             INTEL_TESS_CONFIG_##field##_SIZE)
+
 void intel_nir_apply_tcs_quads_workaround(nir_shader *nir);
 bool brw_nir_rebase_const_offset_ubo_loads(nir_shader *shader);
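Editorial note: the token pasting resolves the field name to the OFFSET/SIZE pair added in intel_shader_enums.h (further down in this diff), so the macro is a plain bitfield extract from the pushed config word. Written out by hand:

   /* intel_nir_tess_field(b, OUTPUT_VERTICES) expands to: */
   nir_ubitfield_extract_imm(b, nir_load_tess_config_intel(b),
                             INTEL_TESS_CONFIG_OUTPUT_VERTICES_OFFSET, /* 5 */
                             INTEL_TESS_CONFIG_OUTPUT_VERTICES_SIZE);  /* 5 */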
@@ -26,9 +29,9 @@ bool intel_nir_cleanup_resource_intel(nir_shader *shader);
 bool intel_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir);
 bool intel_nir_lower_non_uniform_resource_intel(nir_shader *shader);
 bool intel_nir_lower_patch_vertices_in(nir_shader *shader,
-                                       unsigned input_vertices,
-                                       nir_lower_instr_cb cb,
-                                       void *data);
+                                       unsigned input_vertices);
+bool intel_nir_lower_patch_vertices_tes(nir_shader *shader);
 
 bool intel_nir_lower_shading_rate_output(nir_shader *nir);
 bool intel_nir_lower_sparse_intrinsics(nir_shader *nir);
@@ -30,6 +30,7 @@
  */
 
 #include "intel_nir.h"
+#include "intel_shader_enums.h"
 #include "compiler/nir/nir_builder.h"
 #include "compiler/nir/nir_deref.h"
@@ -83,13 +84,11 @@ intel_nir_clamp_per_vertex_loads(nir_shader *shader)
 
 struct lower_patch_vertices_state {
    unsigned input_vertices;
-   nir_lower_instr_cb cb;
-   void *data;
 };
 
 static bool
-lower_patch_vertices_instr(nir_builder *b, nir_intrinsic_instr *intrin,
-                           void *cb_data)
+lower_patch_vertices_in_instr(nir_builder *b, nir_intrinsic_instr *intrin,
+                              void *cb_data)
 {
    if (intrin->intrinsic != nir_intrinsic_load_patch_vertices_in)
       return false;
@@ -101,24 +100,44 @@ lower_patch_vertices_instr(nir_builder *b, nir_intrinsic_instr *intrin,
    nir_def *val =
      state->input_vertices ?
      nir_imm_int(b, state->input_vertices) :
-      state->cb(b, &intrin->instr, state->data);
-   nir_def_rewrite_uses(&intrin->def, val);
+      nir_iadd_imm(b, intel_nir_tess_field(b, INPUT_VERTICES), 1);
+
+   nir_def_replace(&intrin->def, val);
 
    return true;
 }
 
 bool
 intel_nir_lower_patch_vertices_in(nir_shader *shader,
-                                  unsigned input_vertices,
-                                  nir_lower_instr_cb cb,
-                                  void *data)
+                                  unsigned input_vertices)
 {
-   assert(input_vertices != 0 || cb != NULL);
+   assert(shader->info.stage == MESA_SHADER_TESS_CTRL);
    struct lower_patch_vertices_state state = {
      .input_vertices = input_vertices,
-      .cb = cb,
-      .data = data,
    };
-   return nir_shader_intrinsics_pass(shader, lower_patch_vertices_instr,
-                                     nir_metadata_control_flow, &state);
+   return nir_shader_intrinsics_pass(shader, lower_patch_vertices_in_instr,
+                                     nir_metadata_none, &state);
 }
 
+static bool
+lower_patch_vertices_tes_instr(nir_builder *b, nir_intrinsic_instr *intrin,
+                               void *cb_data)
+{
+   if (intrin->intrinsic != nir_intrinsic_load_patch_vertices_in)
+      return false;
+
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_def *field = intel_nir_tess_field(b, OUTPUT_VERTICES);
+
+   nir_def_replace(&intrin->def, nir_iadd_imm(b, field, 1));
+   return true;
+}
+
+bool
+intel_nir_lower_patch_vertices_tes(nir_shader *shader)
+{
+   assert(shader->info.stage == MESA_SHADER_TESS_EVAL);
+   return nir_shader_intrinsics_pass(shader, lower_patch_vertices_tes_instr,
+                                     nir_metadata_none, NULL);
+}
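Editorial note: both lowering helpers add 1 after extracting the vertex-count field because intel_tess_config() (added in intel_shader_enums.h just below) stores the count minus one, which lets the full 1..32 range fit in a 5-bit field. A hypothetical round-trip check under that assumption:

   /* Not part of the commit, for illustration only. */
   uint32_t cfg = intel_tess_config(32, 4, INTEL_TESS_DOMAIN_QUAD, 0, 0, 0);
   uint32_t in_vertices =
      ((cfg >> INTEL_TESS_CONFIG_INPUT_VERTICES_OFFSET) & 0x1f) + 1;
   assert(in_vertices == 32); /* 31 is stored in the field, +1 on read */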
@@ -30,6 +30,57 @@ intel_sometimes_invert(enum intel_sometimes x)
    return (enum intel_sometimes)((int)INTEL_ALWAYS - (int)x);
 }
 
+#define INTEL_TESS_CONFIG_INPUT_VERTICES_OFFSET    (0)
+#define INTEL_TESS_CONFIG_INPUT_VERTICES_SIZE      (5)
+#define INTEL_TESS_CONFIG_OUTPUT_VERTICES_OFFSET   (5)
+#define INTEL_TESS_CONFIG_OUTPUT_VERTICES_SIZE     (5)
+#define INTEL_TESS_CONFIG_BUILTINS_OFFSET          (10)
+#define INTEL_TESS_CONFIG_BUILTINS_SIZE            (6)
+#define INTEL_TESS_CONFIG_PER_VERTEX_SLOTS_OFFSET  (16)
+#define INTEL_TESS_CONFIG_PER_VERTEX_SLOTS_SIZE    (6)
+#define INTEL_TESS_CONFIG_PER_PATCH_SLOTS_OFFSET   (22)
+#define INTEL_TESS_CONFIG_PER_PATCH_SLOTS_SIZE     (6)
+
+enum intel_tess_configs {
+   /** Tessellation input vertices minus 1
+    *
+    * This field actually covers 5 bits.
+    */
+   INTEL_TESS_CONFIG_INPUT_VERTICES = BITFIELD_BIT(INTEL_TESS_CONFIG_INPUT_VERTICES_OFFSET),
+
+   /** Tessellation output vertices minus 1
+    *
+    * This field actually covers 5 bits.
+    */
+   INTEL_TESS_CONFIG_OUTPUT_VERTICES = BITFIELD_BIT(INTEL_TESS_CONFIG_OUTPUT_VERTICES_OFFSET),
+
+   /** Tessellation builtins slot offset
+    *
+    * This field actually covers 6 bits.
+    */
+   INTEL_TESS_CONFIG_BUILTINS = BITFIELD_BIT(INTEL_TESS_CONFIG_BUILTINS_OFFSET),
+
+   /** Number of per-vertex slots
+    *
+    * This field actually covers 6 bits.
+    */
+   INTEL_TESS_PER_VERTEX_SLOTS = BITFIELD_BIT(INTEL_TESS_CONFIG_PER_VERTEX_SLOTS_OFFSET),
+
+   /** Number of per-patch slots
+    *
+    * This field actually covers 6 bits.
+    */
+   INTEL_TESS_PER_PATCH_SLOTS = BITFIELD_BIT(INTEL_TESS_CONFIG_PER_PATCH_SLOTS_OFFSET),
+
+   /** Tessellation primitive modes
+    *
+    * Only one of the following 3 bits should be set.
+    */
+   INTEL_TESS_CONFIG_QUADS     = BITFIELD_BIT(29),
+   INTEL_TESS_CONFIG_TRIANGLES = BITFIELD_BIT(30),
+   INTEL_TESS_CONFIG_ISOLINES  = BITFIELD_BIT(31)
+};
+
 #define INTEL_MSAA_FLAG_FIRST_VUE_SLOT_OFFSET      (19)
 #define INTEL_MSAA_FLAG_FIRST_VUE_SLOT_SIZE        (6)
 #define INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_OFFSET  (25)
@@ -294,6 +345,11 @@ struct intel_vue_map {
    * shader outputs and tessellation evaluation shader inputs.
    */
    int num_per_vertex_slots;
+
+   /**
+    * Location at which the builtins live.
+    */
+   int builtins_slot_offset;
 };
 
 struct intel_cs_dispatch_info {
@@ -314,6 +370,36 @@ enum intel_compute_walk_order {
    INTEL_WALK_ORDER_ZYX = 5,
 };
 
+static inline uint32_t
+intel_tess_config(uint32_t input_vertices,
+                  uint32_t output_vertices,
+                  enum intel_tess_domain tess_domain,
+                  uint32_t num_per_patch_slots,
+                  uint32_t num_per_vertex_slots,
+                  uint32_t builtins_slot_offset)
+{
+   assert(num_per_patch_slots < (1u << INTEL_TESS_CONFIG_PER_PATCH_SLOTS_SIZE));
+   assert(num_per_vertex_slots < (1u << INTEL_TESS_CONFIG_PER_VERTEX_SLOTS_SIZE));
+   assert(builtins_slot_offset < (1u << INTEL_TESS_CONFIG_BUILTINS_SIZE));
+   assert(input_vertices != 0);
+   assert((input_vertices - 1) < (1u << INTEL_TESS_CONFIG_INPUT_VERTICES_SIZE));
+   assert((output_vertices - 1) < (1u << INTEL_TESS_CONFIG_OUTPUT_VERTICES_SIZE));
+
+   const uint32_t primitive_flags =
+      tess_domain == INTEL_TESS_DOMAIN_TRI  ? INTEL_TESS_CONFIG_TRIANGLES :
+      tess_domain == INTEL_TESS_DOMAIN_QUAD ? INTEL_TESS_CONFIG_QUADS :
+      INTEL_TESS_CONFIG_ISOLINES;
+
+   return
+      (((input_vertices - 1) & 0x1f) << INTEL_TESS_CONFIG_INPUT_VERTICES_OFFSET) |
+      (((output_vertices - 1) & 0x1f) << INTEL_TESS_CONFIG_OUTPUT_VERTICES_OFFSET) |
+      primitive_flags |
+      (num_per_patch_slots << INTEL_TESS_CONFIG_PER_PATCH_SLOTS_OFFSET) |
+      (num_per_vertex_slots << INTEL_TESS_CONFIG_PER_VERTEX_SLOTS_OFFSET) |
+      (builtins_slot_offset << INTEL_TESS_CONFIG_BUILTINS_OFFSET);
+}
+
 static inline bool
 intel_fs_is_persample(enum intel_sometimes shader_persample_dispatch,
                       bool shader_per_sample_shading,
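A worked packing example (editorial, values chosen arbitrarily): with 3 input vertices, 4 output vertices, the TRI domain, 2 per-patch slots, 8 per-vertex slots and the builtins at slot offset 0,

   uint32_t cfg = intel_tess_config(3, 4, INTEL_TESS_DOMAIN_TRI, 2, 8, 0);
   /* cfg == 0x40880062:
    *   bits [0,5)   = 2   input vertices - 1
    *   bits [5,10)  = 3   output vertices - 1
    *   bits [10,16) = 0   builtins slot offset
    *   bits [16,22) = 8   per-vertex slots
    *   bits [22,28) = 2   per-patch slots
    *   bit  30      = 1   INTEL_TESS_CONFIG_TRIANGLES
    */

This is the same value anv computes at draw time in cmd_buffer_flush_gfx_runtime_state() when patch control points are dynamic (see the anv hunks below).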
@@ -114,11 +114,16 @@ bool anv_nir_apply_pipeline_layout(nir_shader *shader,
                                    struct anv_pipeline_push_map *push_map,
                                    void *push_map_mem_ctx);
 
+struct anv_nir_push_layout_info {
+   bool fragment_dynamic;
+   bool mesh_dynamic;
+};
+
 bool anv_nir_compute_push_layout(nir_shader *nir,
                                  const struct anv_physical_device *pdevice,
                                  enum brw_robustness_flags robust_flags,
-                                 bool fragment_dynamic,
-                                 bool mesh_dynamic,
+                                 const struct anv_nir_push_layout_info *info,
+                                 struct brw_base_prog_key *prog_key,
                                  struct brw_stage_prog_data *prog_data,
                                  struct anv_pipeline_bind_map *map,
                                  const struct anv_pipeline_push_map *push_map,
@@ -30,8 +30,8 @@ bool
 anv_nir_compute_push_layout(nir_shader *nir,
                             const struct anv_physical_device *pdevice,
                             enum brw_robustness_flags robust_flags,
-                            bool fragment_dynamic,
-                            bool mesh_dynamic,
+                            const struct anv_nir_push_layout_info *push_info,
+                            struct brw_base_prog_key *prog_key,
                             struct brw_stage_prog_data *prog_data,
                             struct anv_pipeline_bind_map *map,
                             const struct anv_pipeline_push_map *push_map,
@@ -92,7 +92,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
    const bool needs_wa_18019110168 =
      nir->info.stage == MESA_SHADER_FRAGMENT &&
      brw_nir_fragment_shader_needs_wa_18019110168(
-         devinfo, mesh_dynamic ? INTEL_SOMETIMES : INTEL_NEVER, nir);
+         devinfo, push_info->mesh_dynamic ? INTEL_SOMETIMES : INTEL_NEVER, nir);
 
    if (push_ubo_ranges && (robust_flags & BRW_ROBUSTNESS_UBO)) {
      /* We can't on-the-fly adjust our push ranges because doing so would
@@ -111,7 +111,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
    }
 
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
-      if (fragment_dynamic) {
+      if (push_info->fragment_dynamic) {
         const uint32_t fs_msaa_flags_start =
            anv_drv_const_offset(gfx.fs_msaa_flags);
         const uint32_t fs_msaa_flags_end =
@@ -132,6 +132,17 @@ anv_nir_compute_push_layout(nir_shader *nir,
      }
    }
 
+   const bool needs_dyn_tess_config =
+      nir->info.stage == MESA_SHADER_TESS_CTRL &&
+      container_of(prog_key, struct brw_tcs_prog_key, base)->input_vertices == 0;
+   if (needs_dyn_tess_config) {
+      const uint32_t tess_config_start = anv_drv_const_offset(gfx.tess_config);
+      const uint32_t tess_config_end = tess_config_start +
+                                       anv_drv_const_size(gfx.tess_config);
+      push_start = MIN2(push_start, tess_config_start);
+      push_end = MAX2(push_end, tess_config_end);
+   }
+
    if (nir->info.stage == MESA_SHADER_COMPUTE && devinfo->verx10 < 125) {
      /* For compute shaders, we always have to have the subgroup ID. The
       * back-end compiler will "helpfully" add it for us in the last push
@@ -244,7 +255,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
    */
    const bool needs_padding_per_primitive =
      needs_wa_18019110168 ||
-      (mesh_dynamic &&
+      (push_info->mesh_dynamic &&
       (nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID));
 
    unsigned n_push_ranges = 0;
@@ -344,11 +355,20 @@ anv_nir_compute_push_layout(nir_shader *nir,
    assert(n_push_ranges <= 4);
 
+   if (nir->info.stage == MESA_SHADER_TESS_CTRL && needs_dyn_tess_config) {
+      struct brw_tcs_prog_data *tcs_prog_data =
+         container_of(prog_data, struct brw_tcs_prog_data, base.base);
+
+      const uint32_t tess_config_offset =
+         anv_drv_const_offset(gfx.tess_config);
+      assert(tess_config_offset >= push_start);
+      tcs_prog_data->tess_config_param = (tess_config_offset - push_start) / 4;
+   }
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      struct brw_wm_prog_data *wm_prog_data =
         container_of(prog_data, struct brw_wm_prog_data, base);
 
-      if (fragment_dynamic) {
+      if (push_info->fragment_dynamic) {
         const uint32_t fs_msaa_flags_offset =
            anv_drv_const_offset(gfx.fs_msaa_flags);
         assert(fs_msaa_flags_offset >= push_start);
@@ -940,12 +940,6 @@ accept_64bit_atomic_cb(const nir_intrinsic_instr *intrin, const void *data)
           intrin->def.bit_size == 64;
 }
 
-static nir_def *
-build_tcs_input_vertices(nir_builder *b, nir_instr *instr, void *data)
-{
-   return anv_load_driver_uniform(b, 1, gfx.tcs_input_vertices);
-}
-
 static void
 fixup_large_workgroup_image_coherency(nir_shader *nir)
 {
@@ -1055,15 +1049,6 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
      NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_function_temp, 16);
    }
 
-   /* The patch control points are delivered through a push constant when
-    * dynamic.
-    */
-   if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
-      NIR_PASS(_, nir, intel_nir_lower_patch_vertices_in,
-               stage->key.tcs.input_vertices,
-               build_tcs_input_vertices, NULL);
-   }
-
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 
    /* Apply lowering for 64bit atomics pre-Xe2 */
@@ -1169,9 +1154,12 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
    NIR_PASS(_, nir, anv_nir_compute_push_layout,
             pdevice, stage->key.base.robust_flags,
-            anv_graphics_pipeline_stage_fragment_dynamic(stage),
-            anv_graphics_pipeline_stage_mesh_dynamic(stage),
-            prog_data, &stage->bind_map, &push_map, mem_ctx);
+            &(struct anv_nir_push_layout_info) {
+               .fragment_dynamic = anv_graphics_pipeline_stage_fragment_dynamic(stage),
+               .mesh_dynamic = anv_graphics_pipeline_stage_mesh_dynamic(stage),
+            },
+            &stage->key.base, prog_data,
+            &stage->bind_map, &push_map, mem_ctx);
 
    NIR_PASS(_, nir, anv_nir_lower_resource_intel, pdevice,
             stage->bind_map.layout_type);
@@ -1562,7 +1562,7 @@ enum anv_gfx_state_bits {
    ANV_GFX_STATE_WA_14018283232, /* Fake state to implement workaround */
    ANV_GFX_STATE_TBIMR_TILE_PASS_INFO,
    ANV_GFX_STATE_FS_MSAA_FLAGS,
-   ANV_GFX_STATE_TCS_INPUT_VERTICES,
+   ANV_GFX_STATE_TESS_CONFIG,
    ANV_GFX_STATE_MESH_PROVOKING_VERTEX,
 
    ANV_GFX_STATE_MAX,
@@ -1949,12 +1949,9 @@ struct anv_gfx_dynamic_state {
    enum intel_msaa_flags fs_msaa_flags;
 
    /**
-    * Dynamic TCS input vertices, this value can be different from
-    * anv_driver_constants::gfx::tcs_input_vertices, as the push constant
-    * value only needs to be updated for tesselation control shaders
-    * dynamically checking the value.
+    * Dynamic tessellation configuration (see enum intel_tess_configs).
     */
-   uint32_t tcs_input_vertices;
+   uint32_t tess_config;
 
    /**
    * Provoking vertex index, sent to the mesh shader for Wa_18019110168.
@@ -3972,8 +3969,8 @@ struct anv_push_constants {
      /** Dynamic MSAA value */
      uint32_t fs_msaa_flags;
 
-      /** Dynamic TCS input vertices */
-      uint32_t tcs_input_vertices;
+      /** Dynamic TCS/TES configuration */
+      uint32_t tess_config;
 
      /** Robust access pushed registers. */
      uint8_t push_reg_mask[MESA_SHADER_STAGES][4];
@@ -186,7 +186,7 @@ anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state)
    NAME(WA_14018283232);
    NAME(TBIMR_TILE_PASS_INFO);
    NAME(FS_MSAA_FLAGS);
-   NAME(TCS_INPUT_VERTICES);
+   NAME(TESS_CONFIG);
    NAME(MESH_PROVOKING_VERTEX);
    default: UNREACHABLE("invalid state");
    }
@@ -2440,10 +2440,19 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
    * reemit if needed.
    */
    const struct brw_tcs_prog_data *tcs_prog_data = get_gfx_tcs_prog_data(gfx);
-   if (tcs_prog_data && tcs_prog_data->input_vertices == 0 &&
+   const bool tcs_dynamic =
+      tcs_prog_data && tcs_prog_data->input_vertices == 0;
+   if (tcs_dynamic &&
       ((gfx->dirty & ANV_CMD_DIRTY_HS) ||
-       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS)))
-      SET(TCS_INPUT_VERTICES, tcs_input_vertices, dyn->ts.patch_control_points);
+       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS))) {
+      SET(TESS_CONFIG, tess_config,
+          intel_tess_config(dyn->ts.patch_control_points,
+                            tcs_prog_data->instances,
+                            0,
+                            tcs_prog_data->base.vue_map.num_per_patch_slots,
+                            tcs_prog_data->base.vue_map.num_per_vertex_slots,
+                            tcs_prog_data->base.vue_map.builtins_slot_offset));
+   }
 
 #if INTEL_WA_18019110168_GFX_VER
    const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx);
@@ -3388,7 +3397,7 @@ genX(emit_urb_setup)(struct anv_batch *batch,
    }
 
 #if GFX_VERx10 >= 125
-   if (device->vk.enabled_extensions.EXT_mesh_shader) {
+   if (device->vk.enabled_features.meshShader) {
      anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), urb) {
         if (urb_cfg->size[MESA_SHADER_TASK] > 0)
            urb.TASKURBEntryAllocationSize = urb_cfg->size[MESA_SHADER_TASK] - 1;
@@ -3463,8 +3472,8 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
    * Values provided by push constants
    */
 
-   if (IS_DIRTY(TCS_INPUT_VERTICES)) {
-      push_consts->gfx.tcs_input_vertices = dyn->ts.patch_control_points;
+   if (IS_DIRTY(TESS_CONFIG)) {
+      push_consts->gfx.tess_config = hw_state->tess_config;
      cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
      gfx->base.push_constants_data_dirty = true;
    }