brw: add support for separate tessellation shader compilation

Tessellation factors have to be written dynamically (based on the next
shader's primitive topology) and the builtins read using a dynamic
offset (based on the preceding shader's VUE map).

Anv is updated to use this new infrastructure for dynamic
patch_control_points.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34872>
Authored by Lionel Landwerlin on 2025-04-22 18:42:35 +03:00; committed by Marge Bot
parent a18835a9ca
commit a91e0e0d61
17 changed files with 483 additions and 139 deletions
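As a minimal sketch (not part of the commit) of how the two sides fit together, the C fragment below packs the 32-bit dynamic configuration word the way the genX command buffer code at the end of this diff does, and decodes one field the way intel_nir_tess_field() does. It only uses helpers introduced further down (intel_tess_config(), the INTEL_TESS_CONFIG_* fields); the wrapper function names are assumptions for illustration and the mesa headers are assumed to be included.

/* Illustrative sketch only: driver side packs a push constant at draw time,
 * the tessellation shaders read fields back out of it.
 */
static uint32_t
example_pack_tess_config(uint32_t patch_control_points,
                         const struct brw_tcs_prog_data *tcs)
{
   return intel_tess_config(patch_control_points,
                            tcs->instances,
                            0 /* tess_domain, as in the anv call below */,
                            tcs->base.vue_map.num_per_patch_slots,
                            tcs->base.vue_map.num_per_vertex_slots,
                            tcs->base.vue_map.builtins_slot_offset);
}

static uint32_t
example_unpack_input_vertices(uint32_t tess_config)
{
   /* The field stores input_vertices - 1 in 5 bits (see the
    * intel_shader_enums.h hunk below); the shader-side lowering adds the
    * 1 back with an iadd_imm.
    */
   return ((tess_config >> INTEL_TESS_CONFIG_INPUT_VERTICES_OFFSET) &
           ((1u << INTEL_TESS_CONFIG_INPUT_VERTICES_SIZE) - 1)) + 1;
}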

View file

@ -204,14 +204,13 @@ brw_compile_tcs(const struct brw_compiler *compiler,
brw_compute_tess_vue_map(&vue_prog_data->vue_map,
nir->info.outputs_written,
nir->info.patch_outputs_written,
nir->info.separate_shader);
key->separate_tess_vue_layout);
brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
brw_nir_lower_vue_inputs(nir, &input_vue_map);
brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map,
brw_nir_lower_tcs_outputs(nir, devinfo, &vue_prog_data->vue_map,
key->_tes_primitive_mode);
if (key->input_vertices > 0)
intel_nir_lower_patch_vertices_in(nir, key->input_vertices, NULL, NULL);
intel_nir_lower_patch_vertices_in(nir, key->input_vertices);
brw_postprocess_nir(nir, compiler, debug_enabled,
key->base.robust_flags);

View file

@ -9,6 +9,7 @@
#include "brw_generator.h"
#include "brw_nir.h"
#include "brw_private.h"
#include "intel_nir.h"
#include "dev/intel_debug.h"
#include "util/macros.h"
@ -65,7 +66,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
const struct intel_device_info *devinfo = compiler->devinfo;
nir_shader *nir = params->base.nir;
const struct brw_tes_prog_key *key = params->key;
const struct intel_vue_map *input_vue_map = params->input_vue_map;
struct intel_vue_map input_vue_map;
struct brw_tes_prog_data *prog_data = params->prog_data;
const unsigned dispatch_width = brw_geometry_stage_dispatch_width(compiler->devinfo);
@ -73,12 +74,23 @@ brw_compile_tes(const struct brw_compiler *compiler,
brw_prog_data_init(&prog_data->base.base, &params->base);
nir->info.inputs_read = key->inputs_read;
nir->info.patch_inputs_read = key->patch_inputs_read;
if (params->input_vue_map != NULL) {
assert(!key->separate_tess_vue_layout);
nir->info.inputs_read = key->inputs_read;
nir->info.patch_inputs_read = key->patch_inputs_read;
memcpy(&input_vue_map, params->input_vue_map,
sizeof(input_vue_map));
} else {
brw_compute_tess_vue_map(&input_vue_map,
nir->info.inputs_read,
nir->info.patch_inputs_read,
key->separate_tess_vue_layout);
}
brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
brw_nir_lower_tes_inputs(nir, input_vue_map);
brw_nir_lower_tes_inputs(nir, &input_vue_map);
brw_nir_lower_vue_outputs(nir);
NIR_PASS(_, nir, intel_nir_lower_patch_vertices_tes);
brw_postprocess_nir(nir, compiler, debug_enabled,
key->base.robust_flags);
@ -155,7 +167,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
if (unlikely(debug_enabled)) {
fprintf(stderr, "TES Input ");
brw_print_vue_map(stderr, input_vue_map, MESA_SHADER_TESS_EVAL);
brw_print_vue_map(stderr, &input_vue_map, MESA_SHADER_TESS_EVAL);
fprintf(stderr, "TES Output ");
brw_print_vue_map(stderr, &prog_data->base.vue_map,
MESA_SHADER_TESS_EVAL);

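A hedged caller-side sketch of what the NULL input_vue_map path above enables; the params struct name and surrounding declarations are assumptions based on the fields brw_compile_tes() dereferences. With separately compiled tessellation shaders the TCS output layout is unknown, so the driver leaves input_vue_map NULL and sets separate_tess_vue_layout in the key, letting the TES derive its input layout from its own inputs_read/patch_inputs_read.

/* Sketch only: compile a TES without knowing the producing TCS's layout. */
struct brw_compile_tes_params params = {
   .base.nir      = tes_nir,        /* hypothetical TES NIR shader */
   .key           = &tes_key,       /* tes_key.separate_tess_vue_layout = true */
   .input_vue_map = NULL,           /* layout derived inside brw_compile_tes */
   .prog_data     = &tes_prog_data,
};
const unsigned *assembly = brw_compile_tes(compiler, &params);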
View file

@ -319,7 +319,10 @@ struct brw_tcs_prog_key
/** A bitfield of per-patch outputs written. */
uint32_t patch_outputs_written;
uint32_t padding;
/** Tessellation VUE layout */
bool separate_tess_vue_layout:1;
uint32_t padding:31;
};
#define BRW_MAX_TCS_INPUT_VERTICES (32)
@ -342,7 +345,10 @@ struct brw_tes_prog_key
/** A bitfield of per-patch inputs read. */
uint32_t patch_inputs_read;
uint32_t padding;
/** Tessellation VUE layout */
bool separate_tess_vue_layout:1;
uint32_t padding:31;
};
/** The program key for Geometry Shaders. */
@ -1199,11 +1205,23 @@ struct brw_tcs_prog_data
/** Should the non-SINGLE_PATCH payload provide primitive ID? */
bool include_primitive_id;
/** Whether the tessellation domain is unknown at compile time
*
* Used with VK_EXT_shader_object
*/
bool dynamic_domain;
/** Number of vertices in the output patch */
int instances;
/** Track patch count threshold */
int patch_count_threshold;
/**
* Push constant location of intel_tess_config (dynamic configuration of
* the tessellation shaders).
*/
unsigned tess_config_param;
};
@ -1215,6 +1233,12 @@ struct brw_tes_prog_data
enum intel_tess_output_topology output_topology;
enum intel_tess_domain domain;
bool include_primitive_id;
/**
* Push constant location of intel_tess_config (dynamic configuration of
* the tessellation shaders).
*/
unsigned tess_config_param;
};
struct brw_gs_prog_data

View file

@ -3321,6 +3321,11 @@ brw_from_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb,
break;
}
case nir_intrinsic_load_tess_config_intel:
bld.MOV(retype(dst, BRW_TYPE_UD),
brw_uniform_reg(tcs_prog_data->tess_config_param, BRW_TYPE_UD));
break;
default:
brw_from_nir_emit_intrinsic(ntb, bld, instr);
break;
@ -3429,6 +3434,12 @@ brw_from_nir_emit_tes_intrinsic(nir_to_brw_state &ntb,
}
break;
}
case nir_intrinsic_load_tess_config_intel:
bld.MOV(retype(dest, BRW_TYPE_UD),
brw_uniform_reg(tes_prog_data->tess_config_param, BRW_TYPE_UD));
break;
default:
brw_from_nir_emit_intrinsic(ntb, bld, instr);
break;

View file

@ -142,23 +142,53 @@ type_size_dvec4(const struct glsl_type *type, bool bindless)
}
static bool
remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
enum tess_primitive_mode _primitive_mode)
is_input(nir_intrinsic_instr *intrin)
{
const int location = nir_intrinsic_base(intr);
return intrin->intrinsic == nir_intrinsic_load_input ||
intrin->intrinsic == nir_intrinsic_load_per_primitive_input ||
intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
intrin->intrinsic == nir_intrinsic_load_interpolated_input;
}
static bool
is_output(nir_intrinsic_instr *intrin)
{
return intrin->intrinsic == nir_intrinsic_load_output ||
intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
intrin->intrinsic == nir_intrinsic_load_per_view_output ||
intrin->intrinsic == nir_intrinsic_store_output ||
intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
intrin->intrinsic == nir_intrinsic_store_per_view_output;
}
static bool
remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
if (!(b->shader->info.stage == MESA_SHADER_TESS_CTRL && is_output(intr)) &&
!(b->shader->info.stage == MESA_SHADER_TESS_EVAL && is_input(intr)))
return false;
/* Handled in a different pass */
nir_io_semantics io_sem = nir_intrinsic_io_semantics(intr);
if (io_sem.location != VARYING_SLOT_TESS_LEVEL_INNER &&
io_sem.location != VARYING_SLOT_TESS_LEVEL_OUTER)
return false;
const unsigned component = nir_intrinsic_component(intr);
bool out_of_bounds = false;
bool write = !nir_intrinsic_infos[intr->intrinsic].has_dest;
unsigned mask = write ? nir_intrinsic_write_mask(intr) : 0;
nir_def *src = NULL, *dest = NULL;
enum tess_primitive_mode _primitive_mode = (uintptr_t)data;
if (write) {
assert(intr->num_components == intr->src[0].ssa->num_components);
} else {
assert(intr->num_components == intr->def.num_components);
}
if (location == VARYING_SLOT_TESS_LEVEL_INNER) {
if (io_sem.location == VARYING_SLOT_TESS_LEVEL_INNER) {
b->cursor = write ? nir_before_instr(&intr->instr)
: nir_after_instr(&intr->instr);
@ -201,7 +231,7 @@ remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
default:
UNREACHABLE("Bogus tessellation domain");
}
} else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) {
} else {
b->cursor = write ? nir_before_instr(&intr->instr)
: nir_after_instr(&intr->instr);
@ -253,8 +283,6 @@ remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
default:
UNREACHABLE("Bogus tessellation domain");
}
} else {
return false;
}
if (out_of_bounds) {
@ -275,74 +303,192 @@ remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
}
static bool
is_input(nir_intrinsic_instr *intrin)
remap_tess_header_values(nir_shader *nir, enum tess_primitive_mode _primitive_mode)
{
return intrin->intrinsic == nir_intrinsic_load_input ||
intrin->intrinsic == nir_intrinsic_load_per_primitive_input ||
intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
intrin->intrinsic == nir_intrinsic_load_interpolated_input;
return nir_shader_intrinsics_pass(nir, remap_tess_levels,
nir_metadata_control_flow,
(void*)(uintptr_t)_primitive_mode);
}
static bool
is_output(nir_intrinsic_instr *intrin)
{
return intrin->intrinsic == nir_intrinsic_load_output ||
intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
intrin->intrinsic == nir_intrinsic_load_per_view_output ||
intrin->intrinsic == nir_intrinsic_store_output ||
intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
intrin->intrinsic == nir_intrinsic_store_per_view_output;
}
struct remap_patch_urb_offset_state {
const struct intel_vue_map *vue_map;
enum tess_primitive_mode tes_primitive_mode;
struct tess_levels_temporary_state {
nir_variable *inner_factors_var;
nir_variable *outer_factors_var;
};
static bool
remap_patch_urb_offsets(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
remap_tess_levels_to_temporary(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
struct remap_patch_urb_offset_state *state = data;
if (!(b->shader->info.stage == MESA_SHADER_TESS_CTRL && is_output(intrin)))
return false;
/* Handled in a different pass */
nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
if (io_sem.location != VARYING_SLOT_TESS_LEVEL_INNER &&
io_sem.location != VARYING_SLOT_TESS_LEVEL_OUTER)
return false;
struct tess_levels_temporary_state *state = data;
nir_variable *var = io_sem.location == VARYING_SLOT_TESS_LEVEL_INNER ?
state->inner_factors_var : state->outer_factors_var;
if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
b->cursor = nir_after_instr(&intrin->instr);
nir_def *new_val =
nir_load_array_var(b, var,
nir_iadd_imm(b, nir_get_io_offset_src(intrin)->ssa,
nir_intrinsic_component(intrin)));
nir_def_replace(&intrin->def, new_val);
} else {
b->cursor = nir_instr_remove(&intrin->instr);
nir_store_array_var(b, var,
nir_iadd_imm(b, nir_get_io_offset_src(intrin)->ssa,
nir_intrinsic_component(intrin)),
intrin->src[0].ssa,
nir_intrinsic_write_mask(intrin));
}
return true;
}
static bool
remap_tess_header_values_dynamic(nir_shader *nir, const struct intel_device_info *devinfo)
{
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
struct tess_levels_temporary_state state = {
.inner_factors_var = nir_local_variable_create(
impl, glsl_array_type(glsl_uint_type(), 2, 0),
"__temp_inner_factors"),
.outer_factors_var = nir_local_variable_create(
impl, glsl_array_type(glsl_uint_type(), 4, 0),
"__temp_outer_factors"),
};
nir_shader_intrinsics_pass(nir, remap_tess_levels_to_temporary,
nir_metadata_control_flow, &state);
nir_builder _b = nir_builder_at(nir_after_impl(impl)), *b = &_b;
nir_def *tess_config = nir_load_tess_config_intel(b);
nir_def *is_quad =
nir_test_mask(b, tess_config, INTEL_TESS_CONFIG_QUADS);
nir_def *is_tri =
nir_test_mask(b, tess_config, INTEL_TESS_CONFIG_TRIANGLES);
nir_def *is_quad_tri =
nir_test_mask(b, tess_config, (INTEL_TESS_CONFIG_QUADS |
INTEL_TESS_CONFIG_TRIANGLES));
nir_def *zero = nir_imm_int(b, 0);
/* The format below is described in the SKL PRMs, Volume 7: 3D-Media-GPGPU,
* Patch URB Entry (Patch Record) Output, Patch Header DW0-7.
*
* Based on the topology we use one of these:
* - Patch Header: QUAD Domain / LEGACY Patch Header Layout
* - Patch Header: TRI Domain / LEGACY Patch Header Layout
* - Patch Header: ISOLINE Domain / LEGACY Patch Header Layout
*
* There are more convenient layouts in more recent generations but they're
* not available on all platforms.
*/
nir_def *values[8] = {
zero,
zero,
nir_bcsel(b, is_quad_tri, nir_load_array_var_imm(b, state.inner_factors_var, 1), zero),
nir_bcsel(b, is_quad_tri, nir_load_array_var_imm(b, state.inner_factors_var, 0), zero),
nir_bcsel(b, is_quad, nir_load_array_var_imm(b, state.outer_factors_var, 3),
nir_bcsel(b, is_tri, nir_load_array_var_imm(b, state.inner_factors_var, 0),
zero)),
nir_bcsel(b, is_quad_tri, nir_load_array_var_imm(b, state.outer_factors_var, 2), zero),
nir_bcsel(b, is_quad_tri, nir_load_array_var_imm(b, state.outer_factors_var, 1),
nir_load_array_var_imm(b, state.outer_factors_var, 0)),
nir_bcsel(b, is_quad_tri, nir_load_array_var_imm(b, state.outer_factors_var, 0),
nir_load_array_var_imm(b, state.outer_factors_var, 1)),
};
nir_store_output(b, nir_vec(b, &values[0], 4), zero, .base = 0,
.io_semantics.location = VARYING_SLOT_TESS_LEVEL_INNER);
nir_store_output(b, nir_vec(b, &values[4], 4), zero, .base = 1,
.io_semantics.location = VARYING_SLOT_TESS_LEVEL_OUTER);
nir_progress(true, impl, nir_metadata_none);
return true;
}
static bool
remap_patch_urb_offsets_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
if (!(b->shader->info.stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) &&
!(b->shader->info.stage == MESA_SHADER_TESS_EVAL && is_input(intrin)))
return false;
if (remap_tess_levels(b, intrin, state->tes_primitive_mode))
/* Handled in a different pass */
nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
if (io_sem.location == VARYING_SLOT_TESS_LEVEL_INNER ||
io_sem.location == VARYING_SLOT_TESS_LEVEL_OUTER)
return false;
int vue_slot = state->vue_map->varying_to_slot[intrin->const_index[0]];
gl_varying_slot varying = intrin->const_index[0];
const struct intel_vue_map *vue_map = data;
int vue_slot = vue_map->varying_to_slot[varying];
assert(vue_slot != -1);
intrin->const_index[0] = vue_slot;
nir_src *vertex = nir_get_io_arrayed_index_src(intrin);
if (vertex) {
if (nir_src_is_const(*vertex)) {
intrin->const_index[0] += nir_src_as_uint(*vertex) *
state->vue_map->num_per_vertex_slots;
} else {
b->cursor = nir_before_instr(&intrin->instr);
b->cursor = nir_before_instr(&intrin->instr);
/* Multiply by the number of per-vertex slots. */
nir_def *vertex_offset =
nir_imul(b,
vertex->ssa,
nir_imm_int(b,
state->vue_map->num_per_vertex_slots));
bool dyn_tess_config =
b->shader->info.stage == MESA_SHADER_TESS_EVAL &&
vue_map->layout != INTEL_VUE_LAYOUT_FIXED;
nir_def *num_per_vertex_slots =
dyn_tess_config ? intel_nir_tess_field(b, PER_VERTEX_SLOTS) :
nir_imm_int(b, vue_map->num_per_vertex_slots);
/* Add it to the existing offset */
nir_src *offset = nir_get_io_offset_src(intrin);
nir_def *total_offset =
nir_iadd(b, vertex_offset,
offset->ssa);
/* Multiply by the number of per-vertex slots. */
nir_def *vertex_offset = nir_imul(b, vertex->ssa, num_per_vertex_slots);
nir_src_rewrite(offset, total_offset);
/* Add it to the existing offset */
nir_src *offset = nir_get_io_offset_src(intrin);
nir_def *total_offset = nir_iadd(b, vertex_offset, offset->ssa);
/* In the tessellation evaluation shader, reposition the offset of
* builtins when using the separate layout.
*/
if (dyn_tess_config) {
if (varying < VARYING_SLOT_VAR0) {
nir_def *builtins_offset = intel_nir_tess_field(b, BUILTINS);
nir_def *builtins_base_offset = nir_iadd_imm(
b, builtins_offset,
vue_map->varying_to_slot[varying] - vue_map->builtins_slot_offset);
total_offset = nir_iadd(b, total_offset, builtins_base_offset);
} else {
nir_def *vertices_offset = intel_nir_tess_field(b, PER_PATCH_SLOTS);
nir_def *vertices_base_offset = nir_iadd_imm(
b, vertices_offset,
vue_map->varying_to_slot[varying] - vue_map->num_per_patch_slots);
total_offset = nir_iadd(b, total_offset, vertices_base_offset);
}
nir_intrinsic_set_base(intrin, 0);
}
nir_src_rewrite(offset, total_offset);
}
return true;
}
static bool
remap_non_header_patch_urb_offsets(nir_shader *nir, const struct intel_vue_map *vue_map)
{
return nir_shader_intrinsics_pass(nir, remap_patch_urb_offsets_instr,
nir_metadata_control_flow, (void *)vue_map);
}
/* Replace store_per_view_output to plain store_output, mapping the view index
* to IO offset. Because we only use per-view outputs for position, the offset
* pitch is always 1. */
@ -596,17 +742,22 @@ brw_nir_lower_tes_inputs(nir_shader *nir, const struct intel_vue_map *vue_map)
NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4,
nir_lower_io_lower_64bit_to_32);
/* This pass needs actual constants */
/* Run nir_io_add_const_offset_to_base so that base/io_semantics::location
* are updated and the remapping pass can look up slots in the VUE mapping.
*/
NIR_PASS(_, nir, nir_opt_constant_folding);
NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in);
NIR_PASS(_, nir, remap_non_header_patch_urb_offsets, vue_map);
NIR_PASS(_, nir, remap_tess_header_values, nir->info.tess._primitive_mode);
/* remap_non_header_patch_urb_offsets can add constant math into the
* shader; fold it away for the backend.
*/
NIR_PASS(_, nir, nir_opt_algebraic);
NIR_PASS(_, nir, nir_opt_constant_folding);
NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in);
NIR_PASS(_, nir, nir_shader_intrinsics_pass, remap_patch_urb_offsets,
nir_metadata_control_flow,
&(struct remap_patch_urb_offset_state) {
.vue_map = vue_map,
.tes_primitive_mode = nir->info.tess._primitive_mode,
});
}
static bool
@ -874,7 +1025,9 @@ brw_nir_lower_vue_outputs(nir_shader *nir)
}
void
brw_nir_lower_tcs_outputs(nir_shader *nir, const struct intel_vue_map *vue_map,
brw_nir_lower_tcs_outputs(nir_shader *nir,
const struct intel_device_info *devinfo,
const struct intel_vue_map *vue_map,
enum tess_primitive_mode tes_primitive_mode)
{
nir_foreach_shader_out_variable(var, nir) {
@ -884,17 +1037,23 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, const struct intel_vue_map *vue_map,
NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4,
nir_lower_io_lower_64bit_to_32);
/* This pass needs actual constants */
/* Run nir_io_add_const_offset_to_base so that base/io_semantics::location
* are updated and the remapping pass can look up slots in the VUE mapping.
*/
NIR_PASS(_, nir, nir_opt_constant_folding);
NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_out);
NIR_PASS(_, nir, nir_shader_intrinsics_pass, remap_patch_urb_offsets,
nir_metadata_control_flow,
&(struct remap_patch_urb_offset_state) {
.vue_map = vue_map,
.tes_primitive_mode = tes_primitive_mode,
});
NIR_PASS(_, nir, remap_non_header_patch_urb_offsets, vue_map);
if (tes_primitive_mode != TESS_PRIMITIVE_UNSPECIFIED)
NIR_PASS(_, nir, remap_tess_header_values, tes_primitive_mode);
else
NIR_PASS(_, nir, remap_tess_header_values_dynamic, devinfo);
/* remap_non_header_patch_urb_offsets can add constant math into the
* shader; fold it away for the backend.
*/
NIR_PASS(_, nir, nir_opt_constant_folding);
NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_out);
}
void

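A rough scalar model (hypothetical helper, not in the commit) of the offset arithmetic remap_patch_urb_offsets_instr emits for a TES per-vertex input when the layout is dynamic: the per-vertex stride and the builtins base come from the pushed tess config (PER_VERTEX_SLOTS and BUILTINS fields), while vue_slot and builtins_slot_offset come from the TES's own VUE map built by brw_compute_tess_vue_map().

/* Sketch: dynamic URB slot of a TES per-vertex input under the separate
 * tessellation VUE layout.
 */
static uint32_t
example_tes_vertex_input_offset(uint32_t vertex, uint32_t const_offset,
                                uint32_t per_vertex_slots, /* tess config field */
                                bool is_builtin,
                                uint32_t builtins_base,    /* tess config field */
                                int vue_slot, int builtins_slot_offset)
{
   uint32_t offset = vertex * per_vertex_slots + const_offset;
   if (is_builtin) /* gl_Position, clip distances, ... */
      offset += builtins_base + (vue_slot - builtins_slot_offset);
   return offset;
}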
View file

@ -187,7 +187,9 @@ void brw_nir_lower_fs_inputs(nir_shader *nir,
const struct intel_device_info *devinfo,
const struct brw_wm_prog_key *key);
void brw_nir_lower_vue_outputs(nir_shader *nir);
void brw_nir_lower_tcs_outputs(nir_shader *nir, const struct intel_vue_map *vue,
void brw_nir_lower_tcs_outputs(nir_shader *nir,
const struct intel_device_info *devinfo,
const struct intel_vue_map *vue,
enum tess_primitive_mode tes_primitive_mode);
void brw_nir_lower_fs_outputs(nir_shader *nir);

View file

@ -263,6 +263,7 @@ brw_compute_vue_map(const struct intel_device_info *devinfo,
* recompute state when TF changes, so we just always include it.
*/
if (layout != INTEL_VUE_LAYOUT_SEPARATE_MESH) {
vue_map->builtins_slot_offset = slot;
const uint64_t builtins = slots_valid & BITFIELD64_MASK(VARYING_SLOT_VAR0);
u_foreach_bit64(varying, builtins) {
/* Already assigned above? */
@ -273,7 +274,11 @@ brw_compute_vue_map(const struct intel_device_info *devinfo,
}
const int first_generic_slot = slot;
const uint64_t generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0);
const uint64_t generics =
(layout != INTEL_VUE_LAYOUT_FIXED ?
BITFIELD64_MASK(util_last_bit64(slots_valid)) :
slots_valid) &
~BITFIELD64_MASK(VARYING_SLOT_VAR0);
u_foreach_bit64(varying, generics) {
if (layout != INTEL_VUE_LAYOUT_FIXED) {
slot = first_generic_slot + varying - VARYING_SLOT_VAR0;
@ -282,6 +287,7 @@ brw_compute_vue_map(const struct intel_device_info *devinfo,
}
if (layout == INTEL_VUE_LAYOUT_SEPARATE_MESH) {
vue_map->builtins_slot_offset = slot;
const uint64_t builtins = slots_valid & BITFIELD64_MASK(VARYING_SLOT_VAR0);
u_foreach_bit64(varying, builtins) {
/* Already assigned above? */
@ -314,8 +320,11 @@ brw_compute_tess_vue_map(struct intel_vue_map *vue_map,
vertex_slots &= ~(VARYING_BIT_TESS_LEVEL_OUTER |
VARYING_BIT_TESS_LEVEL_INNER);
if (separate)
if (separate) {
vertex_slots |= VARYING_BIT_POS;
vertex_slots |= VARYING_BIT_CLIP_DIST0;
vertex_slots |= VARYING_BIT_CLIP_DIST1;
}
/* Make sure that the values we store in vue_map->varying_to_slot and
* vue_map->slot_to_varying won't overflow the signed chars that are used
@ -381,9 +390,11 @@ brw_compute_tess_vue_map(struct intel_vue_map *vue_map,
slot = first_generics_slot + varying - VARYING_SLOT_VAR0;
assign_vue_slot(vue_map, varying, slot++);
}
vue_map->builtins_slot_offset = slot;
u_foreach_bit64(varying, builtins)
assign_vue_slot(vue_map, varying, slot++);
} else {
vue_map->builtins_slot_offset = slot;
u_foreach_bit64(varying, builtins) {
assign_vue_slot(vue_map, varying, slot++);
}

View file

@ -386,8 +386,7 @@ elk_compile_tcs(const struct elk_compiler *compiler,
key->_tes_primitive_mode);
if (key->quads_workaround)
intel_nir_apply_tcs_quads_workaround(nir);
if (key->input_vertices > 0)
intel_nir_lower_patch_vertices_in(nir, key->input_vertices, NULL, NULL);
intel_nir_lower_patch_vertices_in(nir, key->input_vertices);
elk_postprocess_nir(nir, compiler, debug_enabled,
key->base.robust_flags);

View file

@ -13,7 +13,10 @@ extern "C" {
struct intel_device_info;
#define intel_nir_tess_field(b, field) \
nir_ubitfield_extract_imm(b, nir_load_tess_config_intel(b), \
INTEL_TESS_CONFIG_##field##_OFFSET, \
INTEL_TESS_CONFIG_##field##_SIZE)
void intel_nir_apply_tcs_quads_workaround(nir_shader *nir);
bool brw_nir_rebase_const_offset_ubo_loads(nir_shader *shader);
@ -26,9 +29,9 @@ bool intel_nir_cleanup_resource_intel(nir_shader *shader);
bool intel_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir);
bool intel_nir_lower_non_uniform_resource_intel(nir_shader *shader);
bool intel_nir_lower_patch_vertices_in(nir_shader *shader,
unsigned input_vertices,
nir_lower_instr_cb cb,
void *data);
unsigned input_vertices);
bool intel_nir_lower_patch_vertices_tes(nir_shader *shader);
bool intel_nir_lower_shading_rate_output(nir_shader *nir);
bool intel_nir_lower_sparse_intrinsics(nir_shader *nir);

View file

@ -30,6 +30,7 @@
*/
#include "intel_nir.h"
#include "intel_shader_enums.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_deref.h"
@ -83,13 +84,11 @@ intel_nir_clamp_per_vertex_loads(nir_shader *shader)
struct lower_patch_vertices_state {
unsigned input_vertices;
nir_lower_instr_cb cb;
void *data;
};
static bool
lower_patch_vertices_instr(nir_builder *b, nir_intrinsic_instr *intrin,
void *cb_data)
lower_patch_vertices_in_instr(nir_builder *b, nir_intrinsic_instr *intrin,
void *cb_data)
{
if (intrin->intrinsic != nir_intrinsic_load_patch_vertices_in)
return false;
@ -101,24 +100,44 @@ lower_patch_vertices_instr(nir_builder *b, nir_intrinsic_instr *intrin,
nir_def *val =
state->input_vertices ?
nir_imm_int(b, state->input_vertices) :
state->cb(b, &intrin->instr, state->data);
nir_def_rewrite_uses(&intrin->def, val);
nir_iadd_imm(b, intel_nir_tess_field(b, INPUT_VERTICES), 1);
nir_def_replace(&intrin->def, val);
return true;
}
bool
intel_nir_lower_patch_vertices_in(nir_shader *shader,
unsigned input_vertices,
nir_lower_instr_cb cb,
void *data)
unsigned input_vertices)
{
assert(input_vertices != 0 || cb != NULL);
assert(shader->info.stage == MESA_SHADER_TESS_CTRL);
struct lower_patch_vertices_state state = {
.input_vertices = input_vertices,
.cb = cb,
.data = data,
};
return nir_shader_intrinsics_pass(shader, lower_patch_vertices_instr,
nir_metadata_control_flow, &state);
return nir_shader_intrinsics_pass(shader, lower_patch_vertices_in_instr,
nir_metadata_none, &state);
}
static bool
lower_patch_vertices_tes_instr(nir_builder *b, nir_intrinsic_instr *intrin,
void *cb_data)
{
if (intrin->intrinsic != nir_intrinsic_load_patch_vertices_in)
return false;
b->cursor = nir_before_instr(&intrin->instr);
nir_def *field = intel_nir_tess_field(b, OUTPUT_VERTICES);
nir_def_replace(&intrin->def, nir_iadd_imm(b, field, 1));
return true;
}
bool
intel_nir_lower_patch_vertices_tes(nir_shader *shader)
{
assert(shader->info.stage == MESA_SHADER_TESS_EVAL);
return nir_shader_intrinsics_pass(shader, lower_patch_vertices_tes_instr,
nir_metadata_none, NULL);
}

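Worked example (not part of the commit): since the config packs vertex counts minus one into 5-bit fields, a draw with 3 patch control points pushes an INPUT_VERTICES field of 2, and the lowering above materializes gl_PatchVerticesIn as ubitfield_extract(tess_config, 0, 5) + 1 == 3; the TES variant does the same with the OUTPUT_VERTICES field.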
View file

@ -30,6 +30,57 @@ intel_sometimes_invert(enum intel_sometimes x)
return (enum intel_sometimes)((int)INTEL_ALWAYS - (int)x);
}
#define INTEL_TESS_CONFIG_INPUT_VERTICES_OFFSET (0)
#define INTEL_TESS_CONFIG_INPUT_VERTICES_SIZE (5)
#define INTEL_TESS_CONFIG_OUTPUT_VERTICES_OFFSET (5)
#define INTEL_TESS_CONFIG_OUTPUT_VERTICES_SIZE (5)
#define INTEL_TESS_CONFIG_BUILTINS_OFFSET (10)
#define INTEL_TESS_CONFIG_BUILTINS_SIZE (6)
#define INTEL_TESS_CONFIG_PER_VERTEX_SLOTS_OFFSET (16)
#define INTEL_TESS_CONFIG_PER_VERTEX_SLOTS_SIZE (6)
#define INTEL_TESS_CONFIG_PER_PATCH_SLOTS_OFFSET (22)
#define INTEL_TESS_CONFIG_PER_PATCH_SLOTS_SIZE (6)
enum intel_tess_configs {
/** Tessellation input vertices minus 1
*
* This field actually covers 5 bits.
*/
INTEL_TESS_CONFIG_INPUT_VERTICES = BITFIELD_BIT(INTEL_TESS_CONFIG_INPUT_VERTICES_OFFSET),
/** Tessellation output vertices minus 1
*
* This field actually covers 5 bits.
*/
INTEL_TESS_CONFIG_OUTPUT_VERTICES = BITFIELD_BIT(INTEL_TESS_CONFIG_OUTPUT_VERTICES_OFFSET),
/** Tessellation builtins per-vertex offset
*
* This field actually covers 6 bits.
*/
INTEL_TESS_CONFIG_BUILTINS = BITFIELD_BIT(INTEL_TESS_CONFIG_BUILTINS_OFFSET),
/** Number of per-vertex slots
*
* This field actually covers 6 bits.
*/
INTEL_TESS_PER_VERTEX_SLOTS = BITFIELD_BIT(INTEL_TESS_CONFIG_PER_VERTEX_SLOTS_OFFSET),
/** Number of per-patch slots
*
* This field actually covers 6 bits.
*/
INTEL_TESS_PER_PATCH_SLOTS = BITFIELD_BIT(INTEL_TESS_CONFIG_PER_PATCH_SLOTS_OFFSET),
/** Tessellation primitive modes
*
* Only one of the following 3 bits should be set.
*/
INTEL_TESS_CONFIG_QUADS = BITFIELD_BIT(29),
INTEL_TESS_CONFIG_TRIANGLES = BITFIELD_BIT(30),
INTEL_TESS_CONFIG_ISOLINES = BITFIELD_BIT(31)
};
#define INTEL_MSAA_FLAG_FIRST_VUE_SLOT_OFFSET (19)
#define INTEL_MSAA_FLAG_FIRST_VUE_SLOT_SIZE (6)
#define INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_OFFSET (25)
@ -294,6 +345,11 @@ struct intel_vue_map {
* shader outputs and tessellation evaluation shader inputs.
*/
int num_per_vertex_slots;
/**
* Location at which the builtins live.
*/
int builtins_slot_offset;
};
struct intel_cs_dispatch_info {
@ -314,6 +370,36 @@ enum intel_compute_walk_order {
INTEL_WALK_ORDER_ZYX = 5,
};
static inline uint32_t
intel_tess_config(uint32_t input_vertices,
uint32_t output_vertices,
enum intel_tess_domain tess_domain,
uint32_t num_per_patch_slots,
uint32_t num_per_vertex_slots,
uint32_t builtins_slot_offset)
{
assert(num_per_patch_slots < (1u << INTEL_TESS_CONFIG_PER_PATCH_SLOTS_SIZE));
assert(num_per_vertex_slots < (1u << INTEL_TESS_CONFIG_PER_VERTEX_SLOTS_SIZE));
assert(builtins_slot_offset < (1u << INTEL_TESS_CONFIG_BUILTINS_SIZE));
assert(input_vertices != 0);
assert((input_vertices - 1) < (1u << INTEL_TESS_CONFIG_INPUT_VERTICES_SIZE));
assert((output_vertices - 1) < (1u << INTEL_TESS_CONFIG_OUTPUT_VERTICES_SIZE));
const uint32_t primitive_flags =
tess_domain == INTEL_TESS_DOMAIN_TRI ? INTEL_TESS_CONFIG_TRIANGLES :
tess_domain == INTEL_TESS_DOMAIN_QUAD ? INTEL_TESS_CONFIG_QUADS :
INTEL_TESS_CONFIG_ISOLINES;
return
(((input_vertices - 1) & 0x1f) << INTEL_TESS_CONFIG_INPUT_VERTICES_OFFSET) |
(((output_vertices - 1) & 0x1f) << INTEL_TESS_CONFIG_OUTPUT_VERTICES_OFFSET) |
primitive_flags |
(num_per_patch_slots << INTEL_TESS_CONFIG_PER_PATCH_SLOTS_OFFSET) |
(num_per_vertex_slots << INTEL_TESS_CONFIG_PER_VERTEX_SLOTS_OFFSET) |
(builtins_slot_offset << INTEL_TESS_CONFIG_BUILTINS_OFFSET);
}
static inline bool
intel_fs_is_persample(enum intel_sometimes shader_persample_dispatch,
bool shader_per_sample_shading,

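To make the bit layout above concrete, a small decode/round-trip sketch; the helper name and the sample values are assumptions, only intel_tess_config() and the INTEL_TESS_CONFIG_* macros come from the header (assert.h assumed included).

/* Hypothetical CPU-side counterpart of intel_nir_tess_field(). */
static inline uint32_t
example_tess_config_field(uint32_t config, uint32_t offset, uint32_t size)
{
   return (config >> offset) & ((1u << size) - 1);
}

static inline void
example_tess_config_roundtrip(void)
{
   /* 3 input vertices, 4 output vertices, TRI domain, 1 per-patch slot,
    * 8 per-vertex slots, builtins starting at slot 6.
    */
   const uint32_t cfg = intel_tess_config(3, 4, INTEL_TESS_DOMAIN_TRI,
                                          1, 8, 6);
   assert(cfg == 0x40481862);
   assert(example_tess_config_field(cfg,
             INTEL_TESS_CONFIG_INPUT_VERTICES_OFFSET,
             INTEL_TESS_CONFIG_INPUT_VERTICES_SIZE) + 1 == 3);
   assert(cfg & INTEL_TESS_CONFIG_TRIANGLES);
}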
View file

@ -114,11 +114,16 @@ bool anv_nir_apply_pipeline_layout(nir_shader *shader,
struct anv_pipeline_push_map *push_map,
void *push_map_mem_ctx);
struct anv_nir_push_layout_info {
bool fragment_dynamic;
bool mesh_dynamic;
};
bool anv_nir_compute_push_layout(nir_shader *nir,
const struct anv_physical_device *pdevice,
enum brw_robustness_flags robust_flags,
bool fragment_dynamic,
bool mesh_dynamic,
const struct anv_nir_push_layout_info *info,
struct brw_base_prog_key *prog_key,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
const struct anv_pipeline_push_map *push_map,

View file

@ -30,8 +30,8 @@ bool
anv_nir_compute_push_layout(nir_shader *nir,
const struct anv_physical_device *pdevice,
enum brw_robustness_flags robust_flags,
bool fragment_dynamic,
bool mesh_dynamic,
const struct anv_nir_push_layout_info *push_info,
struct brw_base_prog_key *prog_key,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
const struct anv_pipeline_push_map *push_map,
@ -92,7 +92,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
const bool needs_wa_18019110168 =
nir->info.stage == MESA_SHADER_FRAGMENT &&
brw_nir_fragment_shader_needs_wa_18019110168(
devinfo, mesh_dynamic ? INTEL_SOMETIMES : INTEL_NEVER, nir);
devinfo, push_info->mesh_dynamic ? INTEL_SOMETIMES : INTEL_NEVER, nir);
if (push_ubo_ranges && (robust_flags & BRW_ROBUSTNESS_UBO)) {
/* We can't on-the-fly adjust our push ranges because doing so would
@ -111,7 +111,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
}
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
if (fragment_dynamic) {
if (push_info->fragment_dynamic) {
const uint32_t fs_msaa_flags_start =
anv_drv_const_offset(gfx.fs_msaa_flags);
const uint32_t fs_msaa_flags_end =
@ -132,6 +132,17 @@ anv_nir_compute_push_layout(nir_shader *nir,
}
}
const bool needs_dyn_tess_config =
nir->info.stage == MESA_SHADER_TESS_CTRL &&
container_of(prog_key, struct brw_tcs_prog_key, base)->input_vertices == 0;
if (needs_dyn_tess_config) {
const uint32_t tess_config_start = anv_drv_const_offset(gfx.tess_config);
const uint32_t tess_config_end = tess_config_start +
anv_drv_const_size(gfx.tess_config);
push_start = MIN2(push_start, tess_config_start);
push_end = MAX2(push_end, tess_config_end);
}
if (nir->info.stage == MESA_SHADER_COMPUTE && devinfo->verx10 < 125) {
/* For compute shaders, we always have to have the subgroup ID. The
* back-end compiler will "helpfully" add it for us in the last push
@ -244,7 +255,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
*/
const bool needs_padding_per_primitive =
needs_wa_18019110168 ||
(mesh_dynamic &&
(push_info->mesh_dynamic &&
(nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID));
unsigned n_push_ranges = 0;
@ -344,11 +355,20 @@ anv_nir_compute_push_layout(nir_shader *nir,
assert(n_push_ranges <= 4);
if (nir->info.stage == MESA_SHADER_TESS_CTRL && needs_dyn_tess_config) {
struct brw_tcs_prog_data *tcs_prog_data =
container_of(prog_data, struct brw_tcs_prog_data, base.base);
const uint32_t tess_config_offset =
anv_drv_const_offset(gfx.tess_config);
assert(tess_config_offset >= push_start);
tcs_prog_data->tess_config_param = (tess_config_offset - push_start) / 4;
}
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
struct brw_wm_prog_data *wm_prog_data =
container_of(prog_data, struct brw_wm_prog_data, base);
if (fragment_dynamic) {
if (push_info->fragment_dynamic) {
const uint32_t fs_msaa_flags_offset =
anv_drv_const_offset(gfx.fs_msaa_flags);
assert(fs_msaa_flags_offset >= push_start);

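Note (not from the commit): tess_config_param is expressed in 32-bit push-constant slots, hence the division of the byte offset by 4; it is the index the backend feeds to brw_uniform_reg() when lowering load_tess_config_intel in the hunks above.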
View file

@ -940,12 +940,6 @@ accept_64bit_atomic_cb(const nir_intrinsic_instr *intrin, const void *data)
intrin->def.bit_size == 64;
}
static nir_def *
build_tcs_input_vertices(nir_builder *b, nir_instr *instr, void *data)
{
return anv_load_driver_uniform(b, 1, gfx.tcs_input_vertices);
}
static void
fixup_large_workgroup_image_coherency(nir_shader *nir)
{
@ -1055,15 +1049,6 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_function_temp, 16);
}
/* The patch control points are delivered through a push constant when
* dynamic.
*/
if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
NIR_PASS(_, nir, intel_nir_lower_patch_vertices_in,
stage->key.tcs.input_vertices,
build_tcs_input_vertices, NULL);
}
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
/* Apply lowering for 64bit atomics pre-Xe2 */
@ -1169,9 +1154,12 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
NIR_PASS(_, nir, anv_nir_compute_push_layout,
pdevice, stage->key.base.robust_flags,
anv_graphics_pipeline_stage_fragment_dynamic(stage),
anv_graphics_pipeline_stage_mesh_dynamic(stage),
prog_data, &stage->bind_map, &push_map, mem_ctx);
&(struct anv_nir_push_layout_info) {
.fragment_dynamic = anv_graphics_pipeline_stage_fragment_dynamic(stage),
.mesh_dynamic = anv_graphics_pipeline_stage_mesh_dynamic(stage),
},
&stage->key.base, prog_data,
&stage->bind_map, &push_map, mem_ctx);
NIR_PASS(_, nir, anv_nir_lower_resource_intel, pdevice,
stage->bind_map.layout_type);

View file

@ -1562,7 +1562,7 @@ enum anv_gfx_state_bits {
ANV_GFX_STATE_WA_14018283232, /* Fake state to implement workaround */
ANV_GFX_STATE_TBIMR_TILE_PASS_INFO,
ANV_GFX_STATE_FS_MSAA_FLAGS,
ANV_GFX_STATE_TCS_INPUT_VERTICES,
ANV_GFX_STATE_TESS_CONFIG,
ANV_GFX_STATE_MESH_PROVOKING_VERTEX,
ANV_GFX_STATE_MAX,
@ -1949,12 +1949,9 @@ struct anv_gfx_dynamic_state {
enum intel_msaa_flags fs_msaa_flags;
/**
* Dynamic TCS input vertices, this value can be different from
* anv_driver_constants::gfx::tcs_input_vertices, as the push constant
* value only needs to be updated for tesselation control shaders
* dynamically checking the value.
* Dynamic tessellation configuration (see enum intel_tess_config).
*/
uint32_t tcs_input_vertices;
uint32_t tess_config;
/**
* Provoking vertex index, sent to the mesh shader for Wa_18019110168.
@ -3972,8 +3969,8 @@ struct anv_push_constants {
/** Dynamic MSAA value */
uint32_t fs_msaa_flags;
/** Dynamic TCS input vertices */
uint32_t tcs_input_vertices;
/** Dynamic TCS/TES configuration */
uint32_t tess_config;
/** Robust access pushed registers. */
uint8_t push_reg_mask[MESA_SHADER_STAGES][4];

View file

@ -186,7 +186,7 @@ anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state)
NAME(WA_14018283232);
NAME(TBIMR_TILE_PASS_INFO);
NAME(FS_MSAA_FLAGS);
NAME(TCS_INPUT_VERTICES);
NAME(TESS_CONFIG);
NAME(MESH_PROVOKING_VERTEX);
default: UNREACHABLE("invalid state");
}

View file

@ -2440,10 +2440,19 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
* reemit if needed.
*/
const struct brw_tcs_prog_data *tcs_prog_data = get_gfx_tcs_prog_data(gfx);
if (tcs_prog_data && tcs_prog_data->input_vertices == 0 &&
const bool tcs_dynamic =
tcs_prog_data && tcs_prog_data->input_vertices == 0;
if (tcs_dynamic &&
((gfx->dirty & ANV_CMD_DIRTY_HS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS)))
SET(TCS_INPUT_VERTICES, tcs_input_vertices, dyn->ts.patch_control_points);
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS))) {
SET(TESS_CONFIG, tess_config,
intel_tess_config(dyn->ts.patch_control_points,
tcs_prog_data->instances,
0,
tcs_prog_data->base.vue_map.num_per_patch_slots,
tcs_prog_data->base.vue_map.num_per_vertex_slots,
tcs_prog_data->base.vue_map.builtins_slot_offset));
}
#if INTEL_WA_18019110168_GFX_VER
const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx);
@ -3388,7 +3397,7 @@ genX(emit_urb_setup)(struct anv_batch *batch,
}
#if GFX_VERx10 >= 125
if (device->vk.enabled_extensions.EXT_mesh_shader) {
if (device->vk.enabled_features.meshShader) {
anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), urb) {
if (urb_cfg->size[MESA_SHADER_TASK] > 0)
urb.TASKURBEntryAllocationSize = urb_cfg->size[MESA_SHADER_TASK] - 1;
@ -3463,8 +3472,8 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
* Values provided by push constants
*/
if (IS_DIRTY(TCS_INPUT_VERTICES)) {
push_consts->gfx.tcs_input_vertices = dyn->ts.patch_control_points;
if (IS_DIRTY(TESS_CONFIG)) {
push_consts->gfx.tess_config = hw_state->tess_config;
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
gfx->base.push_constants_data_dirty = true;
}