mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 11:38:05 +02:00
pan/compiler: Group outputs in lower_vs_outputs
Previously bifrost_nir_lower_shader_output grouped outputs in separate if blocks and made a best-effort attempt to group them together. This also assumed that pan_nir_lower_store_component wrote each output only once and that nir_lower_io_vars_to_temporaries pulled them out of any control flow. Now all of these are handled by the new pan_nir_lower_vs_outputs pass that handles write masks, control flow, per_view and grouping for IDVS. This makes the overall dependencies much simpler, ensures that the stores are grouped in the same ifs and should be more robust. Signed-off-by: Lorenzo Rossi <lorenzo.rossi@collabora.com> Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40537>
This commit is contained in:
parent
66bee415ad
commit
445a22acbd
6 changed files with 193 additions and 196 deletions
|
|
@ -1015,34 +1015,6 @@ bi_is_zs(unsigned location)
|
|||
return location == FRAG_RESULT_DEPTH || location == FRAG_RESULT_STENCIL;
|
||||
}
|
||||
|
||||
static bool
|
||||
bifrost_nir_lower_shader_output_impl(struct nir_builder *b,
|
||||
nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_store_output &&
|
||||
intr->intrinsic != nir_intrinsic_store_per_view_output)
|
||||
return false;
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
unsigned mask = va_shader_output_from_loc(sem.location);
|
||||
|
||||
b->cursor = nir_instr_remove(&intr->instr);
|
||||
nir_def *shader_output = nir_load_shader_output_pan(b);
|
||||
|
||||
nir_push_if(b, nir_i2b(b, nir_iand_imm(b, shader_output, mask)));
|
||||
nir_builder_instr_insert(b, &intr->instr);
|
||||
nir_pop_if(b, NULL);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
bifrost_nir_lower_shader_output(nir_shader *shader)
|
||||
{
|
||||
return nir_shader_intrinsics_pass(shader,
|
||||
bifrost_nir_lower_shader_output_impl,
|
||||
nir_metadata_none, NULL);
|
||||
}
|
||||
|
||||
/* Atomics and memory write on the vertex stage have implementation-defined
|
||||
* behaviors on how many invocations will happen. However for some reasons,
|
||||
* atomic counters on GL/GLES specs are quite ambigous here and even have tests
|
||||
|
|
@ -6133,13 +6105,6 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id)
|
|||
NIR_PASS(_, nir, nir_lower_viewport_transform);
|
||||
NIR_PASS(_, nir, nir_lower_point_size, 1.0, 0.0);
|
||||
NIR_PASS(_, nir, pan_nir_lower_noperspective_vs);
|
||||
|
||||
/* nir_lower[_explicit]_io is lazy and emits mul+add chains even
|
||||
* for offsets it could figure out are constant. Do some
|
||||
* constant folding before pan_nir_lower_store_component below.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(_, nir, pan_nir_lower_store_component);
|
||||
}
|
||||
|
||||
nir_lower_mem_access_bit_sizes_options mem_size_options = {
|
||||
|
|
@ -6853,26 +6818,6 @@ bifrost_compile_shader_nir(nir_shader *nir,
|
|||
if (info->vs.idvs && nir->info.writes_memory)
|
||||
NIR_PASS(_, nir, bifrost_nir_lower_vs_atomics);
|
||||
|
||||
if (info->vs.idvs) {
|
||||
bool shader_output_pass = false;
|
||||
NIR_PASS(shader_output_pass, nir, bifrost_nir_lower_shader_output);
|
||||
|
||||
/* If shader output lower made progress, ensure to merge adjacent if that were added */
|
||||
if (shader_output_pass) {
|
||||
/* First we clean up and deduplicate added condition logic */
|
||||
NIR_PASS(_, nir, nir_opt_copy_prop);
|
||||
NIR_PASS(_, nir, nir_opt_dce);
|
||||
NIR_PASS(_, nir, nir_opt_cse);
|
||||
|
||||
/* We then move vec closer to their I/O stores to ensure nothing is between ifs */
|
||||
NIR_PASS(_, nir, nir_opt_sink, nir_move_copies);
|
||||
NIR_PASS(_, nir, nir_opt_move, nir_move_copies);
|
||||
|
||||
/* Finally run if optimization to ensure trivial if blocks are merged together */
|
||||
NIR_PASS(_, nir, nir_opt_if, 0);
|
||||
}
|
||||
}
|
||||
|
||||
bool has_extended_fifo = false;
|
||||
if (pan_arch(inputs->gpu_id) >= 9) {
|
||||
const uint64_t outputs = nir->info.outputs_written;
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ libpanfrost_compiler_files = files(
|
|||
'pan_nir_lower_image_ms.c',
|
||||
'pan_nir_lower_noperspective.c',
|
||||
'pan_nir_lower_sample_position.c',
|
||||
'pan_nir_lower_store_component.c',
|
||||
'pan_nir_lower_texel_buffer_index.c',
|
||||
'pan_nir_lower_vertex_id.c',
|
||||
'pan_nir_lower_vs_outputs.c',
|
||||
|
|
@ -29,8 +28,8 @@ subdir('midgard')
|
|||
libpanfrost_compiler = static_library(
|
||||
'panfrost_compiler',
|
||||
[libpanfrost_compiler_files],
|
||||
include_directories : [inc_include, inc_src],
|
||||
dependencies: [idep_nir, idep_mesautil],
|
||||
include_directories : [inc_include, inc_src, inc_valhall],
|
||||
dependencies: [idep_nir, idep_mesautil, idep_valhall_enums_h, idep_bi_opcodes_h],
|
||||
link_with: [libpanfrost_midgard, libpanfrost_bifrost],
|
||||
c_args : [no_override_init_args],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
|
|
|
|||
|
|
@ -388,13 +388,6 @@ midgard_postprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id)
|
|||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
NIR_PASS(_, nir, nir_lower_viewport_transform);
|
||||
NIR_PASS(_, nir, nir_lower_point_size, 1.0, 0.0);
|
||||
|
||||
/* nir_lower[_explicit]_io is lazy and emits mul+add chains even
|
||||
* for offsets it could figure out are constant. Do some
|
||||
* constant folding before pan_nir_lower_store_component below.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(_, nir, pan_nir_lower_store_component);
|
||||
}
|
||||
|
||||
/* Could be eventually useful for Vulkan, but we don't expect it to have
|
||||
|
|
@ -2989,6 +2982,12 @@ midgard_compile_shader_nir(nir_shader *nir,
|
|||
pan_nir_collect_noperspective_varyings_fs(nir);
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
NIR_PASS(_, nir, pan_nir_lower_vs_outputs, inputs->gpu_id,
|
||||
inputs->varying_layout, false /* has_idvs */,
|
||||
false /* has_extended_fifo */);
|
||||
}
|
||||
|
||||
/* Optimisation passes */
|
||||
optimise_nir(nir, ctx->quirks, inputs->is_blend);
|
||||
|
||||
|
|
|
|||
|
|
@ -47,8 +47,6 @@ pan_nir_tile_default_coverage(nir_builder *b)
|
|||
|
||||
bool pan_nir_lower_bool_to_bitsize(nir_shader *shader);
|
||||
|
||||
bool pan_nir_lower_store_component(nir_shader *shader);
|
||||
|
||||
bool pan_nir_lower_vertex_id(nir_shader *shader);
|
||||
|
||||
bool pan_nir_lower_image_ms(nir_shader *shader);
|
||||
|
|
|
|||
|
|
@ -1,86 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2022 Collabora Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "pan_nir.h"
|
||||
|
||||
/*
|
||||
* If the shader packs multiple varyings into the same location with different
|
||||
* location_frac, we'll need to lower to a single varying store that collects
|
||||
* all of the channels together. This is because the varying instruction on
|
||||
* Midgard and Bifrost is slot-based, writing out an entire vec4 slot at a time.
|
||||
*
|
||||
* NOTE: this expects all stores to be outside of control flow, and with
|
||||
* constant offsets. It should be run after nir_lower_io_vars_to_temporaries.
|
||||
*/
|
||||
static bool
|
||||
lower_store_component(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_store_output &&
|
||||
intr->intrinsic != nir_intrinsic_store_per_view_output)
|
||||
return false;
|
||||
|
||||
struct hash_table_u64 *slots = data;
|
||||
unsigned component = nir_intrinsic_component(intr);
|
||||
nir_src *slot_src = nir_get_io_offset_src(intr);
|
||||
uint64_t slot = nir_src_as_uint(*slot_src) + nir_intrinsic_base(intr);
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_store_per_view_output) {
|
||||
uint64_t view_index = nir_src_as_uint(intr->src[1]);
|
||||
slot |= view_index << 32;
|
||||
}
|
||||
|
||||
nir_intrinsic_instr *prev = _mesa_hash_table_u64_search(slots, slot);
|
||||
unsigned mask = (prev ? nir_intrinsic_write_mask(prev) : 0);
|
||||
|
||||
nir_def *value = intr->src[0].ssa;
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
nir_def *undef = nir_undef(b, 1, value->bit_size);
|
||||
nir_def *channels[4] = {undef, undef, undef, undef};
|
||||
|
||||
/* Copy old */
|
||||
u_foreach_bit(i, mask) {
|
||||
assert(prev != NULL);
|
||||
nir_def *prev_ssa = prev->src[0].ssa;
|
||||
channels[i] = nir_channel(b, prev_ssa, i);
|
||||
}
|
||||
|
||||
/* Copy new */
|
||||
unsigned new_mask = nir_intrinsic_write_mask(intr);
|
||||
mask |= (new_mask << component);
|
||||
|
||||
u_foreach_bit(i, new_mask) {
|
||||
assert(component + i < 4);
|
||||
channels[component + i] = nir_channel(b, value, i);
|
||||
}
|
||||
|
||||
intr->num_components = util_last_bit(mask);
|
||||
nir_src_rewrite(&intr->src[0], nir_vec(b, channels, intr->num_components));
|
||||
|
||||
nir_intrinsic_set_component(intr, 0);
|
||||
nir_intrinsic_set_write_mask(intr, mask);
|
||||
|
||||
if (prev) {
|
||||
_mesa_hash_table_u64_remove(slots, slot);
|
||||
nir_instr_remove(&prev->instr);
|
||||
}
|
||||
|
||||
_mesa_hash_table_u64_insert(slots, slot, intr);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
pan_nir_lower_store_component(nir_shader *s)
|
||||
{
|
||||
assert(s->info.stage == MESA_SHADER_VERTEX);
|
||||
|
||||
struct hash_table_u64 *stores = _mesa_hash_table_u64_create(NULL);
|
||||
bool progress = nir_shader_intrinsics_pass(
|
||||
s, lower_store_component,
|
||||
nir_metadata_control_flow, stores);
|
||||
_mesa_hash_table_u64_destroy(stores);
|
||||
return progress;
|
||||
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2025 Collabora, Ltd.
|
||||
* Copyright (C) 2025-2026 Collabora, Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
|
|
@ -7,21 +7,30 @@
|
|||
#include "nir_builder.h"
|
||||
|
||||
#include "panfrost/model/pan_model.h"
|
||||
#include "bifrost/valhall/valhall.h"
|
||||
#include "util/bitscan.h"
|
||||
|
||||
struct lower_vs_outputs_ctx {
|
||||
unsigned arch;
|
||||
const struct pan_varying_layout *varying_layout;
|
||||
bool has_idvs;
|
||||
bool has_extended_fifo;
|
||||
|
||||
nir_variable *variables[PAN_MAX_VARYINGS];
|
||||
uint8_t per_view_written[PAN_MAX_VARYINGS];
|
||||
unsigned used_buckets;
|
||||
};
|
||||
|
||||
static void
|
||||
build_attr_buf_write(struct nir_builder *b, nir_def *data,
|
||||
const struct pan_varying_slot *slot, uint32_t view_index,
|
||||
build_attr_buf_write(struct nir_builder *b, nir_def *data, uint32_t idx,
|
||||
uint32_t view_index,
|
||||
const struct lower_vs_outputs_ctx *ctx)
|
||||
{
|
||||
/* We need the precise memory layout */
|
||||
pan_varying_layout_require_layout(ctx->varying_layout);
|
||||
const struct pan_varying_slot *slot =
|
||||
pan_varying_layout_slot_at(ctx->varying_layout, idx);
|
||||
assert(slot);
|
||||
|
||||
nir_def *index = nir_load_idvs_output_buf_index_pan(b);
|
||||
|
||||
|
|
@ -78,61 +87,146 @@ build_attr_desc_write(struct nir_builder *b, nir_def *data, uint32_t base,
|
|||
nir_store_global_cvt_pan(b, data, addr, cvt, .src_type = src_type);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_vs_output_store(struct nir_builder *b,
|
||||
nir_intrinsic_instr *store, void *cb_data)
|
||||
static void
|
||||
build_store_output(struct nir_builder *b, nir_def *data, uint32_t idx,
|
||||
nir_alu_type src_type,
|
||||
const struct lower_vs_outputs_ctx *ctx)
|
||||
{
|
||||
const struct lower_vs_outputs_ctx *ctx = cb_data;
|
||||
pan_varying_layout_require_format(ctx->varying_layout);
|
||||
|
||||
if (store->intrinsic != nir_intrinsic_store_output &&
|
||||
store->intrinsic != nir_intrinsic_store_per_view_output)
|
||||
return false;
|
||||
const struct pan_varying_slot *slot =
|
||||
pan_varying_layout_slot_at(ctx->varying_layout, idx);
|
||||
assert(slot);
|
||||
|
||||
b->cursor = nir_instr_remove(&store->instr);
|
||||
nir_io_semantics sem = {
|
||||
.location = slot->location,
|
||||
.num_slots = 1,
|
||||
};
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(store);
|
||||
nir_alu_type src_type = nir_intrinsic_src_type(store);
|
||||
unsigned src_bit_size = nir_alu_type_get_type_size(src_type);
|
||||
nir_store_output(b, data, nir_imm_int(b, 0), .base = idx, .range = 1,
|
||||
.write_mask = BITFIELD_MASK(slot->ncomps), .component = 0,
|
||||
.src_type = src_type, .io_semantics = sem);
|
||||
}
|
||||
|
||||
static void
|
||||
lower_vs_output_store(struct nir_builder *b,
|
||||
unsigned view_index,
|
||||
unsigned idx, const struct lower_vs_outputs_ctx *ctx)
|
||||
{
|
||||
nir_variable *var = ctx->variables[idx];
|
||||
assert(var != NULL);
|
||||
|
||||
nir_alu_type src_type =
|
||||
nir_get_nir_type_for_glsl_type(glsl_without_array(var->type));
|
||||
|
||||
bool is_per_view = glsl_type_is_array(var->type);
|
||||
assert(is_per_view || view_index == 0);
|
||||
nir_def *data = is_per_view ? nir_load_array_var_imm(b, var, view_index)
|
||||
: nir_load_var(b, var);
|
||||
|
||||
if (ctx->arch >= 9 && ctx->has_idvs) {
|
||||
build_attr_buf_write(b, data, idx, view_index, ctx);
|
||||
} else if (ctx->arch >= 6) {
|
||||
assert(!is_per_view);
|
||||
build_attr_desc_write(b, data, idx, src_type, ctx);
|
||||
} else {
|
||||
assert(!is_per_view);
|
||||
build_store_output(b, data, idx, src_type, ctx);
|
||||
}
|
||||
}
|
||||
|
||||
static nir_variable *
|
||||
get_or_create_var(nir_builder *b, struct lower_vs_outputs_ctx *ctx,
|
||||
nir_intrinsic_instr *intr)
|
||||
{
|
||||
assert(intr->intrinsic == nir_intrinsic_store_output ||
|
||||
intr->intrinsic == nir_intrinsic_store_per_view_output);
|
||||
bool is_per_view = intr->intrinsic == nir_intrinsic_store_per_view_output;
|
||||
ASSERTED nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
unsigned slot_idx = nir_intrinsic_base(intr);
|
||||
nir_alu_type src_type = nir_intrinsic_src_type(intr);
|
||||
enum glsl_base_type base_type = nir_get_glsl_base_type_for_nir_type(src_type);
|
||||
|
||||
/* Indirect array varyings are not yet supported (num_slots > 1) */
|
||||
assert(sem.num_slots == 1);
|
||||
assert(nir_src_as_uint(*nir_get_io_offset_src(store)) == 0);
|
||||
assert(nir_src_as_uint(*nir_get_io_offset_src(intr)) == 0);
|
||||
|
||||
/* We need the slot section for cache hints */
|
||||
nir_variable *var = ctx->variables[slot_idx];
|
||||
if (var != NULL) {
|
||||
/* All stores should agree per-location */
|
||||
assert(glsl_type_is_array(var->type) == is_per_view);
|
||||
assert(glsl_get_base_type(glsl_without_array(var->type)) == base_type);
|
||||
return var;
|
||||
}
|
||||
|
||||
/* We need the slot section for the number of components */
|
||||
pan_varying_layout_require_format(ctx->varying_layout);
|
||||
const struct pan_varying_slot *slot =
|
||||
pan_varying_layout_find_slot(ctx->varying_layout, sem.location);
|
||||
pan_varying_layout_slot_at(ctx->varying_layout, slot_idx);
|
||||
/* Special slots are read only */
|
||||
assert(slot && slot->section != PAN_VARYING_SECTION_SPECIAL);
|
||||
/* From v9, IO is resized to the real size of the slot */
|
||||
assert(ctx->arch < 9 ||
|
||||
src_bit_size == nir_alu_type_get_type_size(slot->alu_type));
|
||||
assert(slot && slot->section != PAN_VARYING_SECTION_SPECIAL &&
|
||||
slot->location == sem.location);
|
||||
|
||||
nir_def *data = store->src[0].ssa;
|
||||
assert(src_bit_size == data->bit_size);
|
||||
const glsl_type *var_type = glsl_vector_type(base_type, slot->ncomps);
|
||||
if (is_per_view)
|
||||
var_type = glsl_array_type(var_type, PAN_MAX_MULTIVIEW_VIEW_COUNT, false);
|
||||
|
||||
/* Trim the input so we don't write extra channels at the end. In effect,
|
||||
* we fill in all the intermediate "holes" in the write mask, since we
|
||||
* can't mask off stores. Since nir_lower_io_vars_to_temporaries ensures
|
||||
* each varying is written at most once, anything that's masked out is
|
||||
* undefined, so it doesn't matter what we write there. So we may as well
|
||||
* do the simplest thing possible.
|
||||
var = nir_local_variable_create(b->impl, var_type, "vs_out_tmp");
|
||||
ctx->variables[slot_idx] = var;
|
||||
return var;
|
||||
}
|
||||
|
||||
static bool
|
||||
gather_vs_outputs(struct nir_builder *b,
|
||||
nir_intrinsic_instr *intr, void *cb_data)
|
||||
{
|
||||
struct lower_vs_outputs_ctx *ctx = cb_data;
|
||||
|
||||
if (intr->intrinsic != nir_intrinsic_store_output &&
|
||||
intr->intrinsic != nir_intrinsic_store_per_view_output)
|
||||
return false;
|
||||
|
||||
unsigned mask = nir_intrinsic_write_mask(intr);
|
||||
unsigned slot_idx = nir_intrinsic_base(intr);
|
||||
unsigned component = nir_intrinsic_component(intr);
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
|
||||
bool is_per_view = intr->intrinsic == nir_intrinsic_store_per_view_output;
|
||||
unsigned view_index = is_per_view ? nir_src_as_uint(intr->src[1]) : 0;
|
||||
|
||||
ctx->per_view_written[slot_idx] |= BITFIELD_BIT(view_index);
|
||||
ctx->used_buckets |= BITFIELD_BIT(va_shader_output_from_loc(sem.location));
|
||||
|
||||
b->cursor = nir_instr_remove(&intr->instr);
|
||||
nir_variable *var = get_or_create_var(b, ctx, intr);
|
||||
|
||||
unsigned ncomps = glsl_get_vector_elements(glsl_without_array(var->type));
|
||||
|
||||
/* store_output semantics differ from store_var when we write a subset of
|
||||
* the components, we need to re-swizzle the channels manually.
|
||||
*/
|
||||
const nir_component_mask_t write_mask = nir_intrinsic_write_mask(store);
|
||||
data = nir_trim_vector(b, data, util_last_bit(write_mask));
|
||||
nir_def *stored = intr->src[0].ssa;
|
||||
if (stored->num_components != ncomps) {
|
||||
nir_def *undef = nir_undef(b, 1, stored->bit_size);
|
||||
assert(ncomps <= 4);
|
||||
nir_def *channels[4] = {undef, undef, undef, undef};
|
||||
|
||||
if (ctx->arch >= 9 && ctx->has_idvs) {
|
||||
uint32_t view_index = 0;
|
||||
if (store->intrinsic == nir_intrinsic_store_per_view_output)
|
||||
view_index = nir_src_as_uint(store->src[1]);
|
||||
for (unsigned i = 0; i < stored->num_components; i++) {
|
||||
assert(component + i < 4);
|
||||
channels[component + i] = nir_channel(b, stored, i);
|
||||
}
|
||||
|
||||
build_attr_buf_write(b, data, slot, view_index, ctx);
|
||||
stored = nir_vec(b, channels, ncomps);
|
||||
mask = mask << component; /* mask was relative to intr->component */
|
||||
} else {
|
||||
uint32_t base = nir_intrinsic_base(store);
|
||||
assert(store->intrinsic != nir_intrinsic_store_per_view_output);
|
||||
build_attr_desc_write(b, data, base, src_type, ctx);
|
||||
assert(component == 0);
|
||||
}
|
||||
|
||||
if (is_per_view)
|
||||
nir_store_array_var_imm(b, var, view_index, stored, mask);
|
||||
else
|
||||
nir_store_var(b, var, stored, mask);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -143,14 +237,62 @@ pan_nir_lower_vs_outputs(nir_shader *shader, unsigned gpu_id,
|
|||
bool has_extended_fifo)
|
||||
{
|
||||
assert(shader->info.stage == MESA_SHADER_VERTEX);
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
||||
|
||||
const struct lower_vs_outputs_ctx ctx = {
|
||||
struct lower_vs_outputs_ctx ctx = {
|
||||
.arch = pan_arch(gpu_id),
|
||||
.varying_layout = varying_layout,
|
||||
.has_idvs = has_idvs,
|
||||
.has_extended_fifo = has_extended_fifo,
|
||||
.variables = {NULL, },
|
||||
.per_view_written = {0, },
|
||||
.used_buckets = 0,
|
||||
};
|
||||
return nir_shader_intrinsics_pass(shader, lower_vs_output_store,
|
||||
nir_metadata_control_flow,
|
||||
(void *)&ctx);
|
||||
/* We use uint8 as a viewcount bitmask */
|
||||
assert(PAN_MAX_MULTIVIEW_VIEW_COUNT <= 8 * sizeof(ctx.per_view_written[0]));
|
||||
bool progress = nir_shader_intrinsics_pass(shader, gather_vs_outputs,
|
||||
nir_metadata_control_flow,
|
||||
(void *)&ctx);
|
||||
if (!progress)
|
||||
return false;
|
||||
|
||||
nir_builder builder = nir_builder_at(nir_after_impl(impl));
|
||||
nir_builder *b = &builder;
|
||||
nir_def *shader_output = has_idvs ? nir_load_shader_output_pan(b) : NULL;
|
||||
|
||||
for (int out = 0; out < VA_SHADER_OUTPUT_COUNT; out++) {
|
||||
/* Avoid looping a lot and adding ifs (in case of IDVS) */
|
||||
if (!(ctx.used_buckets & BITFIELD_BIT(out)))
|
||||
continue;
|
||||
|
||||
nir_if *nif = NULL;
|
||||
if (has_idvs) {
|
||||
nir_def *should_write =
|
||||
nir_i2b(b, nir_iand_imm(b, shader_output, BITFIELD_BIT(out)));
|
||||
nif = nir_push_if(b, should_write);
|
||||
}
|
||||
|
||||
for (unsigned idx = 0; idx < varying_layout->count; idx++) {
|
||||
nir_variable *var = ctx.variables[idx];
|
||||
if (var == NULL)
|
||||
continue;
|
||||
|
||||
const struct pan_varying_slot *slot =
|
||||
pan_varying_layout_slot_at(varying_layout, idx);
|
||||
if (slot == NULL || va_shader_output_from_loc(slot->location) != out)
|
||||
continue;
|
||||
|
||||
/* Non-multiview stores are treated as writing just view index 0 */
|
||||
unsigned view_mask = ctx.per_view_written[idx];
|
||||
assert(view_mask != 0);
|
||||
|
||||
u_foreach_bit(view_index, view_mask)
|
||||
lower_vs_output_store(b, view_index, idx, &ctx);
|
||||
}
|
||||
|
||||
if (nif)
|
||||
nir_pop_if(b, nif);
|
||||
}
|
||||
|
||||
return nir_progress(true, impl, nir_metadata_none);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue