ac/nir: switch legacy GS lowering to ac_nir_prerast_out completely

This changes legacy GS outputs to use the same logic as NGG GS.
It enables the same optimizations that NGG has, such as forwarding
constant GS output components to the GS copy shader at compile time.

ac_nir_gs_output_info is removed.

GS output info is no longer passed to ac_nir_lower_legacy_gs and
ac_nir_create_gs_copy_shader separately.

ac_nir_lower_legacy_gs now gathers ac_nir_prerast_out, generates GSVS ring
stores, and also generates the GS copy shader with GSVS ring loads.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35352>
Marek Olšák 2025-05-31 11:53:18 -04:00 committed by Marge Bot
parent 723ce13f90
commit 42ad7543b8
8 changed files with 119 additions and 225 deletions
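Before the per-file hunks below, here is a minimal sketch of the new driver-side calling convention implied by this change. It is an illustration only, not taken from any driver: the wrapper function, parameter names, and the omitted option fields are placeholders. The point is that callers no longer build an ac_nir_gs_output_info; they only fill ac_nir_lower_legacy_gs_options and receive the generated GS copy shader back.

/* Sketch only: illustrates the new entry point, not a verbatim driver. */
static nir_shader *
lower_legacy_gs_example(nir_shader *gs, enum amd_gfx_level gfx_level,
                        uint8_t *param_offsets, uint32_t clip_cull_mask)
{
   ac_nir_lower_legacy_gs_options options = {
      .gfx_level = gfx_level,
      .export_clipdist_mask = clip_cull_mask,
      .param_offsets = param_offsets,
      /* query/export flags omitted for brevity */
   };
   nir_shader *gs_copy_shader = NULL;

   /* Gathers ac_nir_prerast_out internally, rewrites the GS to store to the
    * GSVS ring, and builds the copy shader that loads from it. */
   ac_nir_lower_legacy_gs(gs, &options, &gs_copy_shader);

   return gs_copy_shader; /* compiled separately, alongside the main GS */
}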


@@ -253,22 +253,6 @@ ac_nir_lower_global_access(nir_shader *shader);
bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level);
bool ac_nir_lower_image_opcodes(nir_shader *nir);
typedef struct ac_nir_gs_output_info {
const uint8_t *streams;
const uint8_t *streams_16bit_lo;
const uint8_t *streams_16bit_hi;
const uint8_t *varying_mask;
const uint8_t *varying_mask_16bit_lo;
const uint8_t *varying_mask_16bit_hi;
const uint8_t *sysval_mask;
/* type for each 16bit slot component */
nir_alu_type (*types_16bit_lo)[4];
nir_alu_type (*types_16bit_hi)[4];
} ac_nir_gs_output_info;
bool
ac_nir_lower_legacy_vs(nir_shader *nir,
enum amd_gfx_level gfx_level,
@@ -286,7 +270,6 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
typedef struct {
bool has_gen_prim_query;
bool has_pipeline_stats_query;
ac_nir_gs_output_info *output_info;
enum amd_gfx_level gfx_level;
uint32_t export_clipdist_mask;
@@ -300,9 +283,6 @@ typedef struct {
bool force_vrs;
} ac_nir_lower_legacy_gs_options;
nir_shader *
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options);
bool
ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options,
nir_shader **gs_copy_shader);


@@ -11,11 +11,11 @@
#include "nir_xfb_info.h"
nir_shader *
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options)
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options,
ac_nir_prerast_out *out)
{
nir_builder b = nir_builder_init_simple_shader(
MESA_SHADER_VERTEX, gs_nir->options, "gs_copy");
ac_nir_gs_output_info *output_info = options->output_info;
b.shader->info.outputs_written = gs_nir->info.outputs_written;
b.shader->info.outputs_written_16bit = gs_nir->info.outputs_written_16bit;
@@ -37,68 +37,63 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_op
if (stream_id)
nir_push_if(&b, nir_ieq_imm(&b, stream_id, stream));
uint32_t offset = 0;
ac_nir_prerast_out out = {0};
if (output_info->types_16bit_lo)
memcpy(&out.types_16bit_lo, output_info->types_16bit_lo, sizeof(out.types_16bit_lo));
if (output_info->types_16bit_hi)
memcpy(&out.types_16bit_hi, output_info->types_16bit_hi, sizeof(out.types_16bit_hi));
unsigned offset = 0;
u_foreach_bit64 (i, gs_nir->info.outputs_written) {
const uint8_t usage_mask = output_info->varying_mask[i] | output_info->sysval_mask[i];
out.infos[i].components_mask = usage_mask;
out.infos[i].as_varying_mask = output_info->varying_mask[i];
out.infos[i].as_sysval_mask = output_info->sysval_mask[i];
u_foreach_bit (j, usage_mask) {
if (((output_info->streams[i] >> (j * 2)) & 0x3) != stream)
u_foreach_bit (j, out->infos[i].components_mask) {
if (((out->infos[i].stream >> (j * 2)) & 0x3) != stream)
continue;
out.outputs[i][j] =
if (ac_nir_is_const_output(out, i, j)) {
out->outputs[i][j] = ac_nir_get_const_output(&b, 32, out, i, j);
continue;
}
unsigned base = offset * gs_nir->info.gs.vertices_out * 16;
out->outputs[i][j] =
nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
.base = offset,
.base = base,
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
offset += gs_nir->info.gs.vertices_out * 16 * 4;
offset += 4;
}
}
u_foreach_bit (i, gs_nir->info.outputs_written_16bit) {
out.infos_16bit_lo[i].components_mask = output_info->varying_mask_16bit_lo[i];
out.infos_16bit_lo[i].as_varying_mask = output_info->varying_mask_16bit_lo[i];
out.infos_16bit_hi[i].components_mask = output_info->varying_mask_16bit_hi[i];
out.infos_16bit_hi[i].as_varying_mask = output_info->varying_mask_16bit_hi[i];
unsigned mask = out->infos_16bit_lo[i].components_mask |
out->infos_16bit_hi[i].components_mask;
for (unsigned j = 0; j < 4; j++) {
out.infos[i].as_varying_mask = output_info->varying_mask[i];
out.infos[i].as_sysval_mask = output_info->sysval_mask[i];
u_foreach_bit (j, mask) {
bool has_lo_16bit = ((out->infos_16bit_lo[i].stream >> (j * 2)) & 0x3) == stream;
bool has_hi_16bit = ((out->infos_16bit_hi[i].stream >> (j * 2)) & 0x3) == stream;
bool has_lo_16bit = (output_info->varying_mask_16bit_lo[i] & (1 << j)) &&
((output_info->streams_16bit_lo[i] >> (j * 2)) & 0x3) == stream;
bool has_hi_16bit = (output_info->varying_mask_16bit_hi[i] & (1 << j)) &&
((output_info->streams_16bit_hi[i] >> (j * 2)) & 0x3) == stream;
if (!has_lo_16bit && !has_hi_16bit)
continue;
nir_def *data =
nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
.base = offset,
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
nir_def *load_val;
if (ac_nir_is_const_output(out, VARYING_SLOT_VAR0_16BIT + i, j)) {
load_val = ac_nir_get_const_output(&b, 32, out, i, j);
} else {
unsigned base = offset * gs_nir->info.gs.vertices_out * 16;
load_val = nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
.base = base,
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
offset += 4;
}
if (has_lo_16bit)
out.outputs_16bit_lo[i][j] = nir_unpack_32_2x16_split_x(&b, data);
out->outputs_16bit_lo[i][j] = nir_unpack_32_2x16_split_x(&b, load_val);
if (has_hi_16bit)
out.outputs_16bit_hi[i][j] = nir_unpack_32_2x16_split_y(&b, data);
offset += gs_nir->info.gs.vertices_out * 16 * 4;
out->outputs_16bit_hi[i][j] = nir_unpack_32_2x16_split_y(&b, load_val);
}
}
if (stream_id)
ac_nir_emit_legacy_streamout(&b, stream, info, &out);
ac_nir_emit_legacy_streamout(&b, stream, info, out);
/* This should be after streamout and before exports. */
ac_nir_clamp_vertex_color_outputs(&b, &out);
ac_nir_clamp_vertex_color_outputs(&b, out);
if (stream == 0) {
uint64_t export_outputs = b.shader->info.outputs_written | VARYING_BIT_POS;
@@ -110,13 +105,13 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_op
ac_nir_export_position(&b, options->gfx_level, options->export_clipdist_mask, false,
options->write_pos_to_clipvertex, options->pack_clip_cull_distances,
!options->has_param_exports, options->force_vrs, export_outputs,
&out, NULL);
out, NULL);
if (options->has_param_exports) {
ac_nir_export_parameters(&b, options->param_offsets,
b.shader->info.outputs_written,
b.shader->info.outputs_written_16bit,
&out);
out);
}
}


@@ -11,6 +11,7 @@
#include "ac_hw_stage.h"
#include "ac_shader_args.h"
#include "ac_shader_util.h"
#include "ac_nir.h"
#include "nir_defines.h"
#ifdef __cplusplus
@@ -237,6 +238,13 @@ ac_nir_ngg_build_streamout_buffer_info(nir_builder *b,
nir_def *buffer_offsets_ret[4],
nir_def *emit_prim_ret[4]);
bool
ac_nir_is_const_output(ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned component);
nir_def *
ac_nir_get_const_output(nir_builder *b, unsigned bit_size, ac_nir_prerast_out *pr_out, gl_varying_slot slot,
unsigned component);
void
ac_nir_store_shared_xfb(nir_builder *b, nir_def *value, nir_def *vtxptr, ac_nir_prerast_out *pr_out,
gl_varying_slot slot, unsigned component);
@@ -272,6 +280,10 @@ ac_nir_compute_prerast_packed_output_info(ac_nir_prerast_out *pr_out);
unsigned
ac_nir_gs_output_component_mask_with_stream(ac_nir_prerast_per_output_info *info, unsigned stream);
nir_shader *
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options,
ac_nir_prerast_out *out);
#ifdef __cplusplus
}
#endif
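For the helpers newly exported from this header, a minimal usage sketch of the pattern used by the copy shader above. This is not library code: load_from_gsvs_ring() is a hypothetical stand-in for the nir_load_buffer_amd call with the appropriate GSVS ring base and offsets.

/* Sketch: skip the GSVS ring load when the component was written with the
 * same constant on every EmitVertex. */
static nir_def *
read_gs_output_component(nir_builder *b, ac_nir_prerast_out *out,
                         gl_varying_slot slot, unsigned component)
{
   if (ac_nir_is_const_output(out, slot, component))
      return ac_nir_get_const_output(b, 32, out, slot, component);

   /* hypothetical helper wrapping nir_load_buffer_amd */
   return load_from_gsvs_ring(b, slot, component);
}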


@@ -10,11 +10,7 @@
#include "nir_builder.h"
typedef struct {
nir_def *outputs[64][4];
nir_def *outputs_16bit_lo[16][4];
nir_def *outputs_16bit_hi[16][4];
ac_nir_gs_output_info *info;
ac_nir_prerast_out out;
nir_def *vertex_count[4];
nir_def *primitive_count[4];
@@ -24,54 +20,7 @@ static bool
lower_legacy_gs_store_output(nir_builder *b, nir_intrinsic_instr *intrin,
lower_legacy_gs_state *s)
{
/* Assume:
* - the shader used nir_lower_io_vars_to_temporaries
* - 64-bit outputs are lowered
* - no indirect indexing is present
*/
assert(nir_src_is_const(intrin->src[1]) && !nir_src_as_uint(intrin->src[1]));
b->cursor = nir_before_instr(&intrin->instr);
unsigned component = nir_intrinsic_component(intrin);
unsigned write_mask = nir_intrinsic_write_mask(intrin);
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
nir_def **outputs;
if (sem.location < VARYING_SLOT_VAR0_16BIT) {
outputs = s->outputs[sem.location];
} else {
unsigned index = sem.location - VARYING_SLOT_VAR0_16BIT;
if (sem.high_16bits)
outputs = s->outputs_16bit_hi[index];
else
outputs = s->outputs_16bit_lo[index];
}
nir_def *store_val = intrin->src[0].ssa;
/* 64bit output has been lowered to 32bit */
assert(store_val->bit_size <= 32);
/* 16-bit output stored in a normal varying slot that isn't a dedicated 16-bit slot. */
const bool non_dedicated_16bit = sem.location < VARYING_SLOT_VAR0_16BIT && store_val->bit_size == 16;
u_foreach_bit (i, write_mask) {
unsigned comp = component + i;
nir_def *store_component = nir_channel(b, store_val, i);
if (non_dedicated_16bit) {
if (sem.high_16bits) {
nir_def *lo = outputs[comp] ? nir_unpack_32_2x16_split_x(b, outputs[comp]) : nir_imm_intN_t(b, 0, 16);
outputs[comp] = nir_pack_32_2x16_split(b, lo, store_component);
} else {
nir_def *hi = outputs[comp] ? nir_unpack_32_2x16_split_y(b, outputs[comp]) : nir_imm_intN_t(b, 0, 16);
outputs[comp] = nir_pack_32_2x16_split(b, store_component, hi);
}
} else {
outputs[comp] = store_component;
}
}
ac_nir_gather_prerast_store_output_info(b, intrin, &s->out, true);
nir_instr_remove(&intrin->instr);
return true;
}
@@ -89,29 +38,21 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
nir_def *soffset = nir_load_ring_gs2vs_offset_amd(b);
unsigned offset = 0;
u_foreach_bit64 (i, b->shader->info.outputs_written) {
for (unsigned j = 0; j < 4; j++) {
nir_def *output = s->outputs[i][j];
/* Next vertex emit need a new value, reset all outputs. */
s->outputs[i][j] = NULL;
const uint8_t usage_mask = s->info->varying_mask[i] | s->info->sysval_mask[i];
u_foreach_bit64 (slot, b->shader->info.outputs_written) {
unsigned mask = ac_nir_gs_output_component_mask_with_stream(&s->out.infos[slot], stream);
nir_def **output = s->out.outputs[slot];
if (!(usage_mask & (1 << j)) ||
((s->info->streams[i] >> (j * 2)) & 0x3) != stream)
continue;
unsigned base = offset * b->shader->info.gs.vertices_out * 4;
offset++;
/* no one set this output, skip the buffer store */
if (!output)
u_foreach_bit(c, mask) {
/* The shader hasn't written this output yet. */
if (!output[c] || ac_nir_is_const_output(&s->out, slot, c))
continue;
unsigned base = offset * b->shader->info.gs.vertices_out;
nir_def *voffset = nir_ishl_imm(b, vtxidx, 2);
/* extend 8/16 bit to 32 bit, 64 bit has been lowered */
nir_def *data = nir_u2uN(b, output, 32);
nir_def *store_val = nir_u2u32(b, output[c]);
unsigned align_mul = 4;
unsigned align_offset = 0;
@@ -121,57 +62,56 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
align_offset = v_const_offset % align_mul;
}
nir_store_buffer_amd(b, data, gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
nir_store_buffer_amd(b, store_val, gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL |
ACCESS_IS_SWIZZLED_AMD,
.base = base,
/* For ACO to not reorder this store around EmitVertex/EndPrimitve */
.memory_modes = nir_var_shader_out,
.align_mul = align_mul, .align_offset = align_offset);
offset += 4;
}
/* Clear all outputs (they are undefined after emit_vertex) */
memset(s->out.outputs[slot], 0, sizeof(s->out.outputs[slot]));
}
u_foreach_bit (i, b->shader->info.outputs_written_16bit) {
for (unsigned j = 0; j < 4; j++) {
nir_def *output_lo = s->outputs_16bit_lo[i][j];
nir_def *output_hi = s->outputs_16bit_hi[i][j];
/* Next vertex emit need a new value, reset all outputs. */
s->outputs_16bit_lo[i][j] = NULL;
s->outputs_16bit_hi[i][j] = NULL;
u_foreach_bit (slot, b->shader->info.outputs_written_16bit) {
const unsigned mask_lo = ac_nir_gs_output_component_mask_with_stream(s->out.infos_16bit_lo + slot, stream);
const unsigned mask_hi = ac_nir_gs_output_component_mask_with_stream(s->out.infos_16bit_hi + slot, stream);
unsigned mask = mask_lo | mask_hi;
bool has_lo_16bit = (s->info->varying_mask_16bit_lo[i] & (1 << j)) &&
((s->info->streams_16bit_lo[i] >> (j * 2)) & 0x3) == stream;
bool has_hi_16bit = (s->info->varying_mask_16bit_hi[i] & (1 << j)) &&
((s->info->streams_16bit_hi[i] >> (j * 2)) & 0x3) == stream;
if (!has_lo_16bit && !has_hi_16bit)
nir_def **output_lo = s->out.outputs_16bit_lo[slot];
nir_def **output_hi = s->out.outputs_16bit_hi[slot];
nir_def *undef = nir_undef(b, 1, 16);
u_foreach_bit(c, mask) {
/* The shader hasn't written this output yet. */
if ((!output_lo[c] && !output_hi[c]) ||
ac_nir_is_const_output(&s->out, VARYING_SLOT_VAR0_16BIT + slot, c))
continue;
nir_def *lo = output_lo[c] ? output_lo[c] : undef;
nir_def *hi = output_hi[c] ? output_hi[c] : undef;
nir_def *store_val = nir_pack_32_2x16_split(b, lo, hi);
unsigned base = offset * b->shader->info.gs.vertices_out;
offset++;
bool has_lo_16bit_out = has_lo_16bit && output_lo;
bool has_hi_16bit_out = has_hi_16bit && output_hi;
/* no one set needed output, skip the buffer store */
if (!has_lo_16bit_out && !has_hi_16bit_out)
continue;
if (!has_lo_16bit_out)
output_lo = nir_undef(b, 1, 16);
if (!has_hi_16bit_out)
output_hi = nir_undef(b, 1, 16);
nir_def *voffset = nir_iadd_imm(b, vtxidx, base);
voffset = nir_ishl_imm(b, voffset, 2);
nir_store_buffer_amd(b, nir_pack_32_2x16_split(b, output_lo, output_hi),
nir_store_buffer_amd(b, store_val,
gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL |
ACCESS_IS_SWIZZLED_AMD,
/* For ACO to not reorder this store around EmitVertex/EndPrimitve */
.memory_modes = nir_var_shader_out);
offset += 4;
}
/* Clear all outputs (they are undefined after emit_vertex) */
memset(s->out.outputs_16bit_lo[slot], 0, sizeof(s->out.outputs_16bit_lo[slot]));
memset(s->out.outputs_16bit_hi[slot], 0, sizeof(s->out.outputs_16bit_hi[slot]));
}
/* Signal vertex emission. */
@@ -229,13 +169,33 @@ lower_legacy_gs_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, void *sta
return false;
}
static bool
gather_output_store_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
{
lower_legacy_gs_state *s = (lower_legacy_gs_state *) state;
if (intrin->intrinsic == nir_intrinsic_store_output) {
ac_nir_gather_prerast_store_output_info(b, intrin, &s->out, false);
return true;
}
return false;
}
static void
gather_output_stores(nir_shader *shader, lower_legacy_gs_state *s)
{
nir_shader_intrinsics_pass(shader, gather_output_store_intrinsic, nir_metadata_none, s);
}
bool
ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options,
nir_shader **gs_copy_shader)
{
lower_legacy_gs_state s = {
.info = options->output_info,
};
lower_legacy_gs_state s = {0};
gather_output_stores(nir, &s);
ac_nir_compute_prerast_packed_output_info(&s.out);
unsigned num_vertices_per_primitive = 0;
switch (nir->info.gs.output_primitive) {
@@ -284,6 +244,6 @@ ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options,
nir_progress(progress, impl, nir_metadata_none);
*gs_copy_shader = ac_nir_create_gs_copy_shader(nir, options);
*gs_copy_shader = ac_nir_create_gs_copy_shader(nir, options, &s.out);
return true;
}
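Taken together, the hunks in this file give ac_nir_lower_legacy_gs the following overall shape. This is a simplified outline for orientation, not the verbatim pass body; the lowering step in the middle is only summarized in the comment.

/* Outline of ac_nir_lower_legacy_gs after this change (simplified). */
bool
ac_nir_lower_legacy_gs_outline(nir_shader *nir,
                               ac_nir_lower_legacy_gs_options *options,
                               nir_shader **gs_copy_shader)
{
   lower_legacy_gs_state s = {0};

   /* 1. Pre-pass over store_output intrinsics: fill s.out
    *    (ac_nir_prerast_out) with per-component stream masks and
    *    constant-value information. */
   gather_output_stores(nir, &s);
   ac_nir_compute_prerast_packed_output_info(&s.out);

   /* 2. Lower store_output / emit_vertex / end_primitive, emitting GSVS
    *    ring stores only for non-constant components (omitted here). */

   /* 3. Build the copy shader from the same gathered info, so constant
    *    components become immediates instead of GSVS ring loads. */
   *gs_copy_shader = ac_nir_create_gs_copy_shader(nir, options, &s.out);
   return true;
}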


@@ -1407,8 +1407,8 @@ ac_nir_ngg_get_xfb_lds_offset(ac_nir_prerast_out *pr_out, gl_varying_slot slot,
return lds_slot_offset + util_bitcount(lds_component_mask & BITFIELD_MASK(component)) * 4;
}
static bool
is_const_output(ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned component)
bool
ac_nir_is_const_output(ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned component)
{
if (slot >= VARYING_SLOT_VAR0_16BIT) {
slot -= VARYING_SLOT_VAR0_16BIT;
@@ -1419,11 +1419,11 @@ is_const_output(ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned compo
}
}
static nir_def *
get_const_output(nir_builder *b, unsigned bit_size, ac_nir_prerast_out *pr_out, gl_varying_slot slot,
unsigned component)
nir_def *
ac_nir_get_const_output(nir_builder *b, unsigned bit_size, ac_nir_prerast_out *pr_out, gl_varying_slot slot,
unsigned component)
{
if (!is_const_output(pr_out, slot, component))
if (!ac_nir_is_const_output(pr_out, slot, component))
return NULL;
if (slot >= VARYING_SLOT_VAR0_16BIT)
@@ -1437,7 +1437,7 @@ ac_nir_store_shared_xfb(nir_builder *b, nir_def *value, nir_def *vtxptr, ac_nir_
gl_varying_slot slot, unsigned component)
{
assert(value->num_components == 1);
if (is_const_output(pr_out, slot, component))
if (ac_nir_is_const_output(pr_out, slot, component))
return;
unsigned offset = ac_nir_ngg_get_xfb_lds_offset(pr_out, slot, component, value->bit_size == 16);
@@ -1448,7 +1448,7 @@ nir_def *
ac_nir_load_shared_xfb(nir_builder *b, unsigned bit_size, nir_def *vtxptr, ac_nir_prerast_out *pr_out,
gl_varying_slot slot, unsigned component)
{
nir_def *const_val = get_const_output(b, bit_size, pr_out, slot, component);
nir_def *const_val = ac_nir_get_const_output(b, bit_size, pr_out, slot, component);
if (const_val)
return const_val;
@@ -1461,7 +1461,7 @@ ac_nir_store_shared_gs_out(nir_builder *b, nir_def *value, nir_def *vtxptr, ac_n
gl_varying_slot slot, unsigned component)
{
assert(value->num_components == 1);
if (is_const_output(pr_out, slot, component))
if (ac_nir_is_const_output(pr_out, slot, component))
return;
unsigned offset = ac_nir_get_lds_gs_out_slot_offset(pr_out, slot, component);
@@ -1472,7 +1472,7 @@ nir_def *
ac_nir_load_shared_gs_out(nir_builder *b, unsigned bit_size, nir_def *vtxptr, ac_nir_prerast_out *pr_out,
gl_varying_slot slot, unsigned component)
{
nir_def *const_val = get_const_output(b, bit_size, pr_out, slot, component);
nir_def *const_val = ac_nir_get_const_output(b, bit_size, pr_out, slot, component);
if (const_val)
return const_val;


@@ -449,15 +449,9 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex);
} else {
ac_nir_gs_output_info gs_out_info = {
.streams = stage->info.gs.output_streams,
.sysval_mask = stage->info.gs.output_usage_mask,
.varying_mask = stage->info.gs.output_usage_mask,
};
ac_nir_lower_legacy_gs_options options = {
.has_gen_prim_query = false,
.has_pipeline_stats_query = false,
.output_info = &gs_out_info,
.gfx_level = pdev->info.gfx_level,
.export_clipdist_mask = stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask,
.param_offsets = stage->info.outinfo.vs_output_param_offset,


@@ -1324,39 +1324,6 @@ bool si_should_clear_lds(struct si_screen *sscreen, const struct nir_shader *sha
shader->info.shared_size > 0 && sscreen->options.clear_lds;
}
static void
si_init_gs_output_info(struct si_shader_info *info, struct si_temp_shader_variant_info *out_info)
{
for (int i = 0; i < info->num_outputs; i++) {
unsigned slot = info->output_semantic[i];
if (slot < VARYING_SLOT_VAR0_16BIT) {
out_info->gs_streams[slot] = info->output_streams[i];
out_info->gs_out_usage_mask[slot] = info->output_usagemask[i];
} else {
unsigned index = slot - VARYING_SLOT_VAR0_16BIT;
/* TODO: 16bit need separated fields for lo/hi part. */
out_info->gs_streams_16bit_lo[index] = info->output_streams[i];
out_info->gs_streams_16bit_hi[index] = info->output_streams[i];
out_info->gs_out_usage_mask_16bit_lo[index] = info->output_usagemask[i];
out_info->gs_out_usage_mask_16bit_hi[index] = info->output_usagemask[i];
}
}
ac_nir_gs_output_info *ac_info = &out_info->gs_out_info;
ac_info->streams = out_info->gs_streams;
ac_info->streams_16bit_lo = out_info->gs_streams_16bit_lo;
ac_info->streams_16bit_hi = out_info->gs_streams_16bit_hi;
ac_info->sysval_mask = out_info->gs_out_usage_mask;
ac_info->varying_mask = out_info->gs_out_usage_mask;
ac_info->varying_mask_16bit_lo = out_info->gs_out_usage_mask_16bit_lo;
ac_info->varying_mask_16bit_hi = out_info->gs_out_usage_mask_16bit_hi;
/* TODO: construct 16bit slot per component store type. */
ac_info->types_16bit_lo = ac_info->types_16bit_hi = NULL;
}
/* Run passes that eliminate code and affect shader_info. These should be run before linking
* and shader_info gathering. Lowering passes can be run here too, but only if they lead to
* better code or lower undesirable representations (like derefs). Lowering passes that prevent
@@ -1624,15 +1591,12 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
ctx->temp_info.vs_output_param_offset[semantic] = shader->info.nr_param_exports++;
}
si_init_gs_output_info(&sel->info, &ctx->temp_info);
unsigned clip_cull_mask =
(sel->info.clipdist_mask & ~shader->key.ge.opt.kill_clip_distances) | sel->info.culldist_mask;
ac_nir_lower_legacy_gs_options options = {
.has_gen_prim_query = false,
.has_pipeline_stats_query = sel->screen->use_ngg,
.output_info = &ctx->temp_info.gs_out_info,
.gfx_level = sel->screen->info.gfx_level,
.export_clipdist_mask = clip_cull_mask,
.param_offsets = ctx->temp_info.vs_output_param_offset,


@@ -189,17 +189,6 @@ struct si_shader_info {
* finished.
*/
struct si_temp_shader_variant_info {
/* Legacy GS output info. */
uint8_t gs_streams[64];
uint8_t gs_streams_16bit_lo[16];
uint8_t gs_streams_16bit_hi[16];
uint8_t gs_out_usage_mask[64];
uint8_t gs_out_usage_mask_16bit_lo[16];
uint8_t gs_out_usage_mask_16bit_hi[16];
ac_nir_gs_output_info gs_out_info;
uint8_t vs_output_param_offset[NUM_TOTAL_VARYING_SLOTS];
bool has_non_uniform_tex_access : 1;
bool has_shadow_comparison : 1;