mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
ac/nir: switch legacy GS lowering to ac_nir_prerast_out completely
This changes legacy GS outputs to use the same logic as NGG GS. It enables the same optimizations that NGG has, such as forwarding constant GS output components to the GS copy shader at compile time. ac_nir_gs_output_info is removed, and GS output info is no longer passed to ac_nir_lower_legacy_gs and ac_nir_create_gs_copy_shader separately. ac_nir_lower_legacy_gs now gathers ac_nir_prerast_out, generates GSVS ring stores, and also generates the GS copy shader with GSVS ring loads. Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35352>
This commit is contained in:
parent
723ce13f90
commit
42ad7543b8
8 changed files with 119 additions and 225 deletions
|
|
@ -253,22 +253,6 @@ ac_nir_lower_global_access(nir_shader *shader);
|
|||
bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level);
|
||||
bool ac_nir_lower_image_opcodes(nir_shader *nir);
|
||||
|
||||
typedef struct ac_nir_gs_output_info {
|
||||
const uint8_t *streams;
|
||||
const uint8_t *streams_16bit_lo;
|
||||
const uint8_t *streams_16bit_hi;
|
||||
|
||||
const uint8_t *varying_mask;
|
||||
const uint8_t *varying_mask_16bit_lo;
|
||||
const uint8_t *varying_mask_16bit_hi;
|
||||
|
||||
const uint8_t *sysval_mask;
|
||||
|
||||
/* type for each 16bit slot component */
|
||||
nir_alu_type (*types_16bit_lo)[4];
|
||||
nir_alu_type (*types_16bit_hi)[4];
|
||||
} ac_nir_gs_output_info;
|
||||
|
||||
bool
|
||||
ac_nir_lower_legacy_vs(nir_shader *nir,
|
||||
enum amd_gfx_level gfx_level,
|
||||
|
|
@ -286,7 +270,6 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
|
|||
typedef struct {
|
||||
bool has_gen_prim_query;
|
||||
bool has_pipeline_stats_query;
|
||||
ac_nir_gs_output_info *output_info;
|
||||
|
||||
enum amd_gfx_level gfx_level;
|
||||
uint32_t export_clipdist_mask;
|
||||
|
|
@ -300,9 +283,6 @@ typedef struct {
|
|||
bool force_vrs;
|
||||
} ac_nir_lower_legacy_gs_options;
|
||||
|
||||
nir_shader *
|
||||
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options);
|
||||
|
||||
bool
|
||||
ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options,
|
||||
nir_shader **gs_copy_shader);
|
||||
|
|
|
|||
|
|
@ -11,11 +11,11 @@
|
|||
#include "nir_xfb_info.h"
|
||||
|
||||
nir_shader *
|
||||
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options)
|
||||
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options,
|
||||
ac_nir_prerast_out *out)
|
||||
{
|
||||
nir_builder b = nir_builder_init_simple_shader(
|
||||
MESA_SHADER_VERTEX, gs_nir->options, "gs_copy");
|
||||
ac_nir_gs_output_info *output_info = options->output_info;
|
||||
|
||||
b.shader->info.outputs_written = gs_nir->info.outputs_written;
|
||||
b.shader->info.outputs_written_16bit = gs_nir->info.outputs_written_16bit;
|
||||
|
|
@ -37,68 +37,63 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_op
|
|||
if (stream_id)
|
||||
nir_push_if(&b, nir_ieq_imm(&b, stream_id, stream));
|
||||
|
||||
uint32_t offset = 0;
|
||||
ac_nir_prerast_out out = {0};
|
||||
if (output_info->types_16bit_lo)
|
||||
memcpy(&out.types_16bit_lo, output_info->types_16bit_lo, sizeof(out.types_16bit_lo));
|
||||
if (output_info->types_16bit_hi)
|
||||
memcpy(&out.types_16bit_hi, output_info->types_16bit_hi, sizeof(out.types_16bit_hi));
|
||||
unsigned offset = 0;
|
||||
|
||||
u_foreach_bit64 (i, gs_nir->info.outputs_written) {
|
||||
const uint8_t usage_mask = output_info->varying_mask[i] | output_info->sysval_mask[i];
|
||||
out.infos[i].components_mask = usage_mask;
|
||||
out.infos[i].as_varying_mask = output_info->varying_mask[i];
|
||||
out.infos[i].as_sysval_mask = output_info->sysval_mask[i];
|
||||
|
||||
u_foreach_bit (j, usage_mask) {
|
||||
if (((output_info->streams[i] >> (j * 2)) & 0x3) != stream)
|
||||
u_foreach_bit (j, out->infos[i].components_mask) {
|
||||
if (((out->infos[i].stream >> (j * 2)) & 0x3) != stream)
|
||||
continue;
|
||||
|
||||
out.outputs[i][j] =
|
||||
if (ac_nir_is_const_output(out, i, j)) {
|
||||
out->outputs[i][j] = ac_nir_get_const_output(&b, 32, out, i, j);
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned base = offset * gs_nir->info.gs.vertices_out * 16;
|
||||
out->outputs[i][j] =
|
||||
nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
|
||||
.base = offset,
|
||||
.base = base,
|
||||
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
|
||||
offset += gs_nir->info.gs.vertices_out * 16 * 4;
|
||||
offset += 4;
|
||||
}
|
||||
}
|
||||
|
||||
u_foreach_bit (i, gs_nir->info.outputs_written_16bit) {
|
||||
out.infos_16bit_lo[i].components_mask = output_info->varying_mask_16bit_lo[i];
|
||||
out.infos_16bit_lo[i].as_varying_mask = output_info->varying_mask_16bit_lo[i];
|
||||
out.infos_16bit_hi[i].components_mask = output_info->varying_mask_16bit_hi[i];
|
||||
out.infos_16bit_hi[i].as_varying_mask = output_info->varying_mask_16bit_hi[i];
|
||||
unsigned mask = out->infos_16bit_lo[i].components_mask |
|
||||
out->infos_16bit_hi[i].components_mask;
|
||||
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
out.infos[i].as_varying_mask = output_info->varying_mask[i];
|
||||
out.infos[i].as_sysval_mask = output_info->sysval_mask[i];
|
||||
u_foreach_bit (j, mask) {
|
||||
bool has_lo_16bit = ((out->infos_16bit_lo[i].stream >> (j * 2)) & 0x3) == stream;
|
||||
bool has_hi_16bit = ((out->infos_16bit_hi[i].stream >> (j * 2)) & 0x3) == stream;
|
||||
|
||||
bool has_lo_16bit = (output_info->varying_mask_16bit_lo[i] & (1 << j)) &&
|
||||
((output_info->streams_16bit_lo[i] >> (j * 2)) & 0x3) == stream;
|
||||
bool has_hi_16bit = (output_info->varying_mask_16bit_hi[i] & (1 << j)) &&
|
||||
((output_info->streams_16bit_hi[i] >> (j * 2)) & 0x3) == stream;
|
||||
if (!has_lo_16bit && !has_hi_16bit)
|
||||
continue;
|
||||
|
||||
nir_def *data =
|
||||
nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
|
||||
.base = offset,
|
||||
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
|
||||
nir_def *load_val;
|
||||
|
||||
if (ac_nir_is_const_output(out, VARYING_SLOT_VAR0_16BIT + i, j)) {
|
||||
load_val = ac_nir_get_const_output(&b, 32, out, i, j);
|
||||
} else {
|
||||
unsigned base = offset * gs_nir->info.gs.vertices_out * 16;
|
||||
load_val = nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
|
||||
.base = base,
|
||||
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
|
||||
offset += 4;
|
||||
}
|
||||
|
||||
if (has_lo_16bit)
|
||||
out.outputs_16bit_lo[i][j] = nir_unpack_32_2x16_split_x(&b, data);
|
||||
out->outputs_16bit_lo[i][j] = nir_unpack_32_2x16_split_x(&b, load_val);
|
||||
|
||||
if (has_hi_16bit)
|
||||
out.outputs_16bit_hi[i][j] = nir_unpack_32_2x16_split_y(&b, data);
|
||||
|
||||
offset += gs_nir->info.gs.vertices_out * 16 * 4;
|
||||
out->outputs_16bit_hi[i][j] = nir_unpack_32_2x16_split_y(&b, load_val);
|
||||
}
|
||||
}
|
||||
|
||||
if (stream_id)
|
||||
ac_nir_emit_legacy_streamout(&b, stream, info, &out);
|
||||
ac_nir_emit_legacy_streamout(&b, stream, info, out);
|
||||
|
||||
/* This should be after streamout and before exports. */
|
||||
ac_nir_clamp_vertex_color_outputs(&b, &out);
|
||||
ac_nir_clamp_vertex_color_outputs(&b, out);
|
||||
|
||||
if (stream == 0) {
|
||||
uint64_t export_outputs = b.shader->info.outputs_written | VARYING_BIT_POS;
|
||||
|
|
@ -110,13 +105,13 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_op
|
|||
ac_nir_export_position(&b, options->gfx_level, options->export_clipdist_mask, false,
|
||||
options->write_pos_to_clipvertex, options->pack_clip_cull_distances,
|
||||
!options->has_param_exports, options->force_vrs, export_outputs,
|
||||
&out, NULL);
|
||||
out, NULL);
|
||||
|
||||
if (options->has_param_exports) {
|
||||
ac_nir_export_parameters(&b, options->param_offsets,
|
||||
b.shader->info.outputs_written,
|
||||
b.shader->info.outputs_written_16bit,
|
||||
&out);
|
||||
out);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
#include "ac_hw_stage.h"
|
||||
#include "ac_shader_args.h"
|
||||
#include "ac_shader_util.h"
|
||||
#include "ac_nir.h"
|
||||
#include "nir_defines.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
@ -237,6 +238,13 @@ ac_nir_ngg_build_streamout_buffer_info(nir_builder *b,
|
|||
nir_def *buffer_offsets_ret[4],
|
||||
nir_def *emit_prim_ret[4]);
|
||||
|
||||
bool
|
||||
ac_nir_is_const_output(ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned component);
|
||||
|
||||
nir_def *
|
||||
ac_nir_get_const_output(nir_builder *b, unsigned bit_size, ac_nir_prerast_out *pr_out, gl_varying_slot slot,
|
||||
unsigned component);
|
||||
|
||||
void
|
||||
ac_nir_store_shared_xfb(nir_builder *b, nir_def *value, nir_def *vtxptr, ac_nir_prerast_out *pr_out,
|
||||
gl_varying_slot slot, unsigned component);
|
||||
|
|
@ -272,6 +280,10 @@ ac_nir_compute_prerast_packed_output_info(ac_nir_prerast_out *pr_out);
|
|||
unsigned
|
||||
ac_nir_gs_output_component_mask_with_stream(ac_nir_prerast_per_output_info *info, unsigned stream);
|
||||
|
||||
nir_shader *
|
||||
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options,
|
||||
ac_nir_prerast_out *out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -10,11 +10,7 @@
|
|||
#include "nir_builder.h"
|
||||
|
||||
typedef struct {
|
||||
nir_def *outputs[64][4];
|
||||
nir_def *outputs_16bit_lo[16][4];
|
||||
nir_def *outputs_16bit_hi[16][4];
|
||||
|
||||
ac_nir_gs_output_info *info;
|
||||
ac_nir_prerast_out out;
|
||||
|
||||
nir_def *vertex_count[4];
|
||||
nir_def *primitive_count[4];
|
||||
|
|
@ -24,54 +20,7 @@ static bool
|
|||
lower_legacy_gs_store_output(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
lower_legacy_gs_state *s)
|
||||
{
|
||||
/* Assume:
|
||||
* - the shader used nir_lower_io_vars_to_temporaries
|
||||
* - 64-bit outputs are lowered
|
||||
* - no indirect indexing is present
|
||||
*/
|
||||
assert(nir_src_is_const(intrin->src[1]) && !nir_src_as_uint(intrin->src[1]));
|
||||
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
||||
unsigned component = nir_intrinsic_component(intrin);
|
||||
unsigned write_mask = nir_intrinsic_write_mask(intrin);
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
|
||||
|
||||
nir_def **outputs;
|
||||
if (sem.location < VARYING_SLOT_VAR0_16BIT) {
|
||||
outputs = s->outputs[sem.location];
|
||||
} else {
|
||||
unsigned index = sem.location - VARYING_SLOT_VAR0_16BIT;
|
||||
if (sem.high_16bits)
|
||||
outputs = s->outputs_16bit_hi[index];
|
||||
else
|
||||
outputs = s->outputs_16bit_lo[index];
|
||||
}
|
||||
|
||||
nir_def *store_val = intrin->src[0].ssa;
|
||||
/* 64bit output has been lowered to 32bit */
|
||||
assert(store_val->bit_size <= 32);
|
||||
|
||||
/* 16-bit output stored in a normal varying slot that isn't a dedicated 16-bit slot. */
|
||||
const bool non_dedicated_16bit = sem.location < VARYING_SLOT_VAR0_16BIT && store_val->bit_size == 16;
|
||||
|
||||
u_foreach_bit (i, write_mask) {
|
||||
unsigned comp = component + i;
|
||||
nir_def *store_component = nir_channel(b, store_val, i);
|
||||
|
||||
if (non_dedicated_16bit) {
|
||||
if (sem.high_16bits) {
|
||||
nir_def *lo = outputs[comp] ? nir_unpack_32_2x16_split_x(b, outputs[comp]) : nir_imm_intN_t(b, 0, 16);
|
||||
outputs[comp] = nir_pack_32_2x16_split(b, lo, store_component);
|
||||
} else {
|
||||
nir_def *hi = outputs[comp] ? nir_unpack_32_2x16_split_y(b, outputs[comp]) : nir_imm_intN_t(b, 0, 16);
|
||||
outputs[comp] = nir_pack_32_2x16_split(b, store_component, hi);
|
||||
}
|
||||
} else {
|
||||
outputs[comp] = store_component;
|
||||
}
|
||||
}
|
||||
|
||||
ac_nir_gather_prerast_store_output_info(b, intrin, &s->out, true);
|
||||
nir_instr_remove(&intrin->instr);
|
||||
return true;
|
||||
}
|
||||
|
|
@ -89,29 +38,21 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
|
|||
nir_def *soffset = nir_load_ring_gs2vs_offset_amd(b);
|
||||
|
||||
unsigned offset = 0;
|
||||
u_foreach_bit64 (i, b->shader->info.outputs_written) {
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
nir_def *output = s->outputs[i][j];
|
||||
/* Next vertex emit need a new value, reset all outputs. */
|
||||
s->outputs[i][j] = NULL;
|
||||
|
||||
const uint8_t usage_mask = s->info->varying_mask[i] | s->info->sysval_mask[i];
|
||||
u_foreach_bit64 (slot, b->shader->info.outputs_written) {
|
||||
unsigned mask = ac_nir_gs_output_component_mask_with_stream(&s->out.infos[slot], stream);
|
||||
nir_def **output = s->out.outputs[slot];
|
||||
|
||||
if (!(usage_mask & (1 << j)) ||
|
||||
((s->info->streams[i] >> (j * 2)) & 0x3) != stream)
|
||||
continue;
|
||||
|
||||
unsigned base = offset * b->shader->info.gs.vertices_out * 4;
|
||||
offset++;
|
||||
|
||||
/* no one set this output, skip the buffer store */
|
||||
if (!output)
|
||||
u_foreach_bit(c, mask) {
|
||||
/* The shader hasn't written this output yet. */
|
||||
if (!output[c] || ac_nir_is_const_output(&s->out, slot, c))
|
||||
continue;
|
||||
|
||||
unsigned base = offset * b->shader->info.gs.vertices_out;
|
||||
nir_def *voffset = nir_ishl_imm(b, vtxidx, 2);
|
||||
|
||||
/* extend 8/16 bit to 32 bit, 64 bit has been lowered */
|
||||
nir_def *data = nir_u2uN(b, output, 32);
|
||||
nir_def *store_val = nir_u2u32(b, output[c]);
|
||||
|
||||
unsigned align_mul = 4;
|
||||
unsigned align_offset = 0;
|
||||
|
|
@ -121,57 +62,56 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
|
|||
align_offset = v_const_offset % align_mul;
|
||||
}
|
||||
|
||||
nir_store_buffer_amd(b, data, gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
|
||||
nir_store_buffer_amd(b, store_val, gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
|
||||
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL |
|
||||
ACCESS_IS_SWIZZLED_AMD,
|
||||
.base = base,
|
||||
/* For ACO to not reorder this store around EmitVertex/EndPrimitve */
|
||||
.memory_modes = nir_var_shader_out,
|
||||
.align_mul = align_mul, .align_offset = align_offset);
|
||||
offset += 4;
|
||||
}
|
||||
|
||||
/* Clear all outputs (they are undefined after emit_vertex) */
|
||||
memset(s->out.outputs[slot], 0, sizeof(s->out.outputs[slot]));
|
||||
}
|
||||
|
||||
u_foreach_bit (i, b->shader->info.outputs_written_16bit) {
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
nir_def *output_lo = s->outputs_16bit_lo[i][j];
|
||||
nir_def *output_hi = s->outputs_16bit_hi[i][j];
|
||||
/* Next vertex emit need a new value, reset all outputs. */
|
||||
s->outputs_16bit_lo[i][j] = NULL;
|
||||
s->outputs_16bit_hi[i][j] = NULL;
|
||||
u_foreach_bit (slot, b->shader->info.outputs_written_16bit) {
|
||||
const unsigned mask_lo = ac_nir_gs_output_component_mask_with_stream(s->out.infos_16bit_lo + slot, stream);
|
||||
const unsigned mask_hi = ac_nir_gs_output_component_mask_with_stream(s->out.infos_16bit_hi + slot, stream);
|
||||
unsigned mask = mask_lo | mask_hi;
|
||||
|
||||
bool has_lo_16bit = (s->info->varying_mask_16bit_lo[i] & (1 << j)) &&
|
||||
((s->info->streams_16bit_lo[i] >> (j * 2)) & 0x3) == stream;
|
||||
bool has_hi_16bit = (s->info->varying_mask_16bit_hi[i] & (1 << j)) &&
|
||||
((s->info->streams_16bit_hi[i] >> (j * 2)) & 0x3) == stream;
|
||||
if (!has_lo_16bit && !has_hi_16bit)
|
||||
nir_def **output_lo = s->out.outputs_16bit_lo[slot];
|
||||
nir_def **output_hi = s->out.outputs_16bit_hi[slot];
|
||||
nir_def *undef = nir_undef(b, 1, 16);
|
||||
|
||||
u_foreach_bit(c, mask) {
|
||||
/* The shader hasn't written this output yet. */
|
||||
if ((!output_lo[c] && !output_hi[c]) ||
|
||||
ac_nir_is_const_output(&s->out, VARYING_SLOT_VAR0_16BIT + slot, c))
|
||||
continue;
|
||||
|
||||
nir_def *lo = output_lo[c] ? output_lo[c] : undef;
|
||||
nir_def *hi = output_hi[c] ? output_hi[c] : undef;
|
||||
nir_def *store_val = nir_pack_32_2x16_split(b, lo, hi);
|
||||
|
||||
unsigned base = offset * b->shader->info.gs.vertices_out;
|
||||
offset++;
|
||||
|
||||
bool has_lo_16bit_out = has_lo_16bit && output_lo;
|
||||
bool has_hi_16bit_out = has_hi_16bit && output_hi;
|
||||
|
||||
/* no one set needed output, skip the buffer store */
|
||||
if (!has_lo_16bit_out && !has_hi_16bit_out)
|
||||
continue;
|
||||
|
||||
if (!has_lo_16bit_out)
|
||||
output_lo = nir_undef(b, 1, 16);
|
||||
|
||||
if (!has_hi_16bit_out)
|
||||
output_hi = nir_undef(b, 1, 16);
|
||||
|
||||
nir_def *voffset = nir_iadd_imm(b, vtxidx, base);
|
||||
voffset = nir_ishl_imm(b, voffset, 2);
|
||||
|
||||
nir_store_buffer_amd(b, nir_pack_32_2x16_split(b, output_lo, output_hi),
|
||||
nir_store_buffer_amd(b, store_val,
|
||||
gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
|
||||
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL |
|
||||
ACCESS_IS_SWIZZLED_AMD,
|
||||
/* For ACO to not reorder this store around EmitVertex/EndPrimitve */
|
||||
.memory_modes = nir_var_shader_out);
|
||||
offset += 4;
|
||||
}
|
||||
|
||||
/* Clear all outputs (they are undefined after emit_vertex) */
|
||||
memset(s->out.outputs_16bit_lo[slot], 0, sizeof(s->out.outputs_16bit_lo[slot]));
|
||||
memset(s->out.outputs_16bit_hi[slot], 0, sizeof(s->out.outputs_16bit_hi[slot]));
|
||||
}
|
||||
|
||||
/* Signal vertex emission. */
|
||||
|
|
@ -229,13 +169,33 @@ lower_legacy_gs_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, void *sta
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
gather_output_store_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
|
||||
{
|
||||
lower_legacy_gs_state *s = (lower_legacy_gs_state *) state;
|
||||
|
||||
if (intrin->intrinsic == nir_intrinsic_store_output) {
|
||||
ac_nir_gather_prerast_store_output_info(b, intrin, &s->out, false);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
gather_output_stores(nir_shader *shader, lower_legacy_gs_state *s)
|
||||
{
|
||||
nir_shader_intrinsics_pass(shader, gather_output_store_intrinsic, nir_metadata_none, s);
|
||||
}
|
||||
|
||||
bool
|
||||
ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options,
|
||||
nir_shader **gs_copy_shader)
|
||||
{
|
||||
lower_legacy_gs_state s = {
|
||||
.info = options->output_info,
|
||||
};
|
||||
lower_legacy_gs_state s = {0};
|
||||
|
||||
gather_output_stores(nir, &s);
|
||||
ac_nir_compute_prerast_packed_output_info(&s.out);
|
||||
|
||||
unsigned num_vertices_per_primitive = 0;
|
||||
switch (nir->info.gs.output_primitive) {
|
||||
|
|
@ -284,6 +244,6 @@ ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options,
|
|||
|
||||
nir_progress(progress, impl, nir_metadata_none);
|
||||
|
||||
*gs_copy_shader = ac_nir_create_gs_copy_shader(nir, options);
|
||||
*gs_copy_shader = ac_nir_create_gs_copy_shader(nir, options, &s.out);
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1407,8 +1407,8 @@ ac_nir_ngg_get_xfb_lds_offset(ac_nir_prerast_out *pr_out, gl_varying_slot slot,
|
|||
return lds_slot_offset + util_bitcount(lds_component_mask & BITFIELD_MASK(component)) * 4;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_const_output(ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned component)
|
||||
bool
|
||||
ac_nir_is_const_output(ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned component)
|
||||
{
|
||||
if (slot >= VARYING_SLOT_VAR0_16BIT) {
|
||||
slot -= VARYING_SLOT_VAR0_16BIT;
|
||||
|
|
@ -1419,11 +1419,11 @@ is_const_output(ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned compo
|
|||
}
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
get_const_output(nir_builder *b, unsigned bit_size, ac_nir_prerast_out *pr_out, gl_varying_slot slot,
|
||||
unsigned component)
|
||||
nir_def *
|
||||
ac_nir_get_const_output(nir_builder *b, unsigned bit_size, ac_nir_prerast_out *pr_out, gl_varying_slot slot,
|
||||
unsigned component)
|
||||
{
|
||||
if (!is_const_output(pr_out, slot, component))
|
||||
if (!ac_nir_is_const_output(pr_out, slot, component))
|
||||
return NULL;
|
||||
|
||||
if (slot >= VARYING_SLOT_VAR0_16BIT)
|
||||
|
|
@ -1437,7 +1437,7 @@ ac_nir_store_shared_xfb(nir_builder *b, nir_def *value, nir_def *vtxptr, ac_nir_
|
|||
gl_varying_slot slot, unsigned component)
|
||||
{
|
||||
assert(value->num_components == 1);
|
||||
if (is_const_output(pr_out, slot, component))
|
||||
if (ac_nir_is_const_output(pr_out, slot, component))
|
||||
return;
|
||||
|
||||
unsigned offset = ac_nir_ngg_get_xfb_lds_offset(pr_out, slot, component, value->bit_size == 16);
|
||||
|
|
@ -1448,7 +1448,7 @@ nir_def *
|
|||
ac_nir_load_shared_xfb(nir_builder *b, unsigned bit_size, nir_def *vtxptr, ac_nir_prerast_out *pr_out,
|
||||
gl_varying_slot slot, unsigned component)
|
||||
{
|
||||
nir_def *const_val = get_const_output(b, bit_size, pr_out, slot, component);
|
||||
nir_def *const_val = ac_nir_get_const_output(b, bit_size, pr_out, slot, component);
|
||||
if (const_val)
|
||||
return const_val;
|
||||
|
||||
|
|
@ -1461,7 +1461,7 @@ ac_nir_store_shared_gs_out(nir_builder *b, nir_def *value, nir_def *vtxptr, ac_n
|
|||
gl_varying_slot slot, unsigned component)
|
||||
{
|
||||
assert(value->num_components == 1);
|
||||
if (is_const_output(pr_out, slot, component))
|
||||
if (ac_nir_is_const_output(pr_out, slot, component))
|
||||
return;
|
||||
|
||||
unsigned offset = ac_nir_get_lds_gs_out_slot_offset(pr_out, slot, component);
|
||||
|
|
@ -1472,7 +1472,7 @@ nir_def *
|
|||
ac_nir_load_shared_gs_out(nir_builder *b, unsigned bit_size, nir_def *vtxptr, ac_nir_prerast_out *pr_out,
|
||||
gl_varying_slot slot, unsigned component)
|
||||
{
|
||||
nir_def *const_val = get_const_output(b, bit_size, pr_out, slot, component);
|
||||
nir_def *const_val = ac_nir_get_const_output(b, bit_size, pr_out, slot, component);
|
||||
if (const_val)
|
||||
return const_val;
|
||||
|
||||
|
|
|
|||
|
|
@ -449,15 +449,9 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex);
|
||||
|
||||
} else {
|
||||
ac_nir_gs_output_info gs_out_info = {
|
||||
.streams = stage->info.gs.output_streams,
|
||||
.sysval_mask = stage->info.gs.output_usage_mask,
|
||||
.varying_mask = stage->info.gs.output_usage_mask,
|
||||
};
|
||||
ac_nir_lower_legacy_gs_options options = {
|
||||
.has_gen_prim_query = false,
|
||||
.has_pipeline_stats_query = false,
|
||||
.output_info = &gs_out_info,
|
||||
.gfx_level = pdev->info.gfx_level,
|
||||
.export_clipdist_mask = stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask,
|
||||
.param_offsets = stage->info.outinfo.vs_output_param_offset,
|
||||
|
|
|
|||
|
|
@ -1324,39 +1324,6 @@ bool si_should_clear_lds(struct si_screen *sscreen, const struct nir_shader *sha
|
|||
shader->info.shared_size > 0 && sscreen->options.clear_lds;
|
||||
}
|
||||
|
||||
static void
|
||||
si_init_gs_output_info(struct si_shader_info *info, struct si_temp_shader_variant_info *out_info)
|
||||
{
|
||||
for (int i = 0; i < info->num_outputs; i++) {
|
||||
unsigned slot = info->output_semantic[i];
|
||||
if (slot < VARYING_SLOT_VAR0_16BIT) {
|
||||
out_info->gs_streams[slot] = info->output_streams[i];
|
||||
out_info->gs_out_usage_mask[slot] = info->output_usagemask[i];
|
||||
} else {
|
||||
unsigned index = slot - VARYING_SLOT_VAR0_16BIT;
|
||||
/* TODO: 16bit need separated fields for lo/hi part. */
|
||||
out_info->gs_streams_16bit_lo[index] = info->output_streams[i];
|
||||
out_info->gs_streams_16bit_hi[index] = info->output_streams[i];
|
||||
out_info->gs_out_usage_mask_16bit_lo[index] = info->output_usagemask[i];
|
||||
out_info->gs_out_usage_mask_16bit_hi[index] = info->output_usagemask[i];
|
||||
}
|
||||
}
|
||||
|
||||
ac_nir_gs_output_info *ac_info = &out_info->gs_out_info;
|
||||
|
||||
ac_info->streams = out_info->gs_streams;
|
||||
ac_info->streams_16bit_lo = out_info->gs_streams_16bit_lo;
|
||||
ac_info->streams_16bit_hi = out_info->gs_streams_16bit_hi;
|
||||
|
||||
ac_info->sysval_mask = out_info->gs_out_usage_mask;
|
||||
ac_info->varying_mask = out_info->gs_out_usage_mask;
|
||||
ac_info->varying_mask_16bit_lo = out_info->gs_out_usage_mask_16bit_lo;
|
||||
ac_info->varying_mask_16bit_hi = out_info->gs_out_usage_mask_16bit_hi;
|
||||
|
||||
/* TODO: construct 16bit slot per component store type. */
|
||||
ac_info->types_16bit_lo = ac_info->types_16bit_hi = NULL;
|
||||
}
|
||||
|
||||
/* Run passes that eliminate code and affect shader_info. These should be run before linking
|
||||
* and shader_info gathering. Lowering passes can be run here too, but only if they lead to
|
||||
* better code or lower undesirable representations (like derefs). Lowering passes that prevent
|
||||
|
|
@ -1624,15 +1591,12 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
|
|||
ctx->temp_info.vs_output_param_offset[semantic] = shader->info.nr_param_exports++;
|
||||
}
|
||||
|
||||
si_init_gs_output_info(&sel->info, &ctx->temp_info);
|
||||
|
||||
unsigned clip_cull_mask =
|
||||
(sel->info.clipdist_mask & ~shader->key.ge.opt.kill_clip_distances) | sel->info.culldist_mask;
|
||||
|
||||
ac_nir_lower_legacy_gs_options options = {
|
||||
.has_gen_prim_query = false,
|
||||
.has_pipeline_stats_query = sel->screen->use_ngg,
|
||||
.output_info = &ctx->temp_info.gs_out_info,
|
||||
.gfx_level = sel->screen->info.gfx_level,
|
||||
.export_clipdist_mask = clip_cull_mask,
|
||||
.param_offsets = ctx->temp_info.vs_output_param_offset,
|
||||
|
|
|
|||
|
|
@ -189,17 +189,6 @@ struct si_shader_info {
|
|||
* finished.
|
||||
*/
|
||||
struct si_temp_shader_variant_info {
|
||||
/* Legacy GS output info. */
|
||||
uint8_t gs_streams[64];
|
||||
uint8_t gs_streams_16bit_lo[16];
|
||||
uint8_t gs_streams_16bit_hi[16];
|
||||
|
||||
uint8_t gs_out_usage_mask[64];
|
||||
uint8_t gs_out_usage_mask_16bit_lo[16];
|
||||
uint8_t gs_out_usage_mask_16bit_hi[16];
|
||||
|
||||
ac_nir_gs_output_info gs_out_info;
|
||||
|
||||
uint8_t vs_output_param_offset[NUM_TOTAL_VARYING_SLOTS];
|
||||
bool has_non_uniform_tex_access : 1;
|
||||
bool has_shadow_comparison : 1;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue