diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h index f01ee03c62b..15d41064b6e 100644 --- a/src/amd/common/nir/ac_nir.h +++ b/src/amd/common/nir/ac_nir.h @@ -253,22 +253,6 @@ ac_nir_lower_global_access(nir_shader *shader); bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level); bool ac_nir_lower_image_opcodes(nir_shader *nir); -typedef struct ac_nir_gs_output_info { - const uint8_t *streams; - const uint8_t *streams_16bit_lo; - const uint8_t *streams_16bit_hi; - - const uint8_t *varying_mask; - const uint8_t *varying_mask_16bit_lo; - const uint8_t *varying_mask_16bit_hi; - - const uint8_t *sysval_mask; - - /* type for each 16bit slot component */ - nir_alu_type (*types_16bit_lo)[4]; - nir_alu_type (*types_16bit_hi)[4]; -} ac_nir_gs_output_info; - bool ac_nir_lower_legacy_vs(nir_shader *nir, enum amd_gfx_level gfx_level, @@ -286,7 +270,6 @@ ac_nir_lower_legacy_vs(nir_shader *nir, typedef struct { bool has_gen_prim_query; bool has_pipeline_stats_query; - ac_nir_gs_output_info *output_info; enum amd_gfx_level gfx_level; uint32_t export_clipdist_mask; @@ -300,9 +283,6 @@ typedef struct { bool force_vrs; } ac_nir_lower_legacy_gs_options; -nir_shader * -ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options); - bool ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options, nir_shader **gs_copy_shader); diff --git a/src/amd/common/nir/ac_nir_create_gs_copy_shader.c b/src/amd/common/nir/ac_nir_create_gs_copy_shader.c index 35c10e34f23..33280f6cf29 100644 --- a/src/amd/common/nir/ac_nir_create_gs_copy_shader.c +++ b/src/amd/common/nir/ac_nir_create_gs_copy_shader.c @@ -11,11 +11,11 @@ #include "nir_xfb_info.h" nir_shader * -ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options) +ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options, + ac_nir_prerast_out *out) { nir_builder b = 
nir_builder_init_simple_shader( MESA_SHADER_VERTEX, gs_nir->options, "gs_copy"); - ac_nir_gs_output_info *output_info = options->output_info; b.shader->info.outputs_written = gs_nir->info.outputs_written; b.shader->info.outputs_written_16bit = gs_nir->info.outputs_written_16bit; @@ -37,68 +37,63 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_op if (stream_id) nir_push_if(&b, nir_ieq_imm(&b, stream_id, stream)); - uint32_t offset = 0; - ac_nir_prerast_out out = {0}; - if (output_info->types_16bit_lo) - memcpy(&out.types_16bit_lo, output_info->types_16bit_lo, sizeof(out.types_16bit_lo)); - if (output_info->types_16bit_hi) - memcpy(&out.types_16bit_hi, output_info->types_16bit_hi, sizeof(out.types_16bit_hi)); + unsigned offset = 0; u_foreach_bit64 (i, gs_nir->info.outputs_written) { - const uint8_t usage_mask = output_info->varying_mask[i] | output_info->sysval_mask[i]; - out.infos[i].components_mask = usage_mask; - out.infos[i].as_varying_mask = output_info->varying_mask[i]; - out.infos[i].as_sysval_mask = output_info->sysval_mask[i]; - - u_foreach_bit (j, usage_mask) { - if (((output_info->streams[i] >> (j * 2)) & 0x3) != stream) + u_foreach_bit (j, out->infos[i].components_mask) { + if (((out->infos[i].stream >> (j * 2)) & 0x3) != stream) continue; - out.outputs[i][j] = + if (ac_nir_is_const_output(out, i, j)) { + out->outputs[i][j] = ac_nir_get_const_output(&b, 32, out, i, j); + continue; + } + + unsigned base = offset * gs_nir->info.gs.vertices_out * 16; + out->outputs[i][j] = nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero, - .base = offset, + .base = base, .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL); - offset += gs_nir->info.gs.vertices_out * 16 * 4; + offset += 4; } } u_foreach_bit (i, gs_nir->info.outputs_written_16bit) { - out.infos_16bit_lo[i].components_mask = output_info->varying_mask_16bit_lo[i]; - out.infos_16bit_lo[i].as_varying_mask = output_info->varying_mask_16bit_lo[i]; - 
out.infos_16bit_hi[i].components_mask = output_info->varying_mask_16bit_hi[i]; - out.infos_16bit_hi[i].as_varying_mask = output_info->varying_mask_16bit_hi[i]; + unsigned mask = out->infos_16bit_lo[i].components_mask | + out->infos_16bit_hi[i].components_mask; - for (unsigned j = 0; j < 4; j++) { - out.infos[i].as_varying_mask = output_info->varying_mask[i]; - out.infos[i].as_sysval_mask = output_info->sysval_mask[i]; + u_foreach_bit (j, mask) { + bool has_lo_16bit = ((out->infos_16bit_lo[i].stream >> (j * 2)) & 0x3) == stream; + bool has_hi_16bit = ((out->infos_16bit_hi[i].stream >> (j * 2)) & 0x3) == stream; - bool has_lo_16bit = (output_info->varying_mask_16bit_lo[i] & (1 << j)) && - ((output_info->streams_16bit_lo[i] >> (j * 2)) & 0x3) == stream; - bool has_hi_16bit = (output_info->varying_mask_16bit_hi[i] & (1 << j)) && - ((output_info->streams_16bit_hi[i] >> (j * 2)) & 0x3) == stream; + if (!has_lo_16bit && !has_hi_16bit) continue; - nir_def *data = - nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero, - .base = offset, - .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL); + nir_def *load_val; + + if (ac_nir_is_const_output(out, VARYING_SLOT_VAR0_16BIT + i, j)) { + load_val = ac_nir_get_const_output(&b, 32, out, VARYING_SLOT_VAR0_16BIT + i, j); + } else { + unsigned base = offset * gs_nir->info.gs.vertices_out * 16; + load_val = nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero, + .base = base, + .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL); + offset += 4; + } if (has_lo_16bit) - out.outputs_16bit_lo[i][j] = nir_unpack_32_2x16_split_x(&b, data); + out->outputs_16bit_lo[i][j] = nir_unpack_32_2x16_split_x(&b, load_val); if (has_hi_16bit) - out.outputs_16bit_hi[i][j] = nir_unpack_32_2x16_split_y(&b, data); - - offset += gs_nir->info.gs.vertices_out * 16 * 4; + out->outputs_16bit_hi[i][j] = nir_unpack_32_2x16_split_y(&b, load_val); } } if (stream_id) - ac_nir_emit_legacy_streamout(&b, stream, info, &out); + ac_nir_emit_legacy_streamout(&b, stream, info, out); 
/* This should be after streamout and before exports. */ - ac_nir_clamp_vertex_color_outputs(&b, &out); + ac_nir_clamp_vertex_color_outputs(&b, out); if (stream == 0) { uint64_t export_outputs = b.shader->info.outputs_written | VARYING_BIT_POS; @@ -110,13 +105,13 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_op ac_nir_export_position(&b, options->gfx_level, options->export_clipdist_mask, false, options->write_pos_to_clipvertex, options->pack_clip_cull_distances, !options->has_param_exports, options->force_vrs, export_outputs, - &out, NULL); + out, NULL); if (options->has_param_exports) { ac_nir_export_parameters(&b, options->param_offsets, b.shader->info.outputs_written, b.shader->info.outputs_written_16bit, - &out); + out); } } diff --git a/src/amd/common/nir/ac_nir_helpers.h b/src/amd/common/nir/ac_nir_helpers.h index 39d0bf89e41..de09d2a6f4b 100644 --- a/src/amd/common/nir/ac_nir_helpers.h +++ b/src/amd/common/nir/ac_nir_helpers.h @@ -11,6 +11,7 @@ #include "ac_hw_stage.h" #include "ac_shader_args.h" #include "ac_shader_util.h" +#include "ac_nir.h" #include "nir_defines.h" #ifdef __cplusplus @@ -237,6 +238,13 @@ ac_nir_ngg_build_streamout_buffer_info(nir_builder *b, nir_def *buffer_offsets_ret[4], nir_def *emit_prim_ret[4]); +bool +ac_nir_is_const_output(ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned component); + +nir_def * +ac_nir_get_const_output(nir_builder *b, unsigned bit_size, ac_nir_prerast_out *pr_out, gl_varying_slot slot, + unsigned component); + void ac_nir_store_shared_xfb(nir_builder *b, nir_def *value, nir_def *vtxptr, ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned component); @@ -272,6 +280,10 @@ ac_nir_compute_prerast_packed_output_info(ac_nir_prerast_out *pr_out); unsigned ac_nir_gs_output_component_mask_with_stream(ac_nir_prerast_per_output_info *info, unsigned stream); +nir_shader * +ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options, + 
ac_nir_prerast_out *out); + #ifdef __cplusplus } #endif diff --git a/src/amd/common/nir/ac_nir_lower_legacy_gs.c b/src/amd/common/nir/ac_nir_lower_legacy_gs.c index 4c8b911e55a..aed0adb640f 100644 --- a/src/amd/common/nir/ac_nir_lower_legacy_gs.c +++ b/src/amd/common/nir/ac_nir_lower_legacy_gs.c @@ -10,11 +10,7 @@ #include "nir_builder.h" typedef struct { - nir_def *outputs[64][4]; - nir_def *outputs_16bit_lo[16][4]; - nir_def *outputs_16bit_hi[16][4]; - - ac_nir_gs_output_info *info; + ac_nir_prerast_out out; nir_def *vertex_count[4]; nir_def *primitive_count[4]; @@ -24,54 +20,7 @@ static bool lower_legacy_gs_store_output(nir_builder *b, nir_intrinsic_instr *intrin, lower_legacy_gs_state *s) { - /* Assume: - * - the shader used nir_lower_io_vars_to_temporaries - * - 64-bit outputs are lowered - * - no indirect indexing is present - */ - assert(nir_src_is_const(intrin->src[1]) && !nir_src_as_uint(intrin->src[1])); - - b->cursor = nir_before_instr(&intrin->instr); - - unsigned component = nir_intrinsic_component(intrin); - unsigned write_mask = nir_intrinsic_write_mask(intrin); - nir_io_semantics sem = nir_intrinsic_io_semantics(intrin); - - nir_def **outputs; - if (sem.location < VARYING_SLOT_VAR0_16BIT) { - outputs = s->outputs[sem.location]; - } else { - unsigned index = sem.location - VARYING_SLOT_VAR0_16BIT; - if (sem.high_16bits) - outputs = s->outputs_16bit_hi[index]; - else - outputs = s->outputs_16bit_lo[index]; - } - - nir_def *store_val = intrin->src[0].ssa; - /* 64bit output has been lowered to 32bit */ - assert(store_val->bit_size <= 32); - - /* 16-bit output stored in a normal varying slot that isn't a dedicated 16-bit slot. */ - const bool non_dedicated_16bit = sem.location < VARYING_SLOT_VAR0_16BIT && store_val->bit_size == 16; - - u_foreach_bit (i, write_mask) { - unsigned comp = component + i; - nir_def *store_component = nir_channel(b, store_val, i); - - if (non_dedicated_16bit) { - if (sem.high_16bits) { - nir_def *lo = outputs[comp] ? 
nir_unpack_32_2x16_split_x(b, outputs[comp]) : nir_imm_intN_t(b, 0, 16); - outputs[comp] = nir_pack_32_2x16_split(b, lo, store_component); - } else { - nir_def *hi = outputs[comp] ? nir_unpack_32_2x16_split_y(b, outputs[comp]) : nir_imm_intN_t(b, 0, 16); - outputs[comp] = nir_pack_32_2x16_split(b, store_component, hi); - } - } else { - outputs[comp] = store_component; - } - } - + ac_nir_gather_prerast_store_output_info(b, intrin, &s->out, true); nir_instr_remove(&intrin->instr); return true; } @@ -89,29 +38,21 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in nir_def *soffset = nir_load_ring_gs2vs_offset_amd(b); unsigned offset = 0; - u_foreach_bit64 (i, b->shader->info.outputs_written) { - for (unsigned j = 0; j < 4; j++) { - nir_def *output = s->outputs[i][j]; - /* Next vertex emit need a new value, reset all outputs. */ - s->outputs[i][j] = NULL; - const uint8_t usage_mask = s->info->varying_mask[i] | s->info->sysval_mask[i]; + u_foreach_bit64 (slot, b->shader->info.outputs_written) { + unsigned mask = ac_nir_gs_output_component_mask_with_stream(&s->out.infos[slot], stream); + nir_def **output = s->out.outputs[slot]; - if (!(usage_mask & (1 << j)) || - ((s->info->streams[i] >> (j * 2)) & 0x3) != stream) - continue; - - unsigned base = offset * b->shader->info.gs.vertices_out * 4; - offset++; - - /* no one set this output, skip the buffer store */ - if (!output) + u_foreach_bit(c, mask) { + /* The shader hasn't written this output yet. 
*/ + if (!output[c] || ac_nir_is_const_output(&s->out, slot, c)) continue; + unsigned base = offset * b->shader->info.gs.vertices_out; nir_def *voffset = nir_ishl_imm(b, vtxidx, 2); /* extend 8/16 bit to 32 bit, 64 bit has been lowered */ - nir_def *data = nir_u2uN(b, output, 32); + nir_def *store_val = nir_u2u32(b, output[c]); unsigned align_mul = 4; unsigned align_offset = 0; @@ -121,57 +62,56 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in align_offset = v_const_offset % align_mul; } - nir_store_buffer_amd(b, data, gsvs_ring, voffset, soffset, nir_imm_int(b, 0), + nir_store_buffer_amd(b, store_val, gsvs_ring, voffset, soffset, nir_imm_int(b, 0), .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL | ACCESS_IS_SWIZZLED_AMD, .base = base, /* For ACO to not reorder this store around EmitVertex/EndPrimitve */ .memory_modes = nir_var_shader_out, .align_mul = align_mul, .align_offset = align_offset); + offset += 4; } + + /* Clear all outputs (they are undefined after emit_vertex) */ + memset(s->out.outputs[slot], 0, sizeof(s->out.outputs[slot])); } - u_foreach_bit (i, b->shader->info.outputs_written_16bit) { - for (unsigned j = 0; j < 4; j++) { - nir_def *output_lo = s->outputs_16bit_lo[i][j]; - nir_def *output_hi = s->outputs_16bit_hi[i][j]; - /* Next vertex emit need a new value, reset all outputs. 
*/ - s->outputs_16bit_lo[i][j] = NULL; - s->outputs_16bit_hi[i][j] = NULL; + u_foreach_bit (slot, b->shader->info.outputs_written_16bit) { + const unsigned mask_lo = ac_nir_gs_output_component_mask_with_stream(s->out.infos_16bit_lo + slot, stream); + const unsigned mask_hi = ac_nir_gs_output_component_mask_with_stream(s->out.infos_16bit_hi + slot, stream); + unsigned mask = mask_lo | mask_hi; - bool has_lo_16bit = (s->info->varying_mask_16bit_lo[i] & (1 << j)) && - ((s->info->streams_16bit_lo[i] >> (j * 2)) & 0x3) == stream; - bool has_hi_16bit = (s->info->varying_mask_16bit_hi[i] & (1 << j)) && - ((s->info->streams_16bit_hi[i] >> (j * 2)) & 0x3) == stream; - if (!has_lo_16bit && !has_hi_16bit) + nir_def **output_lo = s->out.outputs_16bit_lo[slot]; + nir_def **output_hi = s->out.outputs_16bit_hi[slot]; + nir_def *undef = nir_undef(b, 1, 16); + + u_foreach_bit(c, mask) { + /* The shader hasn't written this output yet. */ + if ((!output_lo[c] && !output_hi[c]) || + ac_nir_is_const_output(&s->out, VARYING_SLOT_VAR0_16BIT + slot, c)) continue; + nir_def *lo = output_lo[c] ? output_lo[c] : undef; + nir_def *hi = output_hi[c] ? 
output_hi[c] : undef; + nir_def *store_val = nir_pack_32_2x16_split(b, lo, hi); + unsigned base = offset * b->shader->info.gs.vertices_out; - offset++; - - bool has_lo_16bit_out = has_lo_16bit && output_lo; - bool has_hi_16bit_out = has_hi_16bit && output_hi; - - /* no one set needed output, skip the buffer store */ - if (!has_lo_16bit_out && !has_hi_16bit_out) - continue; - - if (!has_lo_16bit_out) - output_lo = nir_undef(b, 1, 16); - - if (!has_hi_16bit_out) - output_hi = nir_undef(b, 1, 16); nir_def *voffset = nir_iadd_imm(b, vtxidx, base); voffset = nir_ishl_imm(b, voffset, 2); - nir_store_buffer_amd(b, nir_pack_32_2x16_split(b, output_lo, output_hi), + nir_store_buffer_amd(b, store_val, gsvs_ring, voffset, soffset, nir_imm_int(b, 0), .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL | ACCESS_IS_SWIZZLED_AMD, /* For ACO to not reorder this store around EmitVertex/EndPrimitve */ .memory_modes = nir_var_shader_out); + offset += 4; } + + /* Clear all outputs (they are undefined after emit_vertex) */ + memset(s->out.outputs_16bit_lo[slot], 0, sizeof(s->out.outputs_16bit_lo[slot])); + memset(s->out.outputs_16bit_hi[slot], 0, sizeof(s->out.outputs_16bit_hi[slot])); } /* Signal vertex emission. 
*/ @@ -229,13 +169,33 @@ lower_legacy_gs_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, void *sta return false; } +static bool +gather_output_store_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, void *state) +{ + lower_legacy_gs_state *s = (lower_legacy_gs_state *) state; + + if (intrin->intrinsic == nir_intrinsic_store_output) { + ac_nir_gather_prerast_store_output_info(b, intrin, &s->out, false); + return true; + } + + return false; +} + +static void +gather_output_stores(nir_shader *shader, lower_legacy_gs_state *s) +{ + nir_shader_intrinsics_pass(shader, gather_output_store_intrinsic, nir_metadata_none, s); +} + bool ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options, nir_shader **gs_copy_shader) { - lower_legacy_gs_state s = { - .info = options->output_info, - }; + lower_legacy_gs_state s = {0}; + + gather_output_stores(nir, &s); + ac_nir_compute_prerast_packed_output_info(&s.out); unsigned num_vertices_per_primitive = 0; switch (nir->info.gs.output_primitive) { @@ -284,6 +244,6 @@ ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options, nir_progress(progress, impl, nir_metadata_none); - *gs_copy_shader = ac_nir_create_gs_copy_shader(nir, options); + *gs_copy_shader = ac_nir_create_gs_copy_shader(nir, options, &s.out); return true; } diff --git a/src/amd/common/nir/ac_nir_prerast_utils.c b/src/amd/common/nir/ac_nir_prerast_utils.c index 7a66150031f..54fd7aa919e 100644 --- a/src/amd/common/nir/ac_nir_prerast_utils.c +++ b/src/amd/common/nir/ac_nir_prerast_utils.c @@ -1407,8 +1407,8 @@ ac_nir_ngg_get_xfb_lds_offset(ac_nir_prerast_out *pr_out, gl_varying_slot slot, return lds_slot_offset + util_bitcount(lds_component_mask & BITFIELD_MASK(component)) * 4; } -static bool -is_const_output(ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned component) +bool +ac_nir_is_const_output(ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned component) { if (slot >= VARYING_SLOT_VAR0_16BIT) { 
slot -= VARYING_SLOT_VAR0_16BIT; @@ -1419,11 +1419,11 @@ is_const_output(ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned compo } } -static nir_def * -get_const_output(nir_builder *b, unsigned bit_size, ac_nir_prerast_out *pr_out, gl_varying_slot slot, - unsigned component) +nir_def * +ac_nir_get_const_output(nir_builder *b, unsigned bit_size, ac_nir_prerast_out *pr_out, gl_varying_slot slot, + unsigned component) { - if (!is_const_output(pr_out, slot, component)) + if (!ac_nir_is_const_output(pr_out, slot, component)) return NULL; if (slot >= VARYING_SLOT_VAR0_16BIT) @@ -1437,7 +1437,7 @@ ac_nir_store_shared_xfb(nir_builder *b, nir_def *value, nir_def *vtxptr, ac_nir_ gl_varying_slot slot, unsigned component) { assert(value->num_components == 1); - if (is_const_output(pr_out, slot, component)) + if (ac_nir_is_const_output(pr_out, slot, component)) return; unsigned offset = ac_nir_ngg_get_xfb_lds_offset(pr_out, slot, component, value->bit_size == 16); @@ -1448,7 +1448,7 @@ nir_def * ac_nir_load_shared_xfb(nir_builder *b, unsigned bit_size, nir_def *vtxptr, ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned component) { - nir_def *const_val = get_const_output(b, bit_size, pr_out, slot, component); + nir_def *const_val = ac_nir_get_const_output(b, bit_size, pr_out, slot, component); if (const_val) return const_val; @@ -1461,7 +1461,7 @@ ac_nir_store_shared_gs_out(nir_builder *b, nir_def *value, nir_def *vtxptr, ac_n gl_varying_slot slot, unsigned component) { assert(value->num_components == 1); - if (is_const_output(pr_out, slot, component)) + if (ac_nir_is_const_output(pr_out, slot, component)) return; unsigned offset = ac_nir_get_lds_gs_out_slot_offset(pr_out, slot, component); @@ -1472,7 +1472,7 @@ nir_def * ac_nir_load_shared_gs_out(nir_builder *b, unsigned bit_size, nir_def *vtxptr, ac_nir_prerast_out *pr_out, gl_varying_slot slot, unsigned component) { - nir_def *const_val = get_const_output(b, bit_size, pr_out, slot, component); + nir_def 
*const_val = ac_nir_get_const_output(b, bit_size, pr_out, slot, component); if (const_val) return const_val; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index cdc61488a22..6fe23b53e0a 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -449,15 +449,9 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex); } else { - ac_nir_gs_output_info gs_out_info = { - .streams = stage->info.gs.output_streams, - .sysval_mask = stage->info.gs.output_usage_mask, - .varying_mask = stage->info.gs.output_usage_mask, - }; ac_nir_lower_legacy_gs_options options = { .has_gen_prim_query = false, .has_pipeline_stats_query = false, - .output_info = &gs_out_info, .gfx_level = pdev->info.gfx_level, .export_clipdist_mask = stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask, .param_offsets = stage->info.outinfo.vs_output_param_offset, diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index e9b06ea2a92..1c721c4bb8b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1324,39 +1324,6 @@ bool si_should_clear_lds(struct si_screen *sscreen, const struct nir_shader *sha shader->info.shared_size > 0 && sscreen->options.clear_lds; } -static void -si_init_gs_output_info(struct si_shader_info *info, struct si_temp_shader_variant_info *out_info) -{ - for (int i = 0; i < info->num_outputs; i++) { - unsigned slot = info->output_semantic[i]; - if (slot < VARYING_SLOT_VAR0_16BIT) { - out_info->gs_streams[slot] = info->output_streams[i]; - out_info->gs_out_usage_mask[slot] = info->output_usagemask[i]; - } else { - unsigned index = slot - VARYING_SLOT_VAR0_16BIT; - /* TODO: 16bit need separated fields for lo/hi part. 
*/ - out_info->gs_streams_16bit_lo[index] = info->output_streams[i]; - out_info->gs_streams_16bit_hi[index] = info->output_streams[i]; - out_info->gs_out_usage_mask_16bit_lo[index] = info->output_usagemask[i]; - out_info->gs_out_usage_mask_16bit_hi[index] = info->output_usagemask[i]; - } - } - - ac_nir_gs_output_info *ac_info = &out_info->gs_out_info; - - ac_info->streams = out_info->gs_streams; - ac_info->streams_16bit_lo = out_info->gs_streams_16bit_lo; - ac_info->streams_16bit_hi = out_info->gs_streams_16bit_hi; - - ac_info->sysval_mask = out_info->gs_out_usage_mask; - ac_info->varying_mask = out_info->gs_out_usage_mask; - ac_info->varying_mask_16bit_lo = out_info->gs_out_usage_mask_16bit_lo; - ac_info->varying_mask_16bit_hi = out_info->gs_out_usage_mask_16bit_hi; - - /* TODO: construct 16bit slot per component store type. */ - ac_info->types_16bit_lo = ac_info->types_16bit_hi = NULL; -} - /* Run passes that eliminate code and affect shader_info. These should be run before linking * and shader_info gathering. Lowering passes can be run here too, but only if they lead to * better code or lower undesirable representations (like derefs). 
Lowering passes that prevent @@ -1624,15 +1591,12 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx * ctx->temp_info.vs_output_param_offset[semantic] = shader->info.nr_param_exports++; } - si_init_gs_output_info(&sel->info, &ctx->temp_info); - unsigned clip_cull_mask = (sel->info.clipdist_mask & ~shader->key.ge.opt.kill_clip_distances) | sel->info.culldist_mask; ac_nir_lower_legacy_gs_options options = { .has_gen_prim_query = false, .has_pipeline_stats_query = sel->screen->use_ngg, - .output_info = &ctx->temp_info.gs_out_info, .gfx_level = sel->screen->info.gfx_level, .export_clipdist_mask = clip_cull_mask, .param_offsets = ctx->temp_info.vs_output_param_offset, diff --git a/src/gallium/drivers/radeonsi/si_shader_info.h b/src/gallium/drivers/radeonsi/si_shader_info.h index 1563f22b696..5222451d8a0 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.h +++ b/src/gallium/drivers/radeonsi/si_shader_info.h @@ -189,17 +189,6 @@ struct si_shader_info { * finished. */ struct si_temp_shader_variant_info { - /* Legacy GS output info. */ - uint8_t gs_streams[64]; - uint8_t gs_streams_16bit_lo[16]; - uint8_t gs_streams_16bit_hi[16]; - - uint8_t gs_out_usage_mask[64]; - uint8_t gs_out_usage_mask_16bit_lo[16]; - uint8_t gs_out_usage_mask_16bit_hi[16]; - - ac_nir_gs_output_info gs_out_info; - uint8_t vs_output_param_offset[NUM_TOTAL_VARYING_SLOTS]; bool has_non_uniform_tex_access : 1; bool has_shadow_comparison : 1;