diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c index 080af7927e7..f07bb081c31 100644 --- a/src/amd/common/ac_nir.c +++ b/src/amd/common/ac_nir.c @@ -1153,6 +1153,36 @@ ac_nir_lower_indirect_derefs(nir_shader *shader, return progress; } +static int +sort_xfb(const void *_a, const void *_b) +{ + const nir_xfb_output_info *a = (const nir_xfb_output_info *)_a; + const nir_xfb_output_info *b = (const nir_xfb_output_info *)_b; + + if (a->buffer != b->buffer) + return a->buffer > b->buffer ? 1 : -1; + + assert(a->offset != b->offset); + return a->offset > b->offset ? 1 : -1; +} + +/* Return XFB info sorted by buffer and offset, so that we can generate vec4 + * stores by iterating over outputs only once. + */ +nir_xfb_info * +ac_nir_get_sorted_xfb_info(const nir_shader *nir) +{ + if (!nir->xfb_info) + return NULL; + + unsigned xfb_info_size = nir_xfb_info_size(nir->xfb_info->output_count); + nir_xfb_info *info = rzalloc_size(nir, xfb_info_size); + + memcpy(info, nir->xfb_info, xfb_info_size); + qsort(info->outputs, info->output_count, sizeof(info->outputs[0]), sort_xfb); + return info; +} + static nir_def ** get_output_and_type(ac_nir_prerast_out *out, unsigned slot, bool high_16bits, nir_alu_type **types) @@ -1270,7 +1300,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, nir_def *gsvs_ring = nir_load_ring_gsvs_amd(&b); - nir_xfb_info *info = gs_nir->xfb_info; + nir_xfb_info *info = ac_nir_get_sorted_xfb_info(gs_nir); nir_def *stream_id = NULL; if (!disable_streamout && info) stream_id = nir_ubfe_imm(&b, nir_load_streamout_config_amd(&b), 24, 2); @@ -1439,7 +1469,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir, } if (!disable_streamout && nir->xfb_info) { - emit_streamout(&b, 0, nir->xfb_info, &out); + emit_streamout(&b, 0, ac_nir_get_sorted_xfb_info(nir), &out); preserved = nir_metadata_none; } diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h index e2f6497a031..43c4ebc7835 100644 --- a/src/amd/common/ac_nir.h +++ b/src/amd/common/ac_nir.h @@ -54,6 +54,9 @@ typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic); struct nir_builder; typedef struct nir_builder nir_builder; +struct nir_xfb_info; +typedef struct nir_xfb_info nir_xfb_info; + /* Executed by ac_nir_cull when the current primitive is accepted. */ typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state); @@ -82,6 +85,8 @@ bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_leve unsigned wave_size, unsigned workgroup_size, const struct ac_shader_args *ac_args); +nir_xfb_info *ac_nir_get_sorted_xfb_info(const nir_shader *nir); + bool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed, int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS], uint8_t param_export_index[NUM_TOTAL_VARYING_SLOTS]); diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index 8d4ef86c2c0..3ca3a68d4b0 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -1841,7 +1841,7 @@ ngg_nogs_store_edgeflag_to_lds(nir_builder *b, lower_ngg_nogs_state *s) static void ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s) { - nir_xfb_info *info = b->shader->xfb_info; + nir_xfb_info *info = ac_nir_get_sorted_xfb_info(b->shader); uint64_t xfb_outputs = 0; unsigned xfb_outputs_16bit = 0; @@ -2368,7 +2368,7 @@ ngg_build_streamout_vertex(nir_builder *b, nir_xfb_info *info, static void ngg_nogs_build_streamout(nir_builder *b, lower_ngg_nogs_state *s) { - nir_xfb_info *info = b->shader->xfb_info; + nir_xfb_info *info = ac_nir_get_sorted_xfb_info(b->shader); nir_def *lds_scratch_base = nir_load_lds_ngg_scratch_base_amd(b); @@ -3421,7 +3421,7 @@ ngg_gs_cull_primitive(nir_builder *b, nir_def *tid_in_tg, nir_def *max_vtxcnt, static void ngg_gs_build_streamout(nir_builder *b, lower_ngg_gs_state *s) { - nir_xfb_info *info = b->shader->xfb_info; + nir_xfb_info *info = ac_nir_get_sorted_xfb_info(b->shader); nir_def *tid_in_tg = nir_load_local_invocation_index(b); nir_def *max_vtxcnt = nir_load_workgroup_num_input_vertices_amd(b);