diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index da430340949..8e41af2d086 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -692,3 +692,51 @@ ir3_shader_outputs(const struct ir3_shader *so) { return so->nir->info.outputs_written; } + + +/* Add to linkage map 'l' any varyings of variant 'v' needed for stream-out. + * Otherwise varyings not used by the fragment shader will be stripped out. + */ +void +ir3_link_stream_out(struct ir3_shader_linkage *l, const struct ir3_shader_variant *v) +{ + const struct ir3_stream_output_info *strmout = &v->shader->stream_output; + + /* + * Any stream-out varyings not already in the linkage map (ie. those not + * also consumed by the frag shader) need to be added: + */ + for (unsigned i = 0; i < strmout->num_outputs; i++) { + const struct ir3_stream_output *out = &strmout->output[i]; + unsigned k = out->register_index; + unsigned compmask = + (1 << (out->num_components + out->start_component)) - 1; + unsigned idx, nextloc = 0; + + /* psize/pos need to be the last entries in linkage map; presumably they + * get added by the caller after ir3_link_stream_out, so skip over them: + */ + if ((v->outputs[k].slot == VARYING_SLOT_PSIZ) || + (v->outputs[k].slot == VARYING_SLOT_POS)) + continue; + + for (idx = 0; idx < l->cnt; idx++) { + if (l->var[idx].regid == v->outputs[k].regid) + break; + nextloc = MAX2(nextloc, l->var[idx].loc + 4); + } + + /* add if not already in linkage map (search above found no match): */ + if (idx == l->cnt) + ir3_link_add(l, v->outputs[k].regid, compmask, nextloc); + + /* expand component-mask if needed, ie streaming out all components but + * frag shader doesn't consume all of them; max_loc is raised to match: + */ + if (compmask & ~l->var[idx].compmask) { + l->var[idx].compmask |= compmask; + l->max_loc = MAX2(l->max_loc, + l->var[idx].loc + util_last_bit(l->var[idx].compmask)); + } + } +} diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 1218b18eb02..d28567b0a60 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ 
b/src/freedreno/ir3/ir3_shader.h @@ -977,6 +977,8 @@ ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot) return regid(63, 0); } +void ir3_link_stream_out(struct ir3_shader_linkage *l, const struct ir3_shader_variant *v); + #define VARYING_SLOT_GS_HEADER_IR3 (VARYING_SLOT_MAX + 0) #define VARYING_SLOT_GS_VERTEX_FLAGS_IR3 (VARYING_SLOT_MAX + 1) #define VARYING_SLOT_TCS_HEADER_IR3 (VARYING_SLOT_MAX + 2) diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index bd7072d5371..b7e9d2d20aa 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -607,54 +607,6 @@ tu6_emit_vs_system_values(struct tu_cs *cs, tu_cs_emit(cs, COND(primid_passthru, A6XX_VFD_CONTROL_6_PRIMID_PASSTHRU)); /* VFD_CONTROL_6 */ } -/* Add any missing varyings needed for stream-out. Otherwise varyings not - * used by fragment shader will be stripped out. - */ -static void -tu6_link_streamout(struct ir3_shader_linkage *l, - const struct ir3_shader_variant *v) -{ - const struct ir3_stream_output_info *info = &v->shader->stream_output; - - /* - * First, any stream-out varyings not already in linkage map (ie. 
also - * consumed by frag shader) need to be added: - */ - for (unsigned i = 0; i < info->num_outputs; i++) { - const struct ir3_stream_output *out = &info->output[i]; - unsigned compmask = - (1 << (out->num_components + out->start_component)) - 1; - unsigned k = out->register_index; - unsigned idx, nextloc = 0; - - /* psize/pos need to be the last entries in linkage map, and will - * get added link_stream_out, so skip over them: - */ - if (v->outputs[k].slot == VARYING_SLOT_PSIZ || - v->outputs[k].slot == VARYING_SLOT_POS) - continue; - - for (idx = 0; idx < l->cnt; idx++) { - if (l->var[idx].regid == v->outputs[k].regid) - break; - nextloc = MAX2(nextloc, l->var[idx].loc + 4); - } - - /* add if not already in linkage map: */ - if (idx == l->cnt) - ir3_link_add(l, v->outputs[k].regid, compmask, nextloc); - - /* expand component-mask if needed, ie streaming out all components - * but frag shader doesn't consume all components: - */ - if (compmask & ~l->var[idx].compmask) { - l->var[idx].compmask |= compmask; - l->max_loc = MAX2(l->max_loc, l->var[idx].loc + - util_last_bit(l->var[idx].compmask)); - } - } -} - static void tu6_setup_streamout(struct tu_cs *cs, const struct ir3_shader_variant *v, @@ -888,7 +840,7 @@ tu6_emit_vpc(struct tu_cs *cs, ir3_link_shaders(&linkage, last_shader, fs, true); if (last_shader->shader->stream_output.num_outputs) - tu6_link_streamout(&linkage, last_shader); + ir3_link_stream_out(&linkage, last_shader); /* We do this after linking shaders in order to know whether PrimID * passthrough needs to be enabled. diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c index 9ff6f3c6979..7c29bcf6453 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -82,53 +82,6 @@ fd5_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) } } -/* Add any missing varyings needed for stream-out. 
Otherwise varyings not - * used by fragment shader will be stripped out. - */ -static void -link_stream_out(struct ir3_shader_linkage *l, const struct ir3_shader_variant *v) -{ - const struct ir3_stream_output_info *strmout = &v->shader->stream_output; - - /* - * First, any stream-out varyings not already in linkage map (ie. also - * consumed by frag shader) need to be added: - */ - for (unsigned i = 0; i < strmout->num_outputs; i++) { - const struct ir3_stream_output *out = &strmout->output[i]; - unsigned k = out->register_index; - unsigned compmask = - (1 << (out->num_components + out->start_component)) - 1; - unsigned idx, nextloc = 0; - - /* psize/pos need to be the last entries in linkage map, and will - * get added link_stream_out, so skip over them: - */ - if ((v->outputs[k].slot == VARYING_SLOT_PSIZ) || - (v->outputs[k].slot == VARYING_SLOT_POS)) - continue; - - for (idx = 0; idx < l->cnt; idx++) { - if (l->var[idx].regid == v->outputs[k].regid) - break; - nextloc = MAX2(nextloc, l->var[idx].loc + 4); - } - - /* add if not already in linkage map: */ - if (idx == l->cnt) - ir3_link_add(l, v->outputs[k].regid, compmask, nextloc); - - /* expand component-mask if needed, ie streaming out all components - * but frag shader doesn't consume all components: - */ - if (compmask & ~l->var[idx].compmask) { - l->var[idx].compmask |= compmask; - l->max_loc = MAX2(l->max_loc, - l->var[idx].loc + util_last_bit(l->var[idx].compmask)); - } - } -} - /* TODO maybe some of this we could pre-compute once rather than having * so much draw-time logic? 
*/ @@ -416,7 +369,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, if ((s[VS].v->shader->stream_output.num_outputs > 0) && !emit->binning_pass) - link_stream_out(&l, s[VS].v); + ir3_link_stream_out(&l, s[VS].v); OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4); OUT_RING(ring, ~l.varmask[0]); /* VPC_VAR[0].DISABLE */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index bdbd6322dde..c734dc156f5 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -142,52 +142,6 @@ fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RELOC(ring, so->bo, 0, 0, 0); } -/* Add any missing varyings needed for stream-out. Otherwise varyings not - * used by fragment shader will be stripped out. - */ -static void -link_stream_out(struct ir3_shader_linkage *l, const struct ir3_shader_variant *v) -{ - const struct ir3_stream_output_info *strmout = &v->shader->stream_output; - - /* - * First, any stream-out varyings not already in linkage map (ie. 
also - * consumed by frag shader) need to be added: - */ - for (unsigned i = 0; i < strmout->num_outputs; i++) { - const struct ir3_stream_output *out = &strmout->output[i]; - unsigned k = out->register_index; - unsigned compmask = - (1 << (out->num_components + out->start_component)) - 1; - unsigned idx, nextloc = 0; - - /* psize/pos need to be the last entries in linkage map, and will - * get added link_stream_out, so skip over them: - */ - if ((v->outputs[k].slot == VARYING_SLOT_PSIZ) || - (v->outputs[k].slot == VARYING_SLOT_POS)) - continue; - - for (idx = 0; idx < l->cnt; idx++) { - if (l->var[idx].regid == v->outputs[k].regid) - break; - nextloc = MAX2(nextloc, l->var[idx].loc + 4); - } - - /* add if not already in linkage map: */ - if (idx == l->cnt) - ir3_link_add(l, v->outputs[k].regid, compmask, nextloc); - - /* expand component-mask if needed, ie streaming out all components - * but frag shader doesn't consume all components: - */ - if (compmask & ~l->var[idx].compmask) { - l->var[idx].compmask |= compmask; - l->max_loc = MAX2(l->max_loc, - l->var[idx].loc + util_last_bit(l->var[idx].compmask)); - } - } -} static void setup_stream_out(struct fd6_program_state *state, const struct ir3_shader_variant *v, @@ -540,7 +494,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, OUT_RING(ring, ~l.varmask[3]); /* VPC_VAR[3].DISABLE */ /* Add stream out outputs after computing the VPC_VAR_DISABLE bitmask. */ - link_stream_out(&l, last_shader); + ir3_link_stream_out(&l, last_shader); if (VALIDREG(layer_regid)) { layer_loc = l.max_loc;