diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 63bec0d25dc..a3b8f917af6 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -699,7 +699,7 @@ ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot) } else if (slot == VARYING_SLOT_COL1) { slot = VARYING_SLOT_BFC1; } else { - return 0; + return -1; } for (j = 0; j < so->outputs_count; j++) @@ -708,7 +708,7 @@ ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot) debug_assert(0); - return 0; + return -1; } static inline int @@ -721,35 +721,71 @@ ir3_next_varying(const struct ir3_shader_variant *so, int i) } struct ir3_shader_linkage { + /* Maximum location either consumed by the fragment shader or produced by + * the last geometry stage, i.e. the size required for each vertex in the + * VPC in DWORDs. + */ uint8_t max_loc; + + /* Number of entries in var. */ uint8_t cnt; + + /* Bitset of locations used, including ones which are only used by the FS. + */ + uint32_t varmask[4]; + + /* Map from VS output to location. 
*/ struct { uint8_t regid; uint8_t compmask; uint8_t loc; } var[32]; + + /* location for fixed-function gl_PrimitiveID passthrough */ + uint8_t primid_loc; }; static inline void -ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid, uint8_t compmask, uint8_t loc) +ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid_, uint8_t compmask, uint8_t loc) { - int i = l->cnt++; - debug_assert(i < ARRAY_SIZE(l->var)); - l->var[i].regid = regid; - l->var[i].compmask = compmask; - l->var[i].loc = loc; + for (int j = 0; j < util_last_bit(compmask); j++) { + uint8_t comploc = loc + j; + l->varmask[comploc / 32] |= 1u << (comploc % 32); /* 1u: avoid signed-shift UB at bit 31 */ + } + l->max_loc = MAX2(l->max_loc, loc + util_last_bit(compmask)); + + if (regid_ != regid(63, 0)) { + int i = l->cnt++; + debug_assert(i < ARRAY_SIZE(l->var)); + + l->var[i].regid = regid_; + l->var[i].compmask = compmask; + l->var[i].loc = loc; + } } static inline void ir3_link_shaders(struct ir3_shader_linkage *l, const struct ir3_shader_variant *vs, - const struct ir3_shader_variant *fs) + const struct ir3_shader_variant *fs, + bool pack_vs_out) { + /* On older platforms, varmask isn't programmed at all, and it appears + * that the hardware generates a mask of used VPC locations using the VS + * output map, and hangs if a FS bary instruction references a location + * not in the list. This means that we need to have a dummy entry in the + * VS out map for things like gl_PointCoord which aren't written by the + * VS. Furthermore we can't use r63.x, so just pick an arbitrary register to + * use if there is no VS output. + */ + const unsigned default_regid = pack_vs_out ? 
regid(63, 0) : regid(0, 0); int j = -1, k; + l->primid_loc = 0xff; + while (l->cnt < ARRAY_SIZE(l->var)) { j = ir3_next_varying(fs, j); @@ -761,7 +797,11 @@ ir3_link_shaders(struct ir3_shader_linkage *l, k = ir3_find_output(vs, fs->inputs[j].slot); - ir3_link_add(l, vs->outputs[k].regid, + if (k < 0 && fs->inputs[j].slot == VARYING_SLOT_PRIMITIVE_ID) { + l->primid_loc = fs->inputs[j].inloc; + } + + ir3_link_add(l, k >= 0 ? vs->outputs[k].regid : default_regid, fs->inputs[j].compmask, fs->inputs[j].inloc); } } diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index e3ac144603a..a86c6e1c384 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -929,23 +929,16 @@ tu6_emit_vpc(struct tu_cs *cs, bool has_gs = gs->type != MESA_SHADER_NONE; const struct ir3_shader_variant *last_shader = has_gs ? gs : vs; struct ir3_shader_linkage linkage = { 0 }; - ir3_link_shaders(&linkage, last_shader, fs); + ir3_link_shaders(&linkage, last_shader, fs, true); if (last_shader->shader->stream_output.num_outputs) tu6_link_streamout(&linkage, last_shader); - BITSET_DECLARE(vpc_var_enables, 128) = { 0 }; - for (uint32_t i = 0; i < linkage.cnt; i++) { - const uint32_t comp_count = util_last_bit(linkage.var[i].compmask); - for (uint32_t j = 0; j < comp_count; j++) - BITSET_SET(vpc_var_enables, linkage.var[i].loc + j); - } - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4); - tu_cs_emit(cs, ~vpc_var_enables[0]); - tu_cs_emit(cs, ~vpc_var_enables[1]); - tu_cs_emit(cs, ~vpc_var_enables[2]); - tu_cs_emit(cs, ~vpc_var_enables[3]); + tu_cs_emit(cs, ~linkage.varmask[0]); + tu_cs_emit(cs, ~linkage.varmask[1]); + tu_cs_emit(cs, ~linkage.varmask[2]); + tu_cs_emit(cs, ~linkage.varmask[3]); /* a6xx finds position/pointsize at the end */ const uint32_t position_regid = diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index c6a07b19389..cbcdfe57ea7 100644 --- 
a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -237,7 +237,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in)); struct ir3_shader_linkage l = {0}; - ir3_link_shaders(&l, vp, fp); + ir3_link_shaders(&l, vp, fp, false); for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) { uint32_t reg = 0; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 4a0e7568250..d9ccecb06ad 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -289,7 +289,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(s[FS].v->varying_in)); struct ir3_shader_linkage l = {0}; - ir3_link_shaders(&l, s[VS].v, s[FS].v); + ir3_link_shaders(&l, s[VS].v, s[FS].v, false); for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) { uint32_t reg = 0; diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c index b427f989470..a2fe505b4dd 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -410,24 +410,17 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, COND(s[VS].v->num_samp > 0, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); struct ir3_shader_linkage l = {0}; - ir3_link_shaders(&l, s[VS].v, s[FS].v); + ir3_link_shaders(&l, s[VS].v, s[FS].v, true); if ((s[VS].v->shader->stream_output.num_outputs > 0) && !emit->binning_pass) link_stream_out(&l, s[VS].v); - BITSET_DECLARE(varbs, 128) = {0}; - uint32_t *varmask = (uint32_t *)varbs; - - for (i = 0; i < l.cnt; i++) - for (j = 0; j < util_last_bit(l.var[i].compmask); j++) - BITSET_SET(varbs, l.var[i].loc + j); - OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4); - OUT_RING(ring, ~varmask[0]); /* VPC_VAR[0].DISABLE */ - 
OUT_RING(ring, ~varmask[1]); /* VPC_VAR[1].DISABLE */ - OUT_RING(ring, ~varmask[2]); /* VPC_VAR[2].DISABLE */ - OUT_RING(ring, ~varmask[3]); /* VPC_VAR[3].DISABLE */ + OUT_RING(ring, ~l.varmask[0]); /* VPC_VAR[0].DISABLE */ + OUT_RING(ring, ~l.varmask[1]); /* VPC_VAR[1].DISABLE */ + OUT_RING(ring, ~l.varmask[2]); /* VPC_VAR[2].DISABLE */ + OUT_RING(ring, ~l.varmask[3]); /* VPC_VAR[3].DISABLE */ /* a5xx appends pos/psize to end of the linkage map: */ if (pos_regid != regid(63,0)) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 96bed761579..9e12cb246c9 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -429,20 +429,13 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, struct ir3_shader_linkage l = {0}; const struct ir3_shader_variant *last_shader = fd6_last_shader(state); - ir3_link_shaders(&l, last_shader, fs); - - BITSET_DECLARE(varbs, 128) = {0}; - uint32_t *varmask = (uint32_t *)varbs; - - for (i = 0; i < l.cnt; i++) - for (j = 0; j < util_last_bit(l.var[i].compmask); j++) - BITSET_SET(varbs, l.var[i].loc + j); + ir3_link_shaders(&l, last_shader, fs, true); OUT_PKT4(ring, REG_A6XX_VPC_VAR_DISABLE(0), 4); - OUT_RING(ring, ~varmask[0]); /* VPC_VAR[0].DISABLE */ - OUT_RING(ring, ~varmask[1]); /* VPC_VAR[1].DISABLE */ - OUT_RING(ring, ~varmask[2]); /* VPC_VAR[2].DISABLE */ - OUT_RING(ring, ~varmask[3]); /* VPC_VAR[3].DISABLE */ + OUT_RING(ring, ~l.varmask[0]); /* VPC_VAR[0].DISABLE */ + OUT_RING(ring, ~l.varmask[1]); /* VPC_VAR[1].DISABLE */ + OUT_RING(ring, ~l.varmask[2]); /* VPC_VAR[2].DISABLE */ + OUT_RING(ring, ~l.varmask[3]); /* VPC_VAR[3].DISABLE */ /* Add stream out outputs after computing the VPC_VAR_DISABLE bitmask. */ if (last_shader->shader->stream_output.num_outputs > 0)