diff --git a/.pick_status.json b/.pick_status.json
index 63371b7f8e4..d736c2ef48e 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -499,7 +499,7 @@
         "description": "nir: Add ability to lower non-const quad broadcasts to const ones.",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 8c0a0f03b7a..967d54dc65d 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3881,6 +3881,7 @@ typedef struct nir_lower_subgroups_options {
    bool lower_shuffle_to_32bit:1;
    bool lower_quad:1;
    bool lower_quad_broadcast_dynamic:1;
+   bool lower_quad_broadcast_dynamic_to_const:1;
 } nir_lower_subgroups_options;
 
 bool nir_lower_subgroups(nir_shader *shader,
diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c
index 4462c708ec8..f5eebb85144 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -301,6 +301,61 @@ build_subgroup_mask(nir_builder *b, unsigned bit_size,
                                   nir_load_subgroup_size(b)));
 }
 
+/* Lowers a quad_broadcast whose invocation index (src[1]) is not constant.
+ *
+ * With lower_quad_broadcast_dynamic_to_const set, one constant-index
+ * quad_broadcast is emitted for each of the four indices and the requested
+ * result is chosen with a bcsel chain keyed on src[1].  Anything else is
+ * handed to lower_shuffle(), as before this helper existed.
+ *
+ * The call site can also reach this helper for intrinsics other than a
+ * dynamic quad_broadcast (it tests intrin->intrinsic itself), and those may
+ * not have a meaningful src[1], so they always take the lower_shuffle() path.
+ */
+static nir_ssa_def *
+lower_dynamic_quad_broadcast(nir_builder *b, nir_intrinsic_instr *intrin,
+                             const nir_lower_subgroups_options *options)
+{
+   const bool is_dynamic_bcast =
+      intrin->intrinsic == nir_intrinsic_quad_broadcast &&
+      !nir_src_is_const(intrin->src[1]);
+
+   if (!options->lower_quad_broadcast_dynamic_to_const || !is_dynamic_bcast)
+      return lower_shuffle(b, intrin, options->lower_to_scalar, false);
+
+   nir_ssa_def *dst = NULL;
+
+   for (unsigned i = 0; i < 4; ++i) {
+      nir_intrinsic_instr *qbcst =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_quad_broadcast);
+
+      qbcst->num_components = intrin->num_components;
+      qbcst->src[1] = nir_src_for_ssa(nir_imm_int(b, i));
+      nir_src_copy(&qbcst->src[0], &intrin->src[0], qbcst);
+      nir_ssa_dest_init(&qbcst->instr, &qbcst->dest,
+                        intrin->dest.ssa.num_components,
+                        intrin->dest.ssa.bit_size, NULL);
+
+      nir_ssa_def *qbcst_dst = NULL;
+
+      if (options->lower_to_scalar && qbcst->num_components > 1) {
+         qbcst_dst = lower_subgroup_op_to_scalar(b, qbcst, false);
+      } else {
+         nir_builder_instr_insert(b, &qbcst->instr);
+         qbcst_dst = &qbcst->dest.ssa;
+      }
+
+      /* Index 0 seeds the chain; later indices override it on a match. */
+      if (i)
+         dst = nir_bcsel(b, nir_ieq(b, intrin->src[1].ssa, nir_imm_int(b, i)),
+                         qbcst_dst, dst);
+      else
+         dst = qbcst_dst;
+   }
+
+   return dst;
+}
+
 static nir_ssa_def *
 lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
 {
@@ -477,7 +532,7 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
           (options->lower_quad_broadcast_dynamic &&
            intrin->intrinsic == nir_intrinsic_quad_broadcast &&
            !nir_src_is_const(intrin->src[1])))
-         return lower_shuffle(b, intrin, options->lower_to_scalar, false);
+         return lower_dynamic_quad_broadcast(b, intrin, options);
       else if (options->lower_to_scalar && intrin->num_components > 1)
          return lower_subgroup_op_to_scalar(b, intrin, false);
       break;