diff --git a/.pick_status.json b/.pick_status.json
index 78bef1745a2..b310c740062 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -10084,7 +10084,7 @@
         "description": "nir/lower_io_to_scalar: fix 64bit io splitting",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null,
         "notes": null
diff --git a/src/compiler/nir/nir_lower_io_to_scalar.c b/src/compiler/nir/nir_lower_io_to_scalar.c
index 083f8a140a0..d7fd2e29711 100644
--- a/src/compiler/nir/nir_lower_io_to_scalar.c
+++ b/src/compiler/nir/nir_lower_io_to_scalar.c
@@ -50,6 +50,9 @@ lower_load_input_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
    nir_ssa_def *loads[NIR_MAX_VEC_COMPONENTS];
 
    for (unsigned i = 0; i < intr->num_components; i++) {
+      bool is_64bit = (nir_intrinsic_dest_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64;
+      unsigned newi = is_64bit ? i * 2 : i;
+      unsigned newc = nir_intrinsic_component(intr);
       nir_intrinsic_instr *chan_intr =
          nir_intrinsic_instr_create(b->shader, intr->intrinsic);
       nir_ssa_dest_init(&chan_intr->instr, &chan_intr->dest, 1,
@@ -57,12 +60,18 @@ lower_load_input_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
       chan_intr->num_components = 1;
 
       nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr));
-      nir_intrinsic_set_component(chan_intr, nir_intrinsic_component(intr) + i);
+      nir_intrinsic_set_component(chan_intr, (newc + newi) % 4);
       nir_intrinsic_set_dest_type(chan_intr, nir_intrinsic_dest_type(intr));
       set_io_semantics(chan_intr, intr, i);
       /* offset and vertex (if needed) */
       for (unsigned j = 0; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; ++j)
          nir_src_copy(&chan_intr->src[j], &intr->src[j], &chan_intr->instr);
+      if (newc + newi > 3) {
+         nir_src *src = nir_get_io_offset_src(chan_intr);
+         nir_ssa_def *offset = nir_iadd_imm(b, src->ssa, (newc + newi) / 4);
+         nir_src new_src = nir_src_for_ssa(offset);
+         nir_src_copy(src, &new_src, &chan_intr->instr);
+      }
 
       nir_builder_instr_insert(b, &chan_intr->instr);
 
@@ -131,13 +141,16 @@ lower_store_output_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
      if (!(nir_intrinsic_write_mask(intr) & (1 << i)))
         continue;
 
+      bool is_64bit = (nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64;
+      unsigned newi = is_64bit ? i * 2 : i;
+      unsigned newc = nir_intrinsic_component(intr);
       nir_intrinsic_instr *chan_intr =
          nir_intrinsic_instr_create(b->shader, intr->intrinsic);
       chan_intr->num_components = 1;
 
       nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr));
       nir_intrinsic_set_write_mask(chan_intr, 0x1);
-      nir_intrinsic_set_component(chan_intr, nir_intrinsic_component(intr) + i);
+      nir_intrinsic_set_component(chan_intr, (newc + newi) % 4);
       nir_intrinsic_set_src_type(chan_intr, nir_intrinsic_src_type(intr));
       set_io_semantics(chan_intr, intr, i);
 
@@ -153,7 +166,7 @@ lower_store_output_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
             nir_io_xfb scalar_xfb;
 
             memset(&scalar_xfb, 0, sizeof(scalar_xfb));
-            scalar_xfb.out[component % 2].num_components = 1;
+            scalar_xfb.out[component % 2].num_components = is_64bit ? 2 : 1;
             scalar_xfb.out[component % 2].buffer = xfb.out[c % 2].buffer;
             scalar_xfb.out[component % 2].offset = xfb.out[c % 2].offset + component - c;
 
@@ -171,6 +184,12 @@ lower_store_output_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
       /* offset and vertex (if needed) */
       for (unsigned j = 1; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; ++j)
          nir_src_copy(&chan_intr->src[j], &intr->src[j], &chan_intr->instr);
+      if (newc + newi > 3) {
+         nir_src *src = nir_get_io_offset_src(chan_intr);
+         nir_ssa_def *offset = nir_iadd_imm(b, src->ssa, (newc + newi) / 4);
+         nir_src new_src = nir_src_for_ssa(offset);
+         nir_src_copy(src, &new_src, &chan_intr->instr);
+      }
 
       nir_builder_instr_insert(b, &chan_intr->instr);
    }