diff --git a/.pick_status.json b/.pick_status.json index baf4fab45e2..ae92b486577 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1489,7 +1489,7 @@ "description": "intel/nir: Don't try to emit vector load_scratch instructions", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": "53bfcdeecf4c9632e09ee641d2ca02dd9ec25e34" }, diff --git a/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c b/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c index f67a414e873..3d49aaa01f7 100644 --- a/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c +++ b/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c @@ -53,6 +53,9 @@ dup_mem_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, } dup->num_components = num_components; + if (intrin->intrinsic == nir_intrinsic_load_scratch || + intrin->intrinsic == nir_intrinsic_store_scratch) + assert(num_components == 1); for (unsigned i = 0; i < info->num_indices; i++) dup->const_index[i] = intrin->const_index[i]; @@ -92,7 +95,7 @@ lower_mem_load_bit_size(nir_builder *b, nir_intrinsic_instr *intrin, nir_ssa_def *result; nir_src *offset_src = nir_get_io_offset_src(intrin); - if (bit_size < 32 && nir_src_is_const(*offset_src)) { + if (bit_size < 32 && !needs_scalar && nir_src_is_const(*offset_src)) { /* The offset is constant so we can use a 32-bit load and just shift it * around as needed. */