diff --git a/src/intel/compiler/jay/jay_lower_scoreboard.c b/src/intel/compiler/jay/jay_lower_scoreboard.c index 252fdad5500..06244e17252 100644 --- a/src/intel/compiler/jay/jay_lower_scoreboard.c +++ b/src/intel/compiler/jay/jay_lower_scoreboard.c @@ -285,11 +285,16 @@ lower_regdist_local(jay_function *func, jay_block *block, u32_per_pipe *access) }; /* Fold the immediate preceding SYNC.nop into this instruction, allowing - * us to wait on both ALU and a SEND in the same annotation. + * us to wait on both ALU and a SEND in the same annotation. We cannot do + * this safely in the presence of predication or SIMD splitting that could + * cause any part of the instruction to get shot down, skipping the sync + * for future instructions (at least not without more tricky logic). */ if (last_sync && jay_sync_op(last_sync) == TGL_SYNC_NOP && I->dep.mode == TGL_SBID_NULL && + !I->predication && + !jay_simd_split(func->shader, I) && (I->dep.regdist == 0 || inferred_sync_pipe(func->shader->devinfo, I) == I->dep.pipe)) { diff --git a/src/intel/compiler/jay/jay_to_binary.c b/src/intel/compiler/jay/jay_to_binary.c index 935ae4d2727..01ce1395bc6 100644 --- a/src/intel/compiler/jay/jay_to_binary.c +++ b/src/intel/compiler/jay/jay_to_binary.c @@ -246,13 +246,15 @@ emit(struct brw_codegen *p, unsigned exec_size = jay_simd_width_physical(f->shader, I); // jay_print_inst(stdout, (jay_inst *) I); - /* Fix up SWSB dependencies for SIMD split instructions. The latter - * instructions do not need to redundantly wait on an SBID but might - * replicate their regdists. - */ + /* Replicate the SWSB regdist for SIMD split instructions if needed */ struct tgl_swsb dep = simd_offs && !I->replicate_dep ? tgl_swsb_null() : I->dep; - dep.mode = simd_offs ? TGL_SBID_NULL : dep.mode; + + /* We do not allow SBID dependencies on SIMD split instructions since + * individual groups could get shot down. This would require more tracking + * and it is unclear whether it's beneficial. 
+ */ + assert(simd_offs == 0 || I->dep.mode == TGL_SBID_NULL); if (I->decrement_dep) { unsigned delta = simd_offs * jay_macro_length(I);