diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index a76e0f3a6b5..c23ce1ef426 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -371,6 +371,20 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned stride, return true; } +static bool +instruction_requires_packed_data(fs_inst *inst) +{ + switch (inst->opcode) { + case FS_OPCODE_DDX_FINE: + case FS_OPCODE_DDX_COARSE: + case FS_OPCODE_DDY_FINE: + case FS_OPCODE_DDY_COARSE: + return true; + default: + return false; + } +} + bool fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) { @@ -417,6 +431,13 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE) return false; + /* Some instructions implemented in the generator backend, such as + * derivatives, assume that their operands are packed so we can't + * generally propagate strided regions to them. + */ + if (instruction_requires_packed_data(inst) && entry->src.stride > 1) + return false; + /* Bail if the result of composing both strides would exceed the * hardware limit. */