From daa8003e45c37d13f81958c0a74222e259d18538 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 5 Apr 2023 15:38:34 +0300 Subject: [PATCH] intel/fs: use nomask for setting cr0 for float controls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The instructions manipulation cr0 use the default mask on lane0. So if for some reason that lane is disabled in some of the dispatchs, we can end up not executing the instructions. Fixes flakyness in dEQP-VK.spirv_assembly.instruction.graphics.16bit_storage.uniform_float_32_to_16.uniform_matrix_float_rtz_frag Signed-off-by: Lionel Landwerlin Cc: mesa-stable Reviewed-by: Tapani Pälli Reviewed-by: Francisco Jerez Part-of: --- src/intel/compiler/brw_eu_emit.c | 2 ++ src/intel/compiler/brw_fs_nir.cpp | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index fb4457e66ba..8ec4cc02516 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3614,6 +3614,8 @@ void brw_float_controls_mode(struct brw_codegen *p, unsigned mode, unsigned mask) { + assert(p->current->mask_control == BRW_MASK_DISABLE); + /* From the Skylake PRM, Volume 7, page 760: * "Implementation Restriction on Register Access: When the control * register is used as an explicit source and/or destination, hardware diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 122a42d3948..939245cf133 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1030,7 +1030,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, rnd = brw_rnd_mode_from_nir_op(instr->op); if (BRW_RND_MODE_UNSPECIFIED != rnd) - bld.emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), brw_imm_d(rnd)); + bld.exec_all().emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), brw_imm_d(rnd)); assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */ inst = bld.F32TO16(result, op[0]); @@ -1139,8 +1139,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, if (nir_has_any_rounding_mode_enabled(execution_mode)) { brw_rnd_mode rnd = brw_rnd_mode_from_execution_mode(execution_mode); - bld.emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), - brw_imm_d(rnd)); + bld.exec_all().emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), + brw_imm_d(rnd)); } if (op[0].type == BRW_REGISTER_TYPE_HF) @@ -1192,8 +1192,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, if (nir_has_any_rounding_mode_enabled(execution_mode)) { brw_rnd_mode rnd = brw_rnd_mode_from_execution_mode(execution_mode); - bld.emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), - brw_imm_d(rnd)); + bld.exec_all().emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), + brw_imm_d(rnd)); } FALLTHROUGH; case nir_op_iadd: @@ -1258,8 +1258,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, if (nir_has_any_rounding_mode_enabled(execution_mode)) { brw_rnd_mode rnd = brw_rnd_mode_from_execution_mode(execution_mode); - bld.emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), - brw_imm_d(rnd)); + bld.exec_all().emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), + brw_imm_d(rnd)); } inst = bld.MUL(result, op[0], op[1]); @@ -1806,8 +1806,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, if (nir_has_any_rounding_mode_enabled(execution_mode)) { brw_rnd_mode rnd = brw_rnd_mode_from_execution_mode(execution_mode); - bld.emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), - brw_imm_d(rnd)); + bld.exec_all().emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), + brw_imm_d(rnd)); } inst = bld.MAD(result, op[2], op[1], op[0]); @@ -1817,8 +1817,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, if (nir_has_any_rounding_mode_enabled(execution_mode)) { brw_rnd_mode rnd = brw_rnd_mode_from_execution_mode(execution_mode); - bld.emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), - brw_imm_d(rnd)); + bld.exec_all().emit(SHADER_OPCODE_RND_MODE, bld.null_reg_ud(), + brw_imm_d(rnd)); } inst = bld.LRP(result, op[0], op[1], op[2]);