From 43d877ce1af23170cbcdd7cb2696965cd8c9dd4e Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Sat, 12 Jul 2025 23:18:18 +0200 Subject: [PATCH] r600/sfn: lower bany/ball *(n)equal in nir The code emitted in the backend was not better and often worse then what we get with the lowering. Signed-off-by: Gert Wollny Part-of: --- .../drivers/r600/sfn/sfn_instr_alu.cpp | 176 ------------------ src/gallium/drivers/r600/sfn/sfn_nir.cpp | 8 - 2 files changed, 184 deletions(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index 598ce109a0c..feacda291dd 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -1384,15 +1384,6 @@ emit_alu_op3(const nir_alu_instr& alu, Shader& shader, const std::array& src_shuffle = {0, 1, 2}); static bool -emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); -static bool -emit_any_all_fcomp( - const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader); -static bool -emit_any_all_icomp( - const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader); - -static bool emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); static bool emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader); @@ -1651,31 +1642,6 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader) return emit_alu_op1(*alu, op1_bfrev_int, shader); case nir_op_bitfield_select: return emit_alu_op3(*alu, op3_bfi_int, shader); - - case nir_op_b32all_fequal2: - return emit_any_all_fcomp2(*alu, op2_sete_dx10, shader); - case nir_op_b32all_fequal3: - return emit_any_all_fcomp(*alu, op2_sete, 3, true, shader); - case nir_op_b32all_fequal4: - return emit_any_all_fcomp(*alu, op2_sete, 4, true, shader); - case nir_op_b32all_iequal2: - return emit_any_all_icomp(*alu, op2_sete_int, 2, true, shader); - case nir_op_b32all_iequal3: - return emit_any_all_icomp(*alu, op2_sete_int, 3, true, shader); - case nir_op_b32all_iequal4: - return emit_any_all_icomp(*alu, op2_sete_int, 4, true, shader); - case nir_op_b32any_fnequal2: - return emit_any_all_fcomp2(*alu, op2_setne_dx10, shader); - case nir_op_b32any_fnequal3: - return emit_any_all_fcomp(*alu, op2_setne, 3, false, shader); - case nir_op_b32any_fnequal4: - return emit_any_all_fcomp(*alu, op2_setne, 4, false, shader); - case nir_op_b32any_inequal2: - return emit_any_all_icomp(*alu, op2_setne_int, 2, false, shader); - case nir_op_b32any_inequal3: - return emit_any_all_icomp(*alu, op2_setne_int, 3, false, shader); - case nir_op_b32any_inequal4: - return emit_any_all_icomp(*alu, op2_setne_int, 4, false, shader); case nir_op_b32csel: return emit_alu_op3(*alu, op3_cnde_int, shader, {0, 2, 1}); @@ -2450,148 +2416,6 @@ emit_alu_op3(const nir_alu_instr& alu, return true; } -static bool -emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) -{ - AluInstr *ir = nullptr; - auto& value_factory = shader.value_factory(); - - PRegister tmp[2]; - tmp[0] = value_factory.temp_register(); - tmp[1] = value_factory.temp_register(); - - for (unsigned i = 0; i < 2; ++i) { - ir = new AluInstr(opcode, - tmp[i], - value_factory.src(alu.src[0], i), - value_factory.src(alu.src[1], i), - {alu_write}); - shader.emit_instruction(ir); - } - ir->set_alu_flag(alu_last_instr); - - opcode = (opcode == op2_setne_dx10) ? op2_or_int : op2_and_int; - ir = new AluInstr(opcode, - value_factory.dest(alu.def, 0, pin_free), - tmp[0], - tmp[1], - AluInstr::last_write); - shader.emit_instruction(ir); - return true; -} - -static bool -emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader) -{ - /* This should probabyl be lowered in nir */ - auto& value_factory = shader.value_factory(); - - AluInstr *ir = nullptr; - RegisterVec4 v = value_factory.temp_vec4(pin_group); - AluInstr::SrcValues s; - - for (int i = 0; i < nc; ++i) { - s.push_back(v[i]); - } - - for (int i = nc; i < 4; ++i) - s.push_back(value_factory.inline_const(all ? ALU_SRC_1 : ALU_SRC_0, 0)); - - for (int i = 0; i < nc; ++i) { - ir = new AluInstr(op, - v[i], - value_factory.src(alu.src[0], i), - value_factory.src(alu.src[1], i), - {alu_write}); - shader.emit_instruction(ir); - } - if (ir) - ir->set_alu_flag(alu_last_instr); - - auto max_val = value_factory.temp_register(); - - ir = new AluInstr(op1_max4, max_val, s, AluInstr::last_write, 4); - - if (all) { - ir->set_source_mod(0, AluInstr::mod_neg); - ir->set_source_mod(1, AluInstr::mod_neg); - ir->set_source_mod(2, AluInstr::mod_neg); - ir->set_source_mod(3, AluInstr::mod_neg); - } - - shader.emit_instruction(ir); - - if (all) - op = (op == op2_sete) ? op2_sete_dx10 : op2_setne_dx10; - else - op = (op == op2_sete) ? op2_setne_dx10 : op2_sete_dx10; - - ir = new AluInstr(op, - value_factory.dest(alu.def, 0, pin_free), - max_val, - value_factory.inline_const(ALU_SRC_1, 0), - AluInstr::last_write); - if (all) - ir->set_source_mod(1, AluInstr::mod_neg); - shader.emit_instruction(ir); - - return true; -} - -static bool -emit_any_all_icomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader) -{ - /* This should probabyl be lowered in nir */ - auto& value_factory = shader.value_factory(); - - AluInstr *ir = nullptr; - PRegister v[6]; - - auto dest = value_factory.dest(alu.def, 0, pin_free); - - for (int i = 0; i < nc + nc / 2; ++i) - v[i] = value_factory.temp_register(); - - EAluOp combine = all ? op2_and_int : op2_or_int; - - for (int i = 0; i < nc; ++i) { - ir = new AluInstr(op, - v[i], - value_factory.src(alu.src[0], i), - value_factory.src(alu.src[1], i), - AluInstr::write); - shader.emit_instruction(ir); - } - if (ir) - ir->set_alu_flag(alu_last_instr); - - if (nc == 2) { - ir = new AluInstr(combine, dest, v[0], v[1], AluInstr::last_write); - shader.emit_instruction(ir); - return true; - } - - if (nc == 3) { - ir = new AluInstr(combine, v[3], v[0], v[1], AluInstr::last_write); - shader.emit_instruction(ir); - ir = new AluInstr(combine, dest, v[3], v[2], AluInstr::last_write); - shader.emit_instruction(ir); - return true; - } - - if (nc == 4) { - ir = new AluInstr(combine, v[4], v[0], v[1], AluInstr::write); - shader.emit_instruction(ir); - ir = new AluInstr(combine, v[5], v[2], v[3], AluInstr::last_write); - shader.emit_instruction(ir); - ir = new AluInstr(combine, dest, v[4], v[5], AluInstr::last_write); - shader.emit_instruction(ir); - return true; - } - - return false; -} - static bool emit_dot(const nir_alu_instr& alu, int n, Shader& shader) { diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index e7667ead4e7..82ef34207ca 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -625,14 +625,6 @@ r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *) auto alu = nir_instr_as_alu(instr); switch (alu->op) { - case nir_op_bany_fnequal3: - case nir_op_bany_fnequal4: - case nir_op_ball_fequal3: - case nir_op_ball_fequal4: - case nir_op_bany_inequal3: - case nir_op_bany_inequal4: - case nir_op_ball_iequal3: - case nir_op_ball_iequal4: case nir_op_fdot2: case nir_op_fdot3: case nir_op_fdot4: