From 5e17a39b15e3a11d5eda30afc03549dbbb9ea702 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Mon, 19 Feb 2024 17:00:19 +0000 Subject: [PATCH] aco: allow p_start_linear_vgpr to use multiple operands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merging the p_create_vector into the p_start_linear_vgpr is useful since we stopped attempting to place the p_start_linear_vgpr definition in the same registers as the operand. fossil-db (navi31): Totals from 927 (1.17% of 79242) affected shaders: MaxWaves: 26412 -> 26442 (+0.11%) Instrs: 938328 -> 938181 (-0.02%); split: -0.14%, +0.13% CodeSize: 4891448 -> 4890820 (-0.01%); split: -0.11%, +0.10% VGPRs: 47016 -> 47004 (-0.03%); split: -0.13%, +0.10% SpillSGPRs: 222 -> 226 (+1.80%) Latency: 5076065 -> 5075191 (-0.02%); split: -0.12%, +0.10% InvThroughput: 712316 -> 712421 (+0.01%); split: -0.09%, +0.10% SClause: 27992 -> 27972 (-0.07%); split: -0.09%, +0.02% Copies: 38042 -> 38104 (+0.16%); split: -1.95%, +2.12% PreVGPRs: 39448 -> 39369 (-0.20%) VALU: 570157 -> 570224 (+0.01%); split: -0.13%, +0.14% SALU: 51672 -> 51678 (+0.01%); split: -0.01%, +0.02% Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- .../compiler/aco_instruction_selection.cpp | 7 +- src/amd/compiler/aco_lower_to_hw_instr.cpp | 28 ++----- src/amd/compiler/aco_optimizer.cpp | 1 + src/amd/compiler/aco_validate.cpp | 22 +++--- src/amd/compiler/tests/test_d3d11_derivs.cpp | 75 +++++++++---------- 5 files changed, 55 insertions(+), 78 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 4e38dbd22c2..9a56ca2c68c 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -9271,7 +9271,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) case nir_intrinsic_strict_wqm_coord_amd: { Temp dst = get_ssa_temp(ctx, &instr->def); Temp src = get_ssa_temp(ctx, instr->src[0].ssa); - Temp tmp = bld.tmp(RegClass::get(RegType::vgpr, dst.bytes())); unsigned begin_size = nir_intrinsic_base(instr); unsigned num_src = 1; @@ -9280,7 +9279,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) num_src = src.bytes() / it->second[0].bytes(); aco_ptr vec{create_instruction( - aco_opcode::p_create_vector, Format::PSEUDO, num_src + !!begin_size, 1)}; + aco_opcode::p_start_linear_vgpr, Format::PSEUDO, num_src + !!begin_size, 1)}; if (begin_size) vec->operands[0] = Operand(RegClass::get(RegType::vgpr, begin_size)); @@ -9289,10 +9288,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) vec->operands[i + !!begin_size] = Operand(comp); } - vec->definitions[0] = Definition(tmp); + vec->definitions[0] = Definition(dst); ctx->block->instructions.emplace_back(std::move(vec)); - - bld.pseudo(aco_opcode::p_start_linear_vgpr, Definition(dst), tmp); break; } case nir_intrinsic_load_lds_ngg_scratch_base_amd: { diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 03282ea2857..9c42495f552 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2357,14 +2357,18 @@ lower_to_hw_instr(Program* program) handle_operands(copy_operations, &ctx, program->gfx_level, pi); break; } - case aco_opcode::p_create_vector: { + case aco_opcode::p_create_vector: + case aco_opcode::p_start_linear_vgpr: { + if (instr->operands.empty()) + break; + std::map copy_operations; PhysReg reg = instr->definitions[0].physReg(); for (const Operand& op : instr->operands) { + RegClass rc = RegClass::get(instr->definitions[0].regClass().type(), op.bytes()); if (op.isConstant()) { - const Definition def = Definition( - reg, instr->definitions[0].getTemp().regClass().resize(op.bytes())); + const Definition def = Definition(reg, rc); copy_operations[reg] = {op, def, op.bytes()}; reg.reg_b += op.bytes(); continue; @@ -2375,10 +2379,7 @@ lower_to_hw_instr(Program* program) continue; } - RegClass rc_def = - op.regClass().is_subdword() - ? op.regClass() - : instr->definitions[0].getTemp().regClass().resize(op.bytes()); + RegClass rc_def = op.regClass().is_subdword() ? op.regClass() : rc; const Definition def = Definition(reg, rc_def); copy_operations[def.physReg()] = {op, def, op.bytes()}; reg.reg_b += op.bytes(); @@ -2411,19 +2412,6 @@ lower_to_hw_instr(Program* program) handle_operands(copy_operations, &ctx, program->gfx_level, pi); break; } - case aco_opcode::p_start_linear_vgpr: { - if (instr->operands.empty()) - break; - - Definition def(instr->definitions[0].physReg(), - RegClass::get(RegType::vgpr, instr->definitions[0].bytes())); - - std::map copy_operations; - copy_operations[def.physReg()] = {instr->operands[0], def, - instr->operands[0].bytes()}; - handle_operands(copy_operations, &ctx, program->gfx_level, pi); - break; - } case aco_opcode::p_exit_early_if: { /* don't bother with an early exit near the end of the program */ if ((block->instructions.size() - 1 - instr_idx) <= 4 && diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 73530876c4b..dc7e003b6ab 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -580,6 +580,7 @@ pseudo_propagate_temp(opt_ctx& ctx, aco_ptr& instr, Temp temp, unsi case aco_opcode::p_linear_phi: case aco_opcode::p_parallelcopy: case aco_opcode::p_create_vector: + case aco_opcode::p_start_linear_vgpr: if (temp.bytes() != instr->operands[index].bytes()) return false; break; diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index b571e8fd09f..5d59b0af565 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -366,6 +366,7 @@ validate_ir(Program* program) bool flat = instr->isFlatLike(); bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() || instr->opcode == aco_opcode::p_create_vector || + instr->opcode == aco_opcode::p_start_linear_vgpr || instr->opcode == aco_opcode::p_jump_to_epilog || instr->opcode == aco_opcode::p_dual_src_export_gfx11 || instr->opcode == aco_opcode::p_end_with_regs || @@ -527,20 +528,26 @@ validate_ir(Program* program) switch (instr->format) { case Format::PSEUDO: { - if (instr->opcode == aco_opcode::p_create_vector) { + if (instr->opcode == aco_opcode::p_create_vector || + instr->opcode == aco_opcode::p_start_linear_vgpr) { unsigned size = 0; for (const Operand& op : instr->operands) { check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get()); size += op.bytes(); } - check(size == instr->definitions[0].bytes(), - "Definition size does not match operand sizes", instr.get()); + if (!instr->operands.empty() || instr->opcode == aco_opcode::p_create_vector) { + check(size == instr->definitions[0].bytes(), + "Definition size does not match operand sizes", instr.get()); + } if (instr->definitions[0].regClass().type() == RegType::sgpr) { for (const Operand& op : instr->operands) { check(op.isConstant() || op.regClass().type() == RegType::sgpr, "Wrong Operand type for scalar vector", instr.get()); } } + if (instr->opcode == aco_opcode::p_start_linear_vgpr) + check(instr->definitions[0].regClass().is_linear_vgpr(), + "Definition must be linear VGPR", instr.get()); } else if (instr->opcode == aco_opcode::p_extract_vector) { check(!instr->operands[0].isConstant() && instr->operands[1].isConstant(), "Wrong Operand types", instr.get()); @@ -680,15 +687,6 @@ validate_ir(Program* program) instr->operands[i].isOfType(RegType::vgpr) || instr->operands[i].isUndefined(), "Operands of p_dual_src_export_gfx11 must be VGPRs or undef", instr.get()); } - } else if (instr->opcode == aco_opcode::p_start_linear_vgpr) { - check(instr->definitions.size() == 1, "Must have one definition", instr.get()); - check(instr->operands.size() <= 1, "Must have one or zero operands", instr.get()); - if (!instr->definitions.empty()) - check(instr->definitions[0].regClass().is_linear_vgpr(), - "Definition must be linear VGPR", instr.get()); - if (!instr->definitions.empty() && !instr->operands.empty()) - check(instr->definitions[0].bytes() == instr->operands[0].bytes(), - "Operand size must match definition", instr.get()); } break; } diff --git a/src/amd/compiler/tests/test_d3d11_derivs.cpp b/src/amd/compiler/tests/test_d3d11_derivs.cpp index 17370714e2e..f180de4a157 100644 --- a/src/amd/compiler/tests/test_d3d11_derivs.cpp +++ b/src/amd/compiler/tests/test_d3d11_derivs.cpp @@ -52,8 +52,7 @@ BEGIN_TEST(d3d11_derivs.simple) //>> v1: %x = v_interp_p2_f32 %_, %_:m0, (kill)%_ attr0.x //>> v1: %y = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.y - //>> v2: %vec = p_create_vector (kill)%x, (kill)%y - //>> lv2: %wqm = p_start_linear_vgpr (kill)%vec + //>> lv2: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y //>> BB1 //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d //>> BB2 @@ -63,8 +62,8 @@ BEGIN_TEST(d3d11_derivs.simple) //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_ //>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_ - //>> v_mov_b32_e32 v#ry_tmp2, v#ry_tmp ; $_ - //>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp2] ; $_ $_ + //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_ + //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_ //>> image_sample v[#_:#_], v[#rx:#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_ pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly"); END_TEST @@ -94,8 +93,7 @@ BEGIN_TEST(d3d11_derivs.constant) pbld.add_vsfs(vs, fs); //>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x - //>> v2: %vec = p_create_vector (kill)%x, -0.5 - //>> lv2: %wqm = p_start_linear_vgpr (kill)%vec + //>> lv2: %wqm = p_start_linear_vgpr (kill)%x, -0.5 //>> BB1 //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d //>> BB2 @@ -134,7 +132,7 @@ BEGIN_TEST(d3d11_derivs.discard) pbld.add_vsfs(vs, fs); /* The interpolation must be done before the discard_if. */ - //>> lv2: %wqm = p_start_linear_vgpr (kill)%_ + //>> lv2: %wqm = p_start_linear_vgpr (kill)%_, (kill)%_ //>> s2: %_:exec, s1: (kill)%_:scc = s_andn2_b64 %_:exec, %_ //>> s2: %_, s1: %_:scc = s_andn2_b64 (kill)%_, (kill)%_ //>> p_exit_early_if (kill)%_:scc @@ -167,8 +165,7 @@ BEGIN_TEST(d3d11_derivs.bias) pbld.add_vsfs(vs, fs); //>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm - //>> v3: %vec = p_create_vector v1: undef, (kill)%_, (kill)%_ - //>> lv3: %wqm = p_start_linear_vgpr (kill)%vec + //>> lv3: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_ //>> BB1 //>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2d //>> BB2 @@ -176,12 +173,12 @@ BEGIN_TEST(d3d11_derivs.bias) //>> p_end_linear_vgpr (kill)%wqm pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR"); - //>> v_interp_p2_f32_e32 v#rx, v#_, attr0.x ; $_ - //>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_ - //>> v_mov_b32_e32 v#rb, v2 ; $_ - //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_ + //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_ + //>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_ + //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_ + //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_ //>> BB1: - //>> image_sample_b v[#_:#_], [v#rb, v#rx, v#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_ $_ + //>> image_sample_b v[#_:#_], [v2, v#rx, v#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_ $_ pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly"); END_TEST @@ -210,8 +207,7 @@ BEGIN_TEST(d3d11_derivs.offset) PipelineBuilder pbld(get_vk_device(GFX9)); pbld.add_vsfs(vs, fs); - //>> v3: %vec = p_create_vector v1: undef, (kill)%_, (kill)%_ - //>> lv3: %wqm = p_start_linear_vgpr (kill)%vec + //>> lv3: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_ //>> BB1 //>> v1: %offset = p_parallelcopy 0x201 //>> v4: %_ = image_sample_o (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%offset 2d @@ -220,8 +216,9 @@ BEGIN_TEST(d3d11_derivs.offset) //>> p_end_linear_vgpr (kill)%wqm pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR"); - //>> v_interp_p2_f32_e32 v#rx, v#_, attr0.x ; $_ + //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_ //>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_ + //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_ //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_ //>> BB1: //>> v_mov_b32_e32 v#ro_tmp, 0x201 ; $_ $_ @@ -256,8 +253,7 @@ BEGIN_TEST(d3d11_derivs.array) pbld.add_vsfs(vs, fs); //>> v1: %layer = v_rndne_f32 (kill)%_ - //>> v3: %vec = p_create_vector (kill)%_, (kill)%_, (kill)%layer - //>> lv3: %wqm = p_start_linear_vgpr (kill)%vec + //>> lv3: %wqm = p_start_linear_vgpr (kill)%_, (kill)%_, (kill)%layer //>> BB1 //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2darray da //>> BB2 @@ -266,9 +262,11 @@ BEGIN_TEST(d3d11_derivs.array) pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR"); //>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_ - //>> v_rndne_f32_e32 v#rl, v#rl_tmp ; $_ - //>> v_interp_p2_f32_e32 v#rx, v#_, attr0.x ; $_ + //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_ //>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_ + //>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_ + //>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_ + //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_ //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_ //>> BB1: //; success = rx+1 == ry and rx+2 == rl @@ -302,8 +300,7 @@ BEGIN_TEST(d3d11_derivs.bias_array) //>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm //>> v1: %layer = v_rndne_f32 (kill)%_ - //>> v4: %vec = p_create_vector v1: undef, (kill)%_, (kill)%_, (kill)%layer - //>> lv4: %wqm = p_start_linear_vgpr (kill)%vec + //>> lv4: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_, (kill)%layer //>> BB1 //>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2darray da //>> BB2 @@ -312,11 +309,12 @@ BEGIN_TEST(d3d11_derivs.bias_array) pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR"); //>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_ - //>> v_rndne_f32_e32 v#rl, v#rl_tmp ; $_ //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_ //>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_ + //>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_ //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_ //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_ + //>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_ //>> BB1: //>> image_sample_b v[#_:#_], [v2, v#rx, v#ry, v#rl], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; $_ $_ $_ pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly"); @@ -347,8 +345,7 @@ BEGIN_TEST(d3d11_derivs._1d_gfx9) pbld.add_vsfs(vs, fs); //>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x - //>> v2: %vec = p_create_vector (kill)%x, 0.5 - //>> lv2: %wqm = p_start_linear_vgpr (kill)%vec + //>> lv2: %wqm = p_start_linear_vgpr (kill)%x, 0.5 //>> BB1 //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d //>> BB2 @@ -389,8 +386,7 @@ BEGIN_TEST(d3d11_derivs._1d_array_gfx9) //>> v1: %layer = v_rndne_f32 (kill)%_ //>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x - //>> v3: %vec = p_create_vector (kill)%x, 0.5, (kill)%layer - //>> lv3: %wqm = p_start_linear_vgpr (kill)%vec + //>> lv3: %wqm = p_start_linear_vgpr (kill)%x, 0.5, (kill)%layer //>> BB1 //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2darray da //>> BB2 @@ -400,8 +396,9 @@ BEGIN_TEST(d3d11_derivs._1d_array_gfx9) //>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.y ; $_ //>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_ - //>> v_rndne_f32_e32 v#rl, v#rl_tmp ; $_ + //>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_ //>> v_mov_b32_e32 v#ry, 0.5 ; $_ + //>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_ //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_ //>> BB1: //; success = rx+1 == ry and rx+2 == rl @@ -436,8 +433,7 @@ BEGIN_TEST(d3d11_derivs.cube) //>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_ //>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000 //>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000 - //>> v3: %vec = p_create_vector (kill)%x, (kill)%y, (kill)%face - //>> lv3: %wqm = p_start_linear_vgpr (kill)%vec + //>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face //>> BB1 //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da //>> BB2 @@ -446,10 +442,10 @@ BEGIN_TEST(d3d11_derivs.cube) pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR"); //>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_ - //>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_ + //>> v_fmaak_f32 v#rx, v#_, v#_, 0x3fc00000 ; $_ $_ //>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_ //>> v_mov_b32_e32 v#rf, v#rf_tmp ; $_ - //>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_ + //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_ //; success = rx+1 == ry and rx+2 == rf //>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_ pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly"); @@ -484,8 +480,7 @@ BEGIN_TEST(d3d11_derivs.cube_array) //>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000 //>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000 //>> v1: %face_layer = v_fmamk_f32 (kill)%layer, (kill)%face, 0x41000000 - //>> v3: %vec = p_create_vector (kill)%x, (kill)%y, (kill)%face_layer - //>> lv3: %wqm = p_start_linear_vgpr (kill)%vec + //>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face_layer //>> BB1 //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da //>> BB2 @@ -495,12 +490,12 @@ BEGIN_TEST(d3d11_derivs.cube_array) //>> v_rndne_f32_e32 v#rl, v#_ ; $_ //>> v_cubeid_f32 v#rf, v#_, v#_, v#_ ; $_ $_ + //>> v_fmamk_f32 v#rlf_tmp, v#rl, 0x41000000, v#rf ; $_ $_ //>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_ //>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_ - //>> v_fmamk_f32 v#rlf_tmp, v#rl, 0x41000000, v#rf ; $_ $_ - //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_ //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_ //>> v_mov_b32_e32 v#rlf, v#rlf_tmp ; $_ + //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_ //>> BB1: //; success = rx+1 == ry and rx+2 == rlf //>> image_sample v[#_:#_], v[#rx:#rlf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_ @@ -566,8 +561,7 @@ BEGIN_TEST(d3d11_derivs.bc_optimize) //>> v1: %y_coord2 = v_cndmask_b32 (kill)%_, %_, (kill)%_ //>> v1: %x = v_interp_p2_f32 (kill)%_, %_:m0, (kill)%_ attr0.x //>> v1: %y = v_interp_p2_f32 (kill)%y_coord2, (kill)%_:m0, (kill)%_ attr0.y - //>> v2: %vec = p_create_vector (kill)%x, (kill)%y - //>> lv2: %wqm = p_start_linear_vgpr (kill)%vec + //>> lv2: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y //>> BB1 //>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d //>> BB2 @@ -602,8 +596,7 @@ BEGIN_TEST(d3d11_derivs.get_lod) //>> v1: %x = v_interp_p2_f32 %_, %_:m0, (kill)%_ attr0.x //>> v1: %y = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.y - //>> v2: %vec = p_create_vector %x, %y - //>> lv2: %wqm = p_start_linear_vgpr (kill)%vec + //>> lv2: %wqm = p_start_linear_vgpr %x, %y //>> v1: %x0 = v_mov_b32 %x quad_perm:[0,0,0,0] bound_ctrl:1 fi //>> v1: %x1_m_x0 = v_sub_f32 %x, %x0 quad_perm:[1,1,1,1] bound_ctrl:1 fi //>> v1: %x2_m_x0 = v_sub_f32 (kill)%x, (kill)%x0 quad_perm:[2,2,2,2] bound_ctrl:1 fi