aco: allow p_start_linear_vgpr to use multiple operands

Merging the p_create_vector into the p_start_linear_vgpr is useful since
we stopped attempting to place the p_start_linear_vgpr definition in the
same registers as the operand.

fossil-db (navi31):
Totals from 927 (1.17% of 79242) affected shaders:
MaxWaves: 26412 -> 26442 (+0.11%)
Instrs: 938328 -> 938181 (-0.02%); split: -0.14%, +0.13%
CodeSize: 4891448 -> 4890820 (-0.01%); split: -0.11%, +0.10%
VGPRs: 47016 -> 47004 (-0.03%); split: -0.13%, +0.10%
SpillSGPRs: 222 -> 226 (+1.80%)
Latency: 5076065 -> 5075191 (-0.02%); split: -0.12%, +0.10%
InvThroughput: 712316 -> 712421 (+0.01%); split: -0.09%, +0.10%
SClause: 27992 -> 27972 (-0.07%); split: -0.09%, +0.02%
Copies: 38042 -> 38104 (+0.16%); split: -1.95%, +2.12%
PreVGPRs: 39448 -> 39369 (-0.20%)
VALU: 570157 -> 570224 (+0.01%); split: -0.13%, +0.14%
SALU: 51672 -> 51678 (+0.01%); split: -0.01%, +0.02%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
This commit is contained in:
Rhys Perry 2024-02-19 17:00:19 +00:00 committed by Marge Bot
parent f764f6848a
commit 5e17a39b15
5 changed files with 55 additions and 78 deletions

View file

@@ -9271,7 +9271,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
case nir_intrinsic_strict_wqm_coord_amd: {
Temp dst = get_ssa_temp(ctx, &instr->def);
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
Temp tmp = bld.tmp(RegClass::get(RegType::vgpr, dst.bytes()));
unsigned begin_size = nir_intrinsic_base(instr);
unsigned num_src = 1;
@@ -9280,7 +9279,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
num_src = src.bytes() / it->second[0].bytes();
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
aco_opcode::p_create_vector, Format::PSEUDO, num_src + !!begin_size, 1)};
aco_opcode::p_start_linear_vgpr, Format::PSEUDO, num_src + !!begin_size, 1)};
if (begin_size)
vec->operands[0] = Operand(RegClass::get(RegType::vgpr, begin_size));
@@ -9289,10 +9288,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
vec->operands[i + !!begin_size] = Operand(comp);
}
vec->definitions[0] = Definition(tmp);
vec->definitions[0] = Definition(dst);
ctx->block->instructions.emplace_back(std::move(vec));
bld.pseudo(aco_opcode::p_start_linear_vgpr, Definition(dst), tmp);
break;
}
case nir_intrinsic_load_lds_ngg_scratch_base_amd: {

View file

@@ -2357,14 +2357,18 @@ lower_to_hw_instr(Program* program)
handle_operands(copy_operations, &ctx, program->gfx_level, pi);
break;
}
case aco_opcode::p_create_vector: {
case aco_opcode::p_create_vector:
case aco_opcode::p_start_linear_vgpr: {
if (instr->operands.empty())
break;
std::map<PhysReg, copy_operation> copy_operations;
PhysReg reg = instr->definitions[0].physReg();
for (const Operand& op : instr->operands) {
RegClass rc = RegClass::get(instr->definitions[0].regClass().type(), op.bytes());
if (op.isConstant()) {
const Definition def = Definition(
reg, instr->definitions[0].getTemp().regClass().resize(op.bytes()));
const Definition def = Definition(reg, rc);
copy_operations[reg] = {op, def, op.bytes()};
reg.reg_b += op.bytes();
continue;
@@ -2375,10 +2379,7 @@ lower_to_hw_instr(Program* program)
continue;
}
RegClass rc_def =
op.regClass().is_subdword()
? op.regClass()
: instr->definitions[0].getTemp().regClass().resize(op.bytes());
RegClass rc_def = op.regClass().is_subdword() ? op.regClass() : rc;
const Definition def = Definition(reg, rc_def);
copy_operations[def.physReg()] = {op, def, op.bytes()};
reg.reg_b += op.bytes();
@@ -2411,19 +2412,6 @@ lower_to_hw_instr(Program* program)
handle_operands(copy_operations, &ctx, program->gfx_level, pi);
break;
}
case aco_opcode::p_start_linear_vgpr: {
if (instr->operands.empty())
break;
Definition def(instr->definitions[0].physReg(),
RegClass::get(RegType::vgpr, instr->definitions[0].bytes()));
std::map<PhysReg, copy_operation> copy_operations;
copy_operations[def.physReg()] = {instr->operands[0], def,
instr->operands[0].bytes()};
handle_operands(copy_operations, &ctx, program->gfx_level, pi);
break;
}
case aco_opcode::p_exit_early_if: {
/* don't bother with an early exit near the end of the program */
if ((block->instructions.size() - 1 - instr_idx) <= 4 &&

View file

@@ -580,6 +580,7 @@ pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsi
case aco_opcode::p_linear_phi:
case aco_opcode::p_parallelcopy:
case aco_opcode::p_create_vector:
case aco_opcode::p_start_linear_vgpr:
if (temp.bytes() != instr->operands[index].bytes())
return false;
break;

View file

@@ -366,6 +366,7 @@ validate_ir(Program* program)
bool flat = instr->isFlatLike();
bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
instr->opcode == aco_opcode::p_create_vector ||
instr->opcode == aco_opcode::p_start_linear_vgpr ||
instr->opcode == aco_opcode::p_jump_to_epilog ||
instr->opcode == aco_opcode::p_dual_src_export_gfx11 ||
instr->opcode == aco_opcode::p_end_with_regs ||
@@ -527,20 +528,26 @@ validate_ir(Program* program)
switch (instr->format) {
case Format::PSEUDO: {
if (instr->opcode == aco_opcode::p_create_vector) {
if (instr->opcode == aco_opcode::p_create_vector ||
instr->opcode == aco_opcode::p_start_linear_vgpr) {
unsigned size = 0;
for (const Operand& op : instr->operands) {
check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get());
size += op.bytes();
}
check(size == instr->definitions[0].bytes(),
"Definition size does not match operand sizes", instr.get());
if (!instr->operands.empty() || instr->opcode == aco_opcode::p_create_vector) {
check(size == instr->definitions[0].bytes(),
"Definition size does not match operand sizes", instr.get());
}
if (instr->definitions[0].regClass().type() == RegType::sgpr) {
for (const Operand& op : instr->operands) {
check(op.isConstant() || op.regClass().type() == RegType::sgpr,
"Wrong Operand type for scalar vector", instr.get());
}
}
if (instr->opcode == aco_opcode::p_start_linear_vgpr)
check(instr->definitions[0].regClass().is_linear_vgpr(),
"Definition must be linear VGPR", instr.get());
} else if (instr->opcode == aco_opcode::p_extract_vector) {
check(!instr->operands[0].isConstant() && instr->operands[1].isConstant(),
"Wrong Operand types", instr.get());
@@ -680,15 +687,6 @@ validate_ir(Program* program)
instr->operands[i].isOfType(RegType::vgpr) || instr->operands[i].isUndefined(),
"Operands of p_dual_src_export_gfx11 must be VGPRs or undef", instr.get());
}
} else if (instr->opcode == aco_opcode::p_start_linear_vgpr) {
check(instr->definitions.size() == 1, "Must have one definition", instr.get());
check(instr->operands.size() <= 1, "Must have one or zero operands", instr.get());
if (!instr->definitions.empty())
check(instr->definitions[0].regClass().is_linear_vgpr(),
"Definition must be linear VGPR", instr.get());
if (!instr->definitions.empty() && !instr->operands.empty())
check(instr->definitions[0].bytes() == instr->operands[0].bytes(),
"Operand size must match definition", instr.get());
}
break;
}

View file

@@ -52,8 +52,7 @@ BEGIN_TEST(d3d11_derivs.simple)
//>> v1: %x = v_interp_p2_f32 %_, %_:m0, (kill)%_ attr0.x
//>> v1: %y = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.y
//>> v2: %vec = p_create_vector (kill)%x, (kill)%y
//>> lv2: %wqm = p_start_linear_vgpr (kill)%vec
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d
//>> BB2
@@ -63,8 +62,8 @@ BEGIN_TEST(d3d11_derivs.simple)
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
//>> v_mov_b32_e32 v#ry_tmp2, v#ry_tmp ; $_
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp2] ; $_ $_
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> image_sample v[#_:#_], v[#rx:#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
END_TEST
@@ -94,8 +93,7 @@ BEGIN_TEST(d3d11_derivs.constant)
pbld.add_vsfs(vs, fs);
//>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x
//>> v2: %vec = p_create_vector (kill)%x, -0.5
//>> lv2: %wqm = p_start_linear_vgpr (kill)%vec
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, -0.5
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d
//>> BB2
@@ -134,7 +132,7 @@ BEGIN_TEST(d3d11_derivs.discard)
pbld.add_vsfs(vs, fs);
/* The interpolation must be done before the discard_if. */
//>> lv2: %wqm = p_start_linear_vgpr (kill)%_
//>> lv2: %wqm = p_start_linear_vgpr (kill)%_, (kill)%_
//>> s2: %_:exec, s1: (kill)%_:scc = s_andn2_b64 %_:exec, %_
//>> s2: %_, s1: %_:scc = s_andn2_b64 (kill)%_, (kill)%_
//>> p_exit_early_if (kill)%_:scc
@@ -167,8 +165,7 @@ BEGIN_TEST(d3d11_derivs.bias)
pbld.add_vsfs(vs, fs);
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm
//>> v3: %vec = p_create_vector v1: undef, (kill)%_, (kill)%_
//>> lv3: %wqm = p_start_linear_vgpr (kill)%vec
//>> lv3: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_
//>> BB1
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2d
//>> BB2
@@ -176,12 +173,12 @@ BEGIN_TEST(d3d11_derivs.bias)
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_interp_p2_f32_e32 v#rx, v#_, attr0.x ; $_
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
//>> v_mov_b32_e32 v#rb, v2 ; $_
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
//>> BB1:
//>> image_sample_b v[#_:#_], [v#rb, v#rx, v#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_ $_
//>> image_sample_b v[#_:#_], [v2, v#rx, v#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
END_TEST
@@ -210,8 +207,7 @@ BEGIN_TEST(d3d11_derivs.offset)
PipelineBuilder pbld(get_vk_device(GFX9));
pbld.add_vsfs(vs, fs);
//>> v3: %vec = p_create_vector v1: undef, (kill)%_, (kill)%_
//>> lv3: %wqm = p_start_linear_vgpr (kill)%vec
//>> lv3: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_
//>> BB1
//>> v1: %offset = p_parallelcopy 0x201
//>> v4: %_ = image_sample_o (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%offset 2d
@@ -220,8 +216,9 @@ BEGIN_TEST(d3d11_derivs.offset)
//>> p_end_linear_vgpr (kill)%wqm
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_interp_p2_f32_e32 v#rx, v#_, attr0.x ; $_
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
//>> BB1:
//>> v_mov_b32_e32 v#ro_tmp, 0x201 ; $_ $_
@@ -256,8 +253,7 @@ BEGIN_TEST(d3d11_derivs.array)
pbld.add_vsfs(vs, fs);
//>> v1: %layer = v_rndne_f32 (kill)%_
//>> v3: %vec = p_create_vector (kill)%_, (kill)%_, (kill)%layer
//>> lv3: %wqm = p_start_linear_vgpr (kill)%vec
//>> lv3: %wqm = p_start_linear_vgpr (kill)%_, (kill)%_, (kill)%layer
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2darray da
//>> BB2
@@ -266,9 +262,11 @@ BEGIN_TEST(d3d11_derivs.array)
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_
//>> v_rndne_f32_e32 v#rl, v#rl_tmp ; $_
//>> v_interp_p2_f32_e32 v#rx, v#_, attr0.x ; $_
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
//>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
//>> BB1:
//; success = rx+1 == ry and rx+2 == rl
@@ -302,8 +300,7 @@ BEGIN_TEST(d3d11_derivs.bias_array)
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm
//>> v1: %layer = v_rndne_f32 (kill)%_
//>> v4: %vec = p_create_vector v1: undef, (kill)%_, (kill)%_, (kill)%layer
//>> lv4: %wqm = p_start_linear_vgpr (kill)%vec
//>> lv4: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_, (kill)%layer
//>> BB1
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2darray da
//>> BB2
@@ -312,11 +309,12 @@ BEGIN_TEST(d3d11_derivs.bias_array)
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_
//>> v_rndne_f32_e32 v#rl, v#rl_tmp ; $_
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
//>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
//>> BB1:
//>> image_sample_b v[#_:#_], [v2, v#rx, v#ry, v#rl], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; $_ $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
@@ -347,8 +345,7 @@ BEGIN_TEST(d3d11_derivs._1d_gfx9)
pbld.add_vsfs(vs, fs);
//>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x
//>> v2: %vec = p_create_vector (kill)%x, 0.5
//>> lv2: %wqm = p_start_linear_vgpr (kill)%vec
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, 0.5
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d
//>> BB2
@@ -389,8 +386,7 @@ BEGIN_TEST(d3d11_derivs._1d_array_gfx9)
//>> v1: %layer = v_rndne_f32 (kill)%_
//>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x
//>> v3: %vec = p_create_vector (kill)%x, 0.5, (kill)%layer
//>> lv3: %wqm = p_start_linear_vgpr (kill)%vec
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, 0.5, (kill)%layer
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2darray da
//>> BB2
@@ -400,8 +396,9 @@ BEGIN_TEST(d3d11_derivs._1d_array_gfx9)
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.y ; $_
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
//>> v_rndne_f32_e32 v#rl, v#rl_tmp ; $_
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
//>> v_mov_b32_e32 v#ry, 0.5 ; $_
//>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> BB1:
//; success = rx+1 == ry and rx+2 == rl
@@ -436,8 +433,7 @@ BEGIN_TEST(d3d11_derivs.cube)
//>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
//>> v3: %vec = p_create_vector (kill)%x, (kill)%y, (kill)%face
//>> lv3: %wqm = p_start_linear_vgpr (kill)%vec
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da
//>> BB2
@@ -446,10 +442,10 @@ BEGIN_TEST(d3d11_derivs.cube)
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
//>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_fmaak_f32 v#rx, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_mov_b32_e32 v#rf, v#rf_tmp ; $_
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
//; success = rx+1 == ry and rx+2 == rf
//>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
@@ -484,8 +480,7 @@ BEGIN_TEST(d3d11_derivs.cube_array)
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
//>> v1: %face_layer = v_fmamk_f32 (kill)%layer, (kill)%face, 0x41000000
//>> v3: %vec = p_create_vector (kill)%x, (kill)%y, (kill)%face_layer
//>> lv3: %wqm = p_start_linear_vgpr (kill)%vec
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face_layer
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da
//>> BB2
@@ -495,12 +490,12 @@ BEGIN_TEST(d3d11_derivs.cube_array)
//>> v_rndne_f32_e32 v#rl, v#_ ; $_
//>> v_cubeid_f32 v#rf, v#_, v#_, v#_ ; $_ $_
//>> v_fmamk_f32 v#rlf_tmp, v#rl, 0x41000000, v#rf ; $_ $_
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
//>> v_fmamk_f32 v#rlf_tmp, v#rl, 0x41000000, v#rf ; $_ $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
//>> v_mov_b32_e32 v#rlf, v#rlf_tmp ; $_
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
//>> BB1:
//; success = rx+1 == ry and rx+2 == rlf
//>> image_sample v[#_:#_], v[#rx:#rlf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
@@ -566,8 +561,7 @@ BEGIN_TEST(d3d11_derivs.bc_optimize)
//>> v1: %y_coord2 = v_cndmask_b32 (kill)%_, %_, (kill)%_
//>> v1: %x = v_interp_p2_f32 (kill)%_, %_:m0, (kill)%_ attr0.x
//>> v1: %y = v_interp_p2_f32 (kill)%y_coord2, (kill)%_:m0, (kill)%_ attr0.y
//>> v2: %vec = p_create_vector (kill)%x, (kill)%y
//>> lv2: %wqm = p_start_linear_vgpr (kill)%vec
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y
//>> BB1
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d
//>> BB2
@@ -602,8 +596,7 @@ BEGIN_TEST(d3d11_derivs.get_lod)
//>> v1: %x = v_interp_p2_f32 %_, %_:m0, (kill)%_ attr0.x
//>> v1: %y = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.y
//>> v2: %vec = p_create_vector %x, %y
//>> lv2: %wqm = p_start_linear_vgpr (kill)%vec
//>> lv2: %wqm = p_start_linear_vgpr %x, %y
//>> v1: %x0 = v_mov_b32 %x quad_perm:[0,0,0,0] bound_ctrl:1 fi
//>> v1: %x1_m_x0 = v_sub_f32 %x, %x0 quad_perm:[1,1,1,1] bound_ctrl:1 fi
//>> v1: %x2_m_x0 = v_sub_f32 (kill)%x, (kill)%x0 quad_perm:[2,2,2,2] bound_ctrl:1 fi