mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
aco: Use vector-aligned operands for image_bvh8_intersect_ray
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35269>
This commit is contained in:
parent
c279dd6e61
commit
c515f1fd58
4 changed files with 165 additions and 63 deletions
|
|
@ -573,7 +573,7 @@ formats = [("pseudo", [Format.PSEUDO], list(itertools.product(range(5), range(7)
|
|||
("ldsdir", [Format.LDSDIR], [(1, 1)]),
|
||||
("mubuf", [Format.MUBUF], [(0, 4), (1, 3), (1, 4)]),
|
||||
("mtbuf", [Format.MTBUF], [(0, 4), (1, 3)]),
|
||||
("mimg", [Format.MIMG], list(itertools.product([0, 1], [3, 4, 5, 6, 7])) + [(3, 8)]),
|
||||
("mimg", [Format.MIMG], list(itertools.product([0, 1], [3, 4, 5, 6, 7])) + [(3, 8)] + [(3, 14)]),
|
||||
("exp", [Format.EXP], [(0, 4), (0, 5)]),
|
||||
("branch", [Format.PSEUDO_BRANCH], [(0, 0), (0, 1)]),
|
||||
("barrier", [Format.PSEUDO_BARRIER], [(0, 0)]),
|
||||
|
|
|
|||
|
|
@ -1453,8 +1453,8 @@ get_tied_defs(Instruction* instr)
|
|||
ops.push_back(2);
|
||||
} else if (instr->opcode == aco_opcode::image_bvh8_intersect_ray) {
|
||||
/* VADDR starts at 3. */
|
||||
ops.push_back(3 + 2);
|
||||
ops.push_back(3 + 3);
|
||||
ops.push_back(3 + 4);
|
||||
ops.push_back(3 + 7);
|
||||
}
|
||||
return ops;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1840,16 +1840,39 @@ visit_bvh8_intersect_ray_amd(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
Temp new_origin = bld.tmp(v3);
|
||||
Temp new_dir = bld.tmp(v3);
|
||||
|
||||
std::vector<Temp> args = {bvh_base,
|
||||
bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), tmax, cull_mask),
|
||||
origin, dir, node_id};
|
||||
std::vector<Temp> args = {bvh_base, tmax, cull_mask, origin, dir, node_id};
|
||||
|
||||
MIMG_instruction* mimg = emit_mimg(bld, aco_opcode::image_bvh8_intersect_ray,
|
||||
{new_origin, new_dir, result}, resource, Operand(s4), args);
|
||||
mimg->dim = ac_image_1d;
|
||||
mimg->dmask = 0xf;
|
||||
mimg->unrm = true;
|
||||
mimg->r128 = true;
|
||||
/* Use vector-aligned scalar operands in order to avoid unnecessary copies
|
||||
* when creating vectors.
|
||||
*/
|
||||
std::vector<Operand> scalar_args;
|
||||
for (unsigned i = 0; i < args.size(); ++i) {
|
||||
Temp tmp = args[i];
|
||||
for (unsigned j = 0; j < tmp.size(); j++) {
|
||||
scalar_args.emplace_back(emit_extract_vector(ctx, tmp, j, v1));
|
||||
scalar_args.back().setVectorAligned(true);
|
||||
}
|
||||
/* (tmax, cull_mask) is passed as one vector: tmax (args[1]) keeps its vector-aligned flag so that it chains with cull_mask. */
|
||||
if (i != 1)
|
||||
scalar_args.back().setVectorAligned(false);
|
||||
}
|
||||
|
||||
Instruction* mimg = create_instruction(aco_opcode::image_bvh8_intersect_ray, Format::MIMG,
|
||||
3 + scalar_args.size(), 3);
|
||||
mimg->definitions[0] = Definition(new_origin);
|
||||
mimg->definitions[1] = Definition(new_dir);
|
||||
mimg->definitions[2] = Definition(result);
|
||||
mimg->operands[0] = Operand(resource);
|
||||
mimg->operands[1] = Operand(s4);
|
||||
mimg->operands[2] = Operand(v1);
|
||||
for (unsigned i = 0; i < scalar_args.size(); i++)
|
||||
mimg->operands[3 + i] = scalar_args[i];
|
||||
|
||||
mimg->mimg().dim = ac_image_1d;
|
||||
mimg->mimg().dmask = 0xf;
|
||||
mimg->mimg().unrm = true;
|
||||
mimg->mimg().r128 = true;
|
||||
bld.insert(std::move(mimg));
|
||||
emit_split_vector(ctx, result, 10);
|
||||
emit_split_vector(ctx, new_origin, 3);
|
||||
emit_split_vector(ctx, new_dir, 3);
|
||||
|
|
|
|||
|
|
@ -935,28 +935,49 @@ BEGIN_TEST(regalloc.tied_defs.bvh8.killed.simple)
|
|||
return;
|
||||
|
||||
//>> s8: %_:s[0-7] = p_startpgm
|
||||
//! v2: %base:v[0-1] = p_unit_test
|
||||
//! v2: %tmax_mask:v[2-3] = p_unit_test
|
||||
//! v3: %origin:v[4-6] = p_unit_test
|
||||
//! v3: %dir:v[7-9] = p_unit_test
|
||||
//! v1: %base_lo:v[0] = p_unit_test
|
||||
//! v1: %base_hi:v[1] = p_unit_test
|
||||
//! v1: %tmax:v[2] = p_unit_test
|
||||
//! v1: %cull_mask:v[3] = p_unit_test
|
||||
//! v1: %origin_x:v[4] = p_unit_test
|
||||
//! v1: %origin_y:v[5] = p_unit_test
|
||||
//! v1: %origin_z:v[6] = p_unit_test
|
||||
//! v1: %dir_x:v[7] = p_unit_test
|
||||
//! v1: %dir_y:v[8] = p_unit_test
|
||||
//! v1: %dir_z:v[9] = p_unit_test
|
||||
//! v1: %node:v[10] = p_unit_test
|
||||
Temp base = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, PhysReg(256 + 0)));
|
||||
Temp tmax_mask = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, PhysReg(256 + 2)));
|
||||
Temp origin = bld.pseudo(aco_opcode::p_unit_test, bld.def(v3, PhysReg(256 + 4)));
|
||||
Temp dir = bld.pseudo(aco_opcode::p_unit_test, bld.def(v3, PhysReg(256 + 7)));
|
||||
Temp base_lo = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 0)));
|
||||
Temp base_hi = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 1)));
|
||||
Temp tmax = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 2)));
|
||||
Temp cull_mask = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 3)));
|
||||
Temp origin_x = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 4)));
|
||||
Temp origin_y = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 5)));
|
||||
Temp origin_z = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 6)));
|
||||
Temp dir_x = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 7)));
|
||||
Temp dir_y = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 8)));
|
||||
Temp dir_z = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 9)));
|
||||
Temp node = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 10)));
|
||||
|
||||
Temp new_origin = bld.tmp(v3);
|
||||
Temp new_dir = bld.tmp(v3);
|
||||
Temp result = bld.tmp(v10);
|
||||
//! v3: %new_origin:v[4-6], v3: %new_dir:v[7-9], v10: %_:v[10-19] = image_bvh8_intersect_ray %_:s[0-7], s4: undef, v1: undef, %base:v[0-1], %tmax_mask:v[2-3], %origin:v[4-6], %dir:v[7-9], %node:v[10] 1d
|
||||
bld.mimg(aco_opcode::image_bvh8_intersect_ray, Definition(new_origin), Definition(new_dir),
|
||||
Definition(result), inputs[0], Operand(s4), Operand(v1), base, tmax_mask, origin, dir,
|
||||
node);
|
||||
//! v3: %new_origin:v[4-6], v3: %new_dir:v[7-9], v10: %_:v[10-19] = image_bvh8_intersect_ray %_:s[0-7], s4: undef, v1: undef, (%base_lo:v[0], %base_hi:v[1]), (%tmax:v[2], %cull_mask:v[3]), (%origin_x:v[4], %origin_y:v[5], %origin_z:v[6]), (%dir_x:v[7], %dir_y:v[8], %dir_z:v[9]), %node:v[10] 1d
|
||||
Instruction* instr =
|
||||
bld.mimg(aco_opcode::image_bvh8_intersect_ray, Definition(new_origin), Definition(new_dir),
|
||||
Definition(result), inputs[0], Operand(s4), Operand(v1), base_lo, base_hi, tmax,
|
||||
cull_mask, origin_x, origin_y, origin_z, dir_x, dir_y, dir_z, node)
|
||||
.instr;
|
||||
instr->operands[3].setVectorAligned(true);
|
||||
instr->operands[5].setVectorAligned(true);
|
||||
instr->operands[7].setVectorAligned(true);
|
||||
instr->operands[8].setVectorAligned(true);
|
||||
instr->operands[10].setVectorAligned(true);
|
||||
instr->operands[11].setVectorAligned(true);
|
||||
|
||||
finish_ra_test(ra_test_policy());
|
||||
END_TEST
|
||||
|
||||
#if 0
|
||||
BEGIN_TEST(regalloc.tied_defs.bvh8.killed.move_ops)
|
||||
if (!setup_cs("s8", GFX12))
|
||||
return;
|
||||
|
|
@ -964,15 +985,27 @@ BEGIN_TEST(regalloc.tied_defs.bvh8.killed.move_ops)
|
|||
program->dev.vgpr_limit = 16;
|
||||
|
||||
//>> s8: %_:s[0-7] = p_startpgm
|
||||
//! v2: %base:v[0-1] = p_unit_test
|
||||
//! v2: %tmax_mask:v[2-3] = p_unit_test
|
||||
//! v3: %origin:v[4-6] = p_unit_test
|
||||
//! v3: %dir:v[7-9] = p_unit_test
|
||||
//! v1: %base_lo:v[0] = p_unit_test
|
||||
//! v1: %base_hi:v[1] = p_unit_test
|
||||
//! v1: %tmax:v[2] = p_unit_test
|
||||
//! v1: %cull_mask:v[3] = p_unit_test
|
||||
//! v1: %origin_x:v[4] = p_unit_test
|
||||
//! v1: %origin_y:v[5] = p_unit_test
|
||||
//! v1: %origin_z:v[6] = p_unit_test
|
||||
//! v1: %dir_x:v[7] = p_unit_test
|
||||
//! v1: %dir_y:v[8] = p_unit_test
|
||||
//! v1: %dir_z:v[9] = p_unit_test
|
||||
//! v1: %node:v[10] = p_unit_test
|
||||
Temp base = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, PhysReg(256 + 0)));
|
||||
Temp tmax_mask = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, PhysReg(256 + 2)));
|
||||
Temp origin = bld.pseudo(aco_opcode::p_unit_test, bld.def(v3, PhysReg(256 + 4)));
|
||||
Temp dir = bld.pseudo(aco_opcode::p_unit_test, bld.def(v3, PhysReg(256 + 7)));
|
||||
Temp base_lo = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 0)));
|
||||
Temp base_hi = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 1)));
|
||||
Temp tmax = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 2)));
|
||||
Temp cull_mask = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 3)));
|
||||
Temp origin_x = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 4)));
|
||||
Temp origin_y = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 5)));
|
||||
Temp origin_z = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 6)));
|
||||
Temp dir_x = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 7)));
|
||||
Temp dir_y = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 8)));
|
||||
Temp dir_z = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 9)));
|
||||
Temp node = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 10)));
|
||||
|
||||
Temp new_origin = bld.tmp(v3);
|
||||
|
|
@ -981,35 +1014,59 @@ BEGIN_TEST(regalloc.tied_defs.bvh8.killed.move_ops)
|
|||
/* When allocating the last definition, we need to move the origin/dir operands to make space. */
|
||||
//! v3: %origin_copy:v[10-12], v3: %dir_copy:v[13-15], v1: %node_copy:v[4] = p_parallelcopy %origin:v[4-6], %dir:v[7-9], %node:v[10]
|
||||
//! v3: %new_origin:v[10-12], v3: %new_dir:v[13-15], v10: %_:v[0-9] = image_bvh8_intersect_ray %_:s[0-7], s4: undef, v1: undef, %base:v[0-1], %tmax_mask:v[2-3], %origin_copy:v[10-12], %dir_copy:v[13-15], %node_copy:v[4] 1d
|
||||
bld.mimg(aco_opcode::image_bvh8_intersect_ray, Definition(new_origin), Definition(new_dir),
|
||||
Definition(result), inputs[0], Operand(s4), Operand(v1), base, tmax_mask, origin, dir,
|
||||
node);
|
||||
Instruction *instr = bld.mimg(aco_opcode::image_bvh8_intersect_ray, Definition(new_origin), Definition(new_dir), Definition(result),
|
||||
inputs[0], Operand(s4), Operand(v1), base_lo, base_hi, tmax, cull_mask, origin_x, origin_y,
|
||||
origin_z, dir_x, dir_y, dir_z, node).instr;
|
||||
instr->operands[3].setVectorAligned(true);
|
||||
instr->operands[5].setVectorAligned(true);
|
||||
instr->operands[7].setVectorAligned(true);
|
||||
instr->operands[8].setVectorAligned(true);
|
||||
instr->operands[10].setVectorAligned(true);
|
||||
instr->operands[11].setVectorAligned(true);
|
||||
|
||||
finish_ra_test(ra_test_policy());
|
||||
END_TEST
|
||||
#endif
|
||||
|
||||
BEGIN_TEST(regalloc.tied_defs.bvh8.killed.duplicate_ops)
|
||||
if (!setup_cs("s8", GFX12))
|
||||
return;
|
||||
|
||||
//>> s8: %_:s[0-7] = p_startpgm
|
||||
//! v3: %origin_dir:v[0-2] = p_unit_test
|
||||
//! v2: %base:v[3-4] = p_unit_test
|
||||
//! v2: %tmax_mask:v[5-6] = p_unit_test
|
||||
//! v1: %origin_dir_x:v[0] = p_unit_test
|
||||
//! v1: %origin_dir_y:v[1] = p_unit_test
|
||||
//! v1: %origin_dir_z:v[2] = p_unit_test
|
||||
//! v1: %base_lo:v[3] = p_unit_test
|
||||
//! v1: %base_hi:v[4] = p_unit_test
|
||||
//! v1: %tmax:v[5] = p_unit_test
|
||||
//! v1: %cull_mask:v[6] = p_unit_test
|
||||
//! v1: %node:v[7] = p_unit_test
|
||||
Temp origin_dir = bld.pseudo(aco_opcode::p_unit_test, bld.def(v3, PhysReg(256 + 0)));
|
||||
Temp base = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, PhysReg(256 + 3)));
|
||||
Temp tmax_mask = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, PhysReg(256 + 5)));
|
||||
Temp origin_dir_x = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 0)));
|
||||
Temp origin_dir_y = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 1)));
|
||||
Temp origin_dir_z = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 2)));
|
||||
Temp base_lo = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 3)));
|
||||
Temp base_hi = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 4)));
|
||||
Temp tmax = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 5)));
|
||||
Temp cull_mask = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 6)));
|
||||
Temp node = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 7)));
|
||||
|
||||
Temp new_origin = bld.tmp(v3);
|
||||
Temp new_dir = bld.tmp(v3);
|
||||
Temp result = bld.tmp(v10);
|
||||
//! v3: %origin_dir_copy:v[8-10] = p_parallelcopy %origin_dir:v[0-2]
|
||||
//! v3: %new_origin:v[0-2], v3: %new_dir:v[8-10], v10: %_:v[12-21] = image_bvh8_intersect_ray %_:s[0-7], s4: undef, v1: undef, %base:v[3-4], %tmax_mask:v[5-6], %origin_dir:v[0-2], %origin_dir_copy:v[8-10], %node:v[7] 1d
|
||||
bld.mimg(aco_opcode::image_bvh8_intersect_ray, Definition(new_origin), Definition(new_dir),
|
||||
Definition(result), inputs[0], Operand(s4), Operand(v1), base, tmax_mask, origin_dir,
|
||||
origin_dir, node);
|
||||
//! v1: %origin_dir_copy_x:v[8], v1: %origin_dir_copy_y:v[9], v1: %origin_dir_copy_z:v[10] = p_parallelcopy %origin_dir_x:v[0], %origin_dir_y:v[1], %origin_dir_z:v[2]
|
||||
//! v3: %new_origin:v[0-2], v3: %new_dir:v[8-10], v10: %_:v[12-21] = image_bvh8_intersect_ray %_:s[0-7], s4: undef, v1: undef, (%base_lo:v[3], %base_hi:v[4]), (%tmax:v[5], %cull_mask:v[6]), (%origin_dir_x:v[0], %origin_dir_y:v[1], %origin_dir_z:v[2]), (%origin_dir_copy_x:v[8], %origin_dir_copy_y:v[9], %origin_dir_copy_z:v[10]), %node:v[7] 1d
|
||||
Instruction* instr =
|
||||
bld.mimg(aco_opcode::image_bvh8_intersect_ray, Definition(new_origin), Definition(new_dir),
|
||||
Definition(result), inputs[0], Operand(s4), Operand(v1), base_lo, base_hi, tmax,
|
||||
cull_mask, origin_dir_x, origin_dir_y, origin_dir_z, origin_dir_x, origin_dir_y,
|
||||
origin_dir_z, node)
|
||||
.instr;
|
||||
instr->operands[3].setVectorAligned(true);
|
||||
instr->operands[5].setVectorAligned(true);
|
||||
instr->operands[7].setVectorAligned(true);
|
||||
instr->operands[8].setVectorAligned(true);
|
||||
instr->operands[10].setVectorAligned(true);
|
||||
instr->operands[11].setVectorAligned(true);
|
||||
|
||||
finish_ra_test(ra_test_policy());
|
||||
END_TEST
|
||||
|
|
@ -1019,34 +1076,55 @@ BEGIN_TEST(regalloc.tied_defs.bvh8.live_through.simple)
|
|||
return;
|
||||
|
||||
//>> s8: %_:s[0-7] = p_startpgm
|
||||
//! v2: %base:v[0-1] = p_unit_test
|
||||
//! v2: %tmax_mask:v[2-3] = p_unit_test
|
||||
//! v3: %origin:v[4-6] = p_unit_test
|
||||
//! v3: %dir:v[7-9] = p_unit_test
|
||||
//! v1: %base_lo:v[0] = p_unit_test
|
||||
//! v1: %base_hi:v[1] = p_unit_test
|
||||
//! v1: %tmax:v[2] = p_unit_test
|
||||
//! v1: %cull_mask:v[3] = p_unit_test
|
||||
//! v1: %origin_x:v[4] = p_unit_test
|
||||
//! v1: %origin_y:v[5] = p_unit_test
|
||||
//! v1: %origin_z:v[6] = p_unit_test
|
||||
//! v1: %dir_x:v[7] = p_unit_test
|
||||
//! v1: %dir_y:v[8] = p_unit_test
|
||||
//! v1: %dir_z:v[9] = p_unit_test
|
||||
//! v1: %node:v[10] = p_unit_test
|
||||
Temp base = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, PhysReg(256 + 0)));
|
||||
Temp tmax_mask = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, PhysReg(256 + 2)));
|
||||
Temp origin = bld.pseudo(aco_opcode::p_unit_test, bld.def(v3, PhysReg(256 + 4)));
|
||||
Temp dir = bld.pseudo(aco_opcode::p_unit_test, bld.def(v3, PhysReg(256 + 7)));
|
||||
Temp base_lo = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 0)));
|
||||
Temp base_hi = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 1)));
|
||||
Temp tmax = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 2)));
|
||||
Temp cull_mask = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 3)));
|
||||
Temp origin_x = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 4)));
|
||||
Temp origin_y = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 5)));
|
||||
Temp origin_z = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 6)));
|
||||
Temp dir_x = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 7)));
|
||||
Temp dir_y = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 8)));
|
||||
Temp dir_z = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 9)));
|
||||
Temp node = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, PhysReg(256 + 10)));
|
||||
|
||||
Temp new_origin = bld.tmp(v3);
|
||||
Temp new_dir = bld.tmp(v3);
|
||||
Temp result = bld.tmp(v10);
|
||||
//! v3: %origin_copy:v[11-13], v3: %dir_copy:v[14-16] = p_parallelcopy %origin:v[4-6], %dir:v[7-9]
|
||||
//! v3: %new_origin:v[11-13], v3: %new_dir:v[14-16], v10: %_:v[18-27] = image_bvh8_intersect_ray %_:s[0-7], s4: undef, v1: undef, %base:v[0-1], %tmax_mask:v[2-3], %origin_copy:v[11-13], %dir_copy:v[14-16], %node:v[10] 1d
|
||||
bld.mimg(aco_opcode::image_bvh8_intersect_ray, Definition(new_origin), Definition(new_dir),
|
||||
Definition(result), inputs[0], Operand(s4), Operand(v1), base, tmax_mask, origin, dir,
|
||||
node);
|
||||
//! v1: %origin_copy_x:v[11], v1: %origin_copy_y:v[12], v1: %origin_copy_z:v[13], v1: %dir_copy_x:v[14], v1: %dir_copy_y:v[15], v1: %dir_copy_z:v[16] = p_parallelcopy %origin_x:v[4], %origin_y:v[5], %origin_z:v[6], %dir_x:v[7], %dir_y:v[8], %dir_z:v[9]
|
||||
//! v3: %new_origin:v[4-6], v3: %new_dir:v[7-9], v10: %_:v[18-27] = image_bvh8_intersect_ray %_:s[0-7], s4: undef, v1: undef, (%base_lo:v[0], %base_hi:v[1]), (%tmax:v[2], %cull_mask:v[3]), (%origin_x2:v[4], %origin_y2:v[5], %origin_z2:v[6]), (%dir_x2:v[7], %dir_y2:v[8], %dir_z2:v[9]), %node:v[10] 1d
|
||||
Instruction* instr =
|
||||
bld.mimg(aco_opcode::image_bvh8_intersect_ray, Definition(new_origin), Definition(new_dir),
|
||||
Definition(result), inputs[0], Operand(s4), Operand(v1), base_lo, base_hi, tmax,
|
||||
cull_mask, origin_x, origin_y, origin_z, dir_x, dir_y, dir_z, node)
|
||||
.instr;
|
||||
instr->operands[3].setVectorAligned(true);
|
||||
instr->operands[5].setVectorAligned(true);
|
||||
instr->operands[7].setVectorAligned(true);
|
||||
instr->operands[8].setVectorAligned(true);
|
||||
instr->operands[10].setVectorAligned(true);
|
||||
instr->operands[11].setVectorAligned(true);
|
||||
|
||||
//! p_unit_test %origin:v[4-6]
|
||||
//! p_unit_test %dir:v[7-9]
|
||||
bld.pseudo(aco_opcode::p_unit_test, origin);
|
||||
bld.pseudo(aco_opcode::p_unit_test, dir);
|
||||
//! p_unit_test %origin_copy_x:v[11], %origin_copy_y:v[12], %origin_copy_z:v[13]
|
||||
//! p_unit_test %dir_copy_x:v[14], %dir_copy_y:v[15], %dir_copy_z:v[16]
|
||||
bld.pseudo(aco_opcode::p_unit_test, origin_x, origin_y, origin_z);
|
||||
bld.pseudo(aco_opcode::p_unit_test, dir_x, dir_y, dir_z);
|
||||
|
||||
finish_ra_test(ra_test_policy());
|
||||
END_TEST
|
||||
|
||||
#if 0
|
||||
BEGIN_TEST(regalloc.tied_defs.bvh8.live_through.move_ops)
|
||||
if (!setup_cs("s8", GFX12))
|
||||
return;
|
||||
|
|
@ -1083,6 +1161,7 @@ BEGIN_TEST(regalloc.tied_defs.bvh8.live_through.move_ops)
|
|||
|
||||
finish_ra_test(ra_test_policy());
|
||||
END_TEST
|
||||
#endif
|
||||
|
||||
BEGIN_TEST(regalloc.vector_aligned.vec_overlaps_with_operand.first)
|
||||
if (!setup_cs("", GFX11))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue