From 1ba73621bccd1ff19c96fcd9950461e1249bbdf2 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 5 Apr 2023 15:52:40 +0100 Subject: [PATCH] nir,vtn,aco,ac/llvm: make cube_face_coord_amd more direct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Marek Olšák Reviewed-by: Georg Lehmann Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 7 +------ src/amd/llvm/ac_nir_to_llvm.c | 17 +++++------------ src/compiler/nir/nir_opcodes.py | 14 +++++--------- src/compiler/spirv/vtn_amd.c | 6 +++++- 4 files changed, 16 insertions(+), 28 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index b5d419535b7..b0f83eb9fec 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2503,14 +2503,9 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) Temp src[3] = {emit_extract_vector(ctx, in, 0, v1), emit_extract_vector(ctx, in, 1, v1), emit_extract_vector(ctx, in, 2, v1)}; Temp ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), src[0], src[1], src[2]); - ma = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), ma); Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]); Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]); - sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3f000000u /*0.5*/), - bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, ma)); - tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3f000000u /*0.5*/), - bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, ma)); - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), sc, tc); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), sc, tc, ma); break; } case nir_op_cube_face_index_amd: { diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index d36145c776a..3cb5a0da655 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -1183,21 +1183,14 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_cube_face_coord_amd: { src[0] = ac_to_float(&ctx->ac, src[0]); - LLVMValueRef results[2]; + LLVMValueRef results[3]; LLVMValueRef in[3]; for (unsigned chan = 0; chan < 3; chan++) in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan); - results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", ctx->ac.f32, in, 3, - 0); - results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", ctx->ac.f32, in, 3, - 0); - LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema", ctx->ac.f32, in, 3, 0); - results[0] = ac_build_fdiv(&ctx->ac, results[0], ma); - results[1] = ac_build_fdiv(&ctx->ac, results[1], ma); - LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5); - results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, ""); - results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, ""); - result = ac_build_gather_values(&ctx->ac, results, 2); + results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", ctx->ac.f32, in, 3, 0); + results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", ctx->ac.f32, in, 3, 0); + results[2] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema", ctx->ac.f32, in, 3, 0); + result = ac_build_gather_values(&ctx->ac, results, 3); break; } diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index f7e9df073f7..ef4a7327575 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -537,16 +537,15 @@ for (unsigned bit = 0; bit < bit_size; bit++) { """) # AMD_gcn_shader extended instructions -unop_horiz("cube_face_coord_amd", 2, tfloat32, 3, tfloat32, """ -dst.x = dst.y = 0.0; +unop_horiz("cube_face_coord_amd", 3, tfloat32, 3, tfloat32, """ +dst.x = dst.y = dst.z = 0.0; float absX = fabsf(src0.x); float absY = fabsf(src0.y); float absZ = fabsf(src0.z); -float ma = 0.0; -if (absX >= absY && absX >= absZ) { ma = 2 * src0.x; } -if (absY >= absX && absY >= absZ) { ma = 2 * src0.y; } -if (absZ >= absX && absZ >= absY) { ma = 2 * src0.z; } +if (absX >= absY && absX >= absZ) { dst.z = 2 * src0.x; } +if (absY >= absX && absY >= absZ) { dst.z = 2 * src0.y; } +if (absZ >= absX && absZ >= absY) { dst.z = 2 * src0.z; } if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.z; dst.y = -src0.y; } if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = src0.z; dst.y = -src0.y; } @@ -554,9 +553,6 @@ if (src0.y >= 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = src0. if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = -src0.z; } if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = src0.x; dst.y = -src0.y; } if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.x; dst.y = -src0.y; } - -dst.x = dst.x * (1.0f / ma) + 0.5f; -dst.y = dst.y * (1.0f / ma) + 0.5f; """) unop_horiz("cube_face_index_amd", 1, tfloat32, 3, tfloat32, """ diff --git a/src/compiler/spirv/vtn_amd.c b/src/compiler/spirv/vtn_amd.c index e1e29c91591..d96806f4183 100644 --- a/src/compiler/spirv/vtn_amd.c +++ b/src/compiler/spirv/vtn_amd.c @@ -35,9 +35,13 @@ vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, SpvOp ext_opcode, case CubeFaceIndexAMD: def = nir_cube_face_index_amd(&b->nb, vtn_get_nir_ssa(b, w[5])); break; - case CubeFaceCoordAMD: + case CubeFaceCoordAMD: { def = nir_cube_face_coord_amd(&b->nb, vtn_get_nir_ssa(b, w[5])); + nir_ssa_def *st = nir_channels(&b->nb, def, 0x3); + nir_ssa_def *invma = nir_frcp(&b->nb, nir_channel(&b->nb, def, 2)); + def = nir_ffma_imm2(&b->nb, st, invma, 0.5); break; + } case TimeAMD: { def = nir_pack_64_2x32(&b->nb, nir_shader_clock(&b->nb, NIR_SCOPE_SUBGROUP)); break;