diff --git a/src/amd/common/ac_nir_lower_tex.c b/src/amd/common/ac_nir_lower_tex.c index 8b49d0acceb..3e4ec1a1012 100644 --- a/src/amd/common/ac_nir_lower_tex.c +++ b/src/amd/common/ac_nir_lower_tex.c @@ -98,12 +98,12 @@ prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coord, nir if (tex->is_array && options->gfx_level <= GFX8 && coords[3]) coords[3] = nir_fmax(b, coords[3], nir_imm_float(b, 0.0)); - nir_ssa_def *cube_coords = nir_cube_face_coord_amd(b, nir_vec(b, coords, 3)); - nir_ssa_def *sc = nir_channel(b, cube_coords, 0); - nir_ssa_def *tc = nir_channel(b, cube_coords, 1); + nir_ssa_def *cube_coords = nir_cube_amd(b, nir_vec(b, coords, 3)); + nir_ssa_def *sc = nir_channel(b, cube_coords, 1); + nir_ssa_def *tc = nir_channel(b, cube_coords, 0); nir_ssa_def *ma = nir_channel(b, cube_coords, 2); nir_ssa_def *invma = nir_frcp(b, nir_fabs(b, ma)); - nir_ssa_def *id = nir_cube_face_index_amd(b, nir_vec(b, coords, 3)); + nir_ssa_def *id = nir_channel(b, cube_coords, 3); if (ddx || ddy) { sc = nir_fmul(b, sc, invma); diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index b12ce1c2649..1d3bc0102b4 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2498,21 +2498,15 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) emit_idot_instruction(ctx, instr, aco_opcode::v_dot2_u32_u16, dst, true); break; } - case nir_op_cube_face_coord_amd: { + case nir_op_cube_amd: { Temp in = get_alu_src(ctx, instr->src[0], 3); Temp src[3] = {emit_extract_vector(ctx, in, 0, v1), emit_extract_vector(ctx, in, 1, v1), emit_extract_vector(ctx, in, 2, v1)}; Temp ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), src[0], src[1], src[2]); Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]); Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]); - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), sc, tc, ma); - break; - } - case nir_op_cube_face_index_amd: { - Temp in = get_alu_src(ctx, instr->src[0], 3); - Temp src[3] = {emit_extract_vector(ctx, in, 0, v1), emit_extract_vector(ctx, in, 1, v1), - emit_extract_vector(ctx, in, 2, v1)}; - bld.vop3(aco_opcode::v_cubeid_f32, Definition(dst), src[0], src[1], src[2]); + Temp id = bld.vop3(aco_opcode::v_cubeid_f32, bld.def(v1), src[0], src[1], src[2]); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tc, sc, ma, id); break; } case nir_op_bcsel: { diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 4afe8928224..4b7f0b8fd92 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -396,8 +396,7 @@ init_context(isel_context* ctx, nir_shader* shader) case nir_op_ldexp: case nir_op_frexp_sig: case nir_op_frexp_exp: - case nir_op_cube_face_index_amd: - case nir_op_cube_face_coord_amd: + case nir_op_cube_amd: case nir_op_sad_u8x4: case nir_op_udot_4x8_uadd: case nir_op_sdot_4x8_iadd: diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 25fdb985d02..e40bea60770 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -568,8 +568,7 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_pack_sint_2x16: src_components = 2; break; - case nir_op_cube_face_coord_amd: - case nir_op_cube_face_index_amd: + case nir_op_cube_amd: src_components = 3; break; case nir_op_pack_32_4x8: @@ -1190,25 +1189,17 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) break; } - case nir_op_cube_face_coord_amd: { + case nir_op_cube_amd: { src[0] = ac_to_float(&ctx->ac, src[0]); - LLVMValueRef results[3]; + LLVMValueRef results[4]; LLVMValueRef in[3]; for (unsigned chan = 0; chan < 3; chan++) in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan); - results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", ctx->ac.f32, in, 3, 0); - results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", ctx->ac.f32, in, 3, 0); + results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", ctx->ac.f32, in, 3, 0); + results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", ctx->ac.f32, in, 3, 0); results[2] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema", ctx->ac.f32, in, 3, 0); - result = ac_build_gather_values(&ctx->ac, results, 3); - break; - } - - case nir_op_cube_face_index_amd: { - src[0] = ac_to_float(&ctx->ac, src[0]); - LLVMValueRef in[3]; - for (unsigned chan = 0; chan < 3; chan++) - in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan); - result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid", ctx->ac.f32, in, 3, 0); + results[3] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid", ctx->ac.f32, in, 3, 0); + result = ac_build_gather_values(&ctx->ac, results, 4); break; } diff --git a/src/compiler/nir/nir_lower_alu_width.c b/src/compiler/nir/nir_lower_alu_width.c index aedca323166..41c3bfcb24f 100644 --- a/src/compiler/nir/nir_lower_alu_width.c +++ b/src/compiler/nir/nir_lower_alu_width.c @@ -219,8 +219,7 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data) case nir_op_vec4: case nir_op_vec3: case nir_op_vec2: - case nir_op_cube_face_coord_amd: - case nir_op_cube_face_index_amd: + case nir_op_cube_amd: /* We don't need to scalarize these ops, they're the ones generated to * group up outputs into a value that can be SSAed. */ diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index ca98f33a304..14dde910bde 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -536,38 +536,6 @@ for (unsigned bit = 0; bit < bit_size; bit++) { } """) -# AMD_gcn_shader extended instructions -unop_horiz("cube_face_coord_amd", 3, tfloat32, 3, tfloat32, """ -dst.x = dst.y = dst.z = 0.0; -float absX = fabsf(src0.x); -float absY = fabsf(src0.y); -float absZ = fabsf(src0.z); - -if (absX >= absY && absX >= absZ) { dst.z = 2 * src0.x; } -if (absY >= absX && absY >= absZ) { dst.z = 2 * src0.y; } -if (absZ >= absX && absZ >= absY) { dst.z = 2 * src0.z; } - -if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.z; dst.y = -src0.y; } -if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = src0.z; dst.y = -src0.y; } -if (src0.y >= 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = src0.z; } -if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = -src0.z; } -if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = src0.x; dst.y = -src0.y; } -if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.x; dst.y = -src0.y; } -""") - -unop_horiz("cube_face_index_amd", 1, tfloat32, 3, tfloat32, """ -dst.x = 0.0; -float absX = fabsf(src0.x); -float absY = fabsf(src0.y); -float absZ = fabsf(src0.z); -if (src0.x >= 0 && absX >= absY && absX >= absZ) dst.x = 0; -if (src0.x < 0 && absX >= absY && absX >= absZ) dst.x = 1; -if (src0.y >= 0 && absY >= absX && absY >= absZ) dst.x = 2; -if (src0.y < 0 && absY >= absX && absY >= absZ) dst.x = 3; -if (src0.z >= 0 && absZ >= absX && absZ >= absY) dst.x = 4; -if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5; -""") - unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}", description = "Sum of vector components") @@ -1267,11 +1235,11 @@ dst = ((((src0 & 0xffff0000) >> 16) * (src1 & 0x0000ffff)) << 16) + src2; triop("imad24_ir3", tint32, _2src_commutative, "(((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8) + src2") -# r600-specific instruction that evaluates unnormalized cube texture coordinates +# r600/gcn specific instruction that evaluates unnormalized cube texture coordinates # and face index # The actual texture coordinates are evaluated from this according to # dst.yx / abs(dst.z) + 1.5 -unop_horiz("cube_r600", 4, tfloat32, 3, tfloat32, """ +unop_horiz("cube_amd", 4, tfloat32, 3, tfloat32, """ dst.x = dst.y = dst.z = 0.0; float absX = fabsf(src0.x); float absY = fabsf(src0.y); diff --git a/src/compiler/spirv/vtn_amd.c b/src/compiler/spirv/vtn_amd.c index fede3c75050..f73336684b4 100644 --- a/src/compiler/spirv/vtn_amd.c +++ b/src/compiler/spirv/vtn_amd.c @@ -33,11 +33,11 @@ vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, SpvOp ext_opcode, nir_ssa_def *def; switch ((enum GcnShaderAMD)ext_opcode) { case CubeFaceIndexAMD: - def = nir_cube_face_index_amd(&b->nb, vtn_get_nir_ssa(b, w[5])); + def = nir_channel(&b->nb, nir_cube_amd(&b->nb, vtn_get_nir_ssa(b, w[5])), 3); break; case CubeFaceCoordAMD: { - def = nir_cube_face_coord_amd(&b->nb, vtn_get_nir_ssa(b, w[5])); - nir_ssa_def *st = nir_trim_vector(&b->nb, def, 2); + def = nir_cube_amd(&b->nb, vtn_get_nir_ssa(b, w[5])); + nir_ssa_def *st = nir_swizzle(&b->nb, def, (unsigned[]){1, 0}, 2); nir_ssa_def *invma = nir_frcp(&b->nb, nir_channel(&b->nb, def, 2)); def = nir_ffma_imm2(&b->nb, st, invma, 0.5); break; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c index 67f247dd41c..6c6596ce0ca 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c @@ -1200,8 +1200,7 @@ visit_alu(struct lp_build_nir_context *bld_base, case nir_op_unpack_half_2x16: src_components = 1; break; - case nir_op_cube_face_coord_amd: - case nir_op_cube_face_index_amd: + case nir_op_cube_amd: src_components = 3; break; case nir_op_fsum2: diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index bbe18c02139..f7697c0ac64 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -1878,7 +1878,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader) return emit_tex_fdd(*alu, TexInstr::get_gradient_v, false, shader); case nir_op_fddy_fine: return emit_tex_fdd(*alu, TexInstr::get_gradient_v, true, shader); - case nir_op_cube_r600: + case nir_op_cube_amd: return emit_alu_cube(*alu, shader); default: fprintf(stderr, "Unknown instruction '"); diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index 66a2e7ee075..7d5ef15eabf 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -734,8 +734,6 @@ r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *) case nir_op_fddy_coarse: case nir_op_fddy_fine: return nir_src_bit_size(alu->src[0].src) == 64; - case nir_op_cube_r600: - return false; default: return true; } diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.cpp index 80720d3c9b9..4aa5847194a 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tex.cpp @@ -265,8 +265,8 @@ r600_nir_lower_cube_to_2darray_impl(nir_builder *b, nir_instr *instr, void *_opt int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord); assert(coord_idx >= 0); - auto cubed = nir_cube_r600(b, - nir_trim_vector(b, tex->src[coord_idx].src.ssa, 3)); + auto cubed = nir_cube_amd(b, + nir_trim_vector(b, tex->src[coord_idx].src.ssa, 3)); auto xy = nir_fmad(b, nir_vec2(b, nir_channel(b, cubed, 1), nir_channel(b, cubed, 0)), nir_frcp(b, nir_fabs(b, nir_channel(b, cubed, 2))),