aco: fix 64bit extract_i8/extract_i16

The old code only sign-extended the result to 32 bits and always set the high half to zero, so signed 64-bit extracts of negative values were wrong.

Fixes: 1f2518ef9f ("aco: implement nir_op_extract/nir_op_insert")
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31734>
This commit is contained in:
Georg Lehmann 2024-10-18 12:23:54 +02:00 committed by Marge Bot
parent 4375133abb
commit 10951bb11a
5 changed files with 24 additions and 20 deletions

View file

@ -3841,26 +3841,38 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
index += swizzle * instr->def.bit_size / bits;
bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc), Operand(vec),
Operand::c32(index), Operand::c32(bits), Operand::c32(is_signed));
} else if (dst.regClass() == s1) {
Temp src = get_alu_src(ctx, instr->src[0]);
bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc), Operand(src),
Operand::c32(index), Operand::c32(bits), Operand::c32(is_signed));
} else if (dst.regClass() == s2) {
Temp src = get_alu_src(ctx, instr->src[0]);
aco_opcode op = is_signed ? aco_opcode::s_bfe_i64 : aco_opcode::s_bfe_u64;
Temp extract = bld.copy(bld.def(s1), Operand::c32((bits << 16) | (index * bits)));
bld.sop2(op, Definition(dst), bld.def(s1, scc), src, extract);
} else {
assert(dst.regClass().type() == RegType::vgpr);
Temp src = get_alu_src(ctx, instr->src[0]);
Definition def(dst);
if (dst.bytes() == 8) {
src = emit_extract_vector(ctx, src, index / comp, RegClass(src.type(), 1));
src = emit_extract_vector(ctx, src, index / comp, v1);
index %= comp;
def = bld.def(src.type(), 1);
def = bld.def(v1);
}
assert(def.bytes() <= 4);
if (def.regClass() == s1) {
bld.pseudo(aco_opcode::p_extract, def, bld.def(s1, scc), Operand(src),
Operand::c32(index), Operand::c32(bits), Operand::c32(is_signed));
} else {
src = emit_extract_vector(ctx, src, 0, def.regClass());
bld.pseudo(aco_opcode::p_extract, def, Operand(src), Operand::c32(index),
Operand::c32(bits), Operand::c32(is_signed));
src = emit_extract_vector(ctx, src, 0, def.regClass());
bld.pseudo(aco_opcode::p_extract, def, Operand(src), Operand::c32(index),
Operand::c32(bits), Operand::c32(is_signed));
if (dst.size() == 2) {
Temp lo = def.getTemp();
Operand hi = Operand::zero();
if (is_signed)
hi = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31), lo);
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
}
if (dst.size() == 2)
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), def.getTemp(),
Operand::zero());
}
break;
}

View file

@ -11,8 +11,6 @@ spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr,Fail
spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr-loop,Fail
spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail

View file

@ -11,8 +11,6 @@ spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr,Fail
spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr-loop,Fail
spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail

View file

@ -11,8 +11,6 @@ spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr,Fail
spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr-loop,Fail
spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail

View file

@ -11,8 +11,6 @@ spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr,Fail
spec@arb_gpu_shader_int64@execution@fs-ishl-then-ishr-loop,Fail
spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
spec@arb_sample_shading@ignore-centroid-qualifier 2,Fail