diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 7fc8e036431..42b5be5d79e 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -55,7 +55,6 @@ struct gl_shader_program; #define DDIV_TO_MUL_RCP 0x100000 #define DIV_TO_MUL_RCP (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP) #define SQRT_TO_ABS_SQRT 0x200000 -#define MUL64_TO_MUL_AND_MUL_HIGH 0x400000 /* Operations for lower_64bit_integer_instructions() */ #define DIV64 (1U << 0) diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp index 8e6ed3c368c..f8527625cea 100644 --- a/src/compiler/glsl/lower_instructions.cpp +++ b/src/compiler/glsl/lower_instructions.cpp @@ -157,7 +157,6 @@ private: void find_msb_to_float_cast(ir_expression *ir); void imul_high_to_mul(ir_expression *ir); void sqrt_to_abs_sqrt(ir_expression *ir); - void mul64_to_mul_and_mul_high(ir_expression *ir); ir_expression *_carry(operand a, operand b); @@ -1615,66 +1614,6 @@ lower_instructions_visitor::sqrt_to_abs_sqrt(ir_expression *ir) this->progress = true; } -void -lower_instructions_visitor::mul64_to_mul_and_mul_high(ir_expression *ir) -{ - /* Lower 32x32-> 64 to - * msb = imul_high(x_lo, y_lo) - * lsb = mul(x_lo, y_lo) - */ - const unsigned elements = ir->operands[0]->type->vector_elements; - - const ir_expression_operation operation = - ir->type->base_type == GLSL_TYPE_UINT64 ? ir_unop_pack_uint_2x32 - : ir_unop_pack_int_2x32; - - const glsl_type *var_type = ir->type->base_type == GLSL_TYPE_UINT64 - ? glsl_type::uvec(elements) - : glsl_type::ivec(elements); - - const glsl_type *ret_type = ir->type->base_type == GLSL_TYPE_UINT64 - ? glsl_type::uvec2_type - : glsl_type::ivec2_type; - - ir_instruction &i = *base_ir; - - ir_variable *msb = - new(ir) ir_variable(var_type, "msb", ir_var_temporary); - ir_variable *lsb = - new(ir) ir_variable(var_type, "lsb", ir_var_temporary); - ir_variable *x = - new(ir) ir_variable(var_type, "x", ir_var_temporary); - ir_variable *y = - new(ir) ir_variable(var_type, "y", ir_var_temporary); - - i.insert_before(x); - i.insert_before(assign(x, ir->operands[0])); - i.insert_before(y); - i.insert_before(assign(y, ir->operands[1])); - i.insert_before(msb); - i.insert_before(lsb); - - i.insert_before(assign(msb, imul_high(x, y))); - i.insert_before(assign(lsb, mul(x, y))); - - ir_rvalue *result[4] = {NULL}; - for (unsigned elem = 0; elem < elements; elem++) { - ir_rvalue *val = new(ir) ir_expression(ir_quadop_vector, ret_type, - swizzle(lsb, elem, 1), - swizzle(msb, elem, 1), NULL, NULL); - result[elem] = expr(operation, val); - } - - ir->operation = ir_quadop_vector; - ir->init_num_operands(); - ir->operands[0] = result[0]; - ir->operands[1] = result[1]; - ir->operands[2] = result[2]; - ir->operands[3] = result[3]; - - this->progress = true; -} - ir_visitor_status lower_instructions_visitor::visit_leave(ir_expression *ir) { @@ -1802,15 +1741,6 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) imul_high_to_mul(ir); break; - case ir_binop_mul: - if (lowering(MUL64_TO_MUL_AND_MUL_HIGH) && - (ir->type->base_type == GLSL_TYPE_INT64 || - ir->type->base_type == GLSL_TYPE_UINT64) && - (ir->operands[0]->type->base_type == GLSL_TYPE_INT || - ir->operands[1]->type->base_type == GLSL_TYPE_UINT)) - mul64_to_mul_and_mul_high(ir); - break; - case ir_unop_rsq: case ir_unop_sqrt: if (lowering(SQRT_TO_ABS_SQRT)) diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c index abb9cb8d2de..2993fc27e22 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -3906,6 +3906,7 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = { .lower_rotate = true, .lower_uniforms_to_ubo = true, .lower_vector_cmp = true, + .lower_int64_options = nir_lower_imul_2x32_64, .use_interpolated_input_intrinsics = true, }; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 94eda66c66c..f653f1e26fb 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -619,6 +619,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_usub_borrow = true, .lower_mul_2x32_64 = true, .lower_ifind_msb = true, + .lower_int64_options = nir_lower_imul_2x32_64, .max_unroll_iterations = 32, .use_interpolated_input_intrinsics = true, .lower_to_scalar = true, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index ebc5c6f87c0..931ee85645c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3393,7 +3393,7 @@ nvir_nir_shader_compiler_options(int chipset) ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) | ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) | ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) | - ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) | + nir_lower_imul_2x32_64 | ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) | nir_lower_ufind_msb64 ); diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 84d6e67bdbf..06ea8c5e535 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -1081,6 +1081,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen) .lower_insert_word = true, .lower_rotate = true, .lower_to_scalar = true, + .lower_int64_options = nir_lower_imul_2x32_64, .has_sdot_4x8 = sscreen->info.has_accelerated_dot_product, .has_udot_4x8 = sscreen->info.has_accelerated_dot_product, .has_dot_2x16 = sscreen->info.has_accelerated_dot_product, diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 7d84088e528..e59ec44cef0 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -88,6 +88,7 @@ static const nir_shader_compiler_options sp_compiler_options = { .lower_rotate = true, .lower_uniforms_to_ubo = true, .lower_vector_cmp = true, + .lower_int64_options = nir_lower_imul_2x32_64, .max_unroll_iterations = 32, .use_interpolated_input_intrinsics = true, }; diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 58b96ec575a..5e880020666 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -762,6 +762,7 @@ vgpu10_get_shader_param(struct pipe_screen *screen, .lower_extract_word = true, \ .lower_insert_byte = true, \ .lower_insert_word = true, \ + .lower_int64_options = nir_lower_imul_2x32_64, \ .lower_fdph = true, \ .lower_flrp64 = true, \ .lower_rotate = true, \ diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index 7dc1b33a465..b0dca2afcb0 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -729,6 +729,7 @@ static const nir_shader_compiler_options v3d_nir_options = { .lower_wpos_pntc = true, .lower_rotate = true, .lower_to_scalar = true, + .lower_int64_options = nir_lower_imul_2x32_64, .has_fsub = true, .has_isub = true, .divergence_analysis_options = diff --git a/src/mesa/state_tracker/st_glsl_to_ir.cpp b/src/mesa/state_tracker/st_glsl_to_ir.cpp index 769226065dc..d8333bb44af 100644 --- a/src/mesa/state_tracker/st_glsl_to_ir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_ir.cpp @@ -106,7 +106,6 @@ link_shader(struct gl_context *ctx, struct gl_shader_program *prog) FDIV_TO_MUL_RCP | EXP_TO_EXP2 | LOG_TO_LOG2 | - MUL64_TO_MUL_AND_MUL_HIGH | (have_ldexp ? 0 : LDEXP_TO_ARITH) | (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) | CARRY_TO_ARITH | diff --git a/src/panfrost/ci/panfrost-t860-fails.txt b/src/panfrost/ci/panfrost-t860-fails.txt index f679b2d32d1..c05cbd19824 100644 --- a/src/panfrost/ci/panfrost-t860-fails.txt +++ b/src/panfrost/ci/panfrost-t860-fails.txt @@ -87,3 +87,8 @@ dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_uint_2d,Fail dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_uint_2d_array,Fail dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_int_2d,Fail dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_int_2d_array,Fail + +dEQP-GLES31.functional.shaders.builtin_functions.integer.imulextended.ivec3_highp_compute,Fail +dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uvec3_highp_compute,Fail +dEQP-GLES31.functional.shaders.builtin_functions.integer.imulextended.ivec4_highp_vertex,Fail +dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uvec4_highp_vertex,Fail diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h index 3c5193f63fe..5ab520a37eb 100644 --- a/src/panfrost/midgard/midgard_compile.h +++ b/src/panfrost/midgard/midgard_compile.h @@ -83,6 +83,9 @@ static const nir_shader_compiler_options midgard_nir_options = { .lower_unpack_unorm_4x8 = true, .lower_unpack_snorm_4x8 = true, .lower_pack_split = true, + .lower_pack_64_2x32_split = true, + .lower_unpack_64_2x32_split = true, + .lower_int64_options = nir_lower_imul_2x32_64, .lower_doubles_options = nir_lower_dmod,