From db41c09e179e1eceefd0655fd8e472ecc97fcc8a Mon Sep 17 00:00:00 2001
From: Qiang Yu
Date: Fri, 2 Aug 2024 15:58:24 +0800
Subject: [PATCH] glsl: add KHR_shader_subgroup_arithmetic builtin functions

Signed-off-by: Qiang Yu
Part-of:
---
 src/compiler/glsl/builtin_functions.cpp | 101 ++++++++++++++++++++++++
 src/compiler/glsl/glsl_to_nir.cpp       |  85 ++++++++++++++++++++
 src/compiler/glsl/ir.h                  |  24 ++++++
 3 files changed, 210 insertions(+)

diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp
index d68b7890fa4..80de51c4bdf 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -1081,6 +1081,18 @@ subgroup_shuffle_relative_and_fp64(const _mesa_glsl_parse_state *state)
    return subgroup_shuffle_relative(state) && fp64(state);
 }
 
+static bool
+subgroup_arithmetic(const _mesa_glsl_parse_state *state) /* true when KHR_shader_subgroup_arithmetic is enabled in this parse state */
+{
+   return state->KHR_shader_subgroup_arithmetic_enable;
+}
+
+static bool
+subgroup_arithmetic_and_fp64(const _mesa_glsl_parse_state *state) /* as subgroup_arithmetic(), and fp64(state) must also hold; selected for double-based types below */
+{
+   return subgroup_arithmetic(state) && fp64(state);
+}
+
 /** @} */
 
 /******************************************************************************/
@@ -1509,6 +1521,11 @@ private:
    ir_function_signature *_shuffle_down_intrinsic(const glsl_type *type);
    ir_function_signature *_shuffle_down(const glsl_type *type);
 
+   ir_function_signature *_subgroup_arithmetic_intrinsic(const glsl_type *type, /* builds one __intrinsic_<group>_<op> overload */
+                                                         enum ir_intrinsic_id id);
+   ir_function_signature *_subgroup_arithmetic(const glsl_type *type, /* builds one user-facing overload that calls the named intrinsic */
+                                               const char *intrinsic_name);
+
 #undef B0
 #undef B1
 #undef B2
@@ -1648,6 +1665,13 @@ builtin_builder::create_shader()
    func(&glsl_type_builtin_bvec3, ##__VA_ARGS__), \
    func(&glsl_type_builtin_bvec4, ##__VA_ARGS__)
 
+#define FIUD(func, ...) /* func over everything FIU expands to, then double and dvec2..dvec4 */ \
+   FIU(func, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_double, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_dvec2, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_dvec3, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_dvec4, ##__VA_ARGS__)
+
 #define FIUBD(func, ...) \
    FIUB(func, ##__VA_ARGS__), \
    func(&glsl_type_builtin_double, ##__VA_ARGS__), \
@@ -1662,6 +1686,20 @@ builtin_builder::create_shader()
    func(&glsl_type_builtin_dvec3, avail##_and_fp64, ##__VA_ARGS__), \
    func(&glsl_type_builtin_dvec4, avail##_and_fp64, ##__VA_ARGS__)
 
+#define IUB(func, ...) /* func over int/ivec2..4, uint/uvec2..4 and bool/bvec2..4; no float or double entries */ \
+   func(&glsl_type_builtin_int, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_ivec2, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_ivec3, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_ivec4, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_uint, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_uvec2, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_uvec3, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_uvec4, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_bool, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_bvec2, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_bvec3, ##__VA_ARGS__), \
+   func(&glsl_type_builtin_bvec4, ##__VA_ARGS__)
+
 /**
  * Create ir_function and ir_function_signature objects for each
  * intrinsic.
@@ -1931,6 +1969,26 @@ builtin_builder::create_intrinsics()
    add_function("__intrinsic_shuffle_up", FIUBD(_shuffle_up_intrinsic), NULL);
 
    add_function("__intrinsic_shuffle_down", FIUBD(_shuffle_down_intrinsic), NULL);
+
+#define SUBGROUP_ARITH_INTRINSICS(ext, group) \
+   add_function("__intrinsic_" #group "_add", \
+                FIUD(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_add), NULL); \
+   add_function("__intrinsic_" #group "_mul", \
+                FIUD(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_mul), NULL); \
+   add_function("__intrinsic_" #group "_min", \
+                FIUD(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_min), NULL); \
+   add_function("__intrinsic_" #group "_max", \
+                FIUD(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_max), NULL); \
+   add_function("__intrinsic_" #group "_and", \
+                IUB(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_and), NULL); \
+   add_function("__intrinsic_" #group "_or", \
+                IUB(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_or), NULL); \
+   add_function("__intrinsic_" #group "_xor", \
+                IUB(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_xor), NULL)
+
+   SUBGROUP_ARITH_INTRINSICS(arithmetic, reduce);
+   SUBGROUP_ARITH_INTRINSICS(arithmetic, inclusive);
+   SUBGROUP_ARITH_INTRINSICS(arithmetic, exclusive);
 }
 
 /**
@@ -5848,6 +5906,26 @@ builtin_builder::create_builtins()
 
    add_function("subgroupShuffleDown", FIUBD(_shuffle_down), NULL);
 
+#define SUBGROUP_ARITH(ext, group1, group2) \
+   add_function("subgroup" #group1 "Add", \
+                FIUD(_subgroup_##ext, "__intrinsic_" #group2 "_add"), NULL); \
+   add_function("subgroup" #group1 "Mul", \
+                FIUD(_subgroup_##ext, "__intrinsic_" #group2 "_mul"), NULL); \
+   add_function("subgroup" #group1 "Min", \
+                FIUD(_subgroup_##ext, "__intrinsic_" #group2 "_min"), NULL); \
+   add_function("subgroup" #group1 "Max", \
+                FIUD(_subgroup_##ext, "__intrinsic_" #group2 "_max"), NULL); \
+   add_function("subgroup" #group1 "And", \
+                IUB(_subgroup_##ext, "__intrinsic_" #group2 "_and"), NULL); \
+   add_function("subgroup" #group1 "Or", \
+                IUB(_subgroup_##ext, "__intrinsic_" #group2 "_or"), NULL); \
+   add_function("subgroup" #group1 "Xor", \
+                IUB(_subgroup_##ext, "__intrinsic_" #group2 "_xor"), NULL)
+
+   SUBGROUP_ARITH(arithmetic, /* empty */, reduce); /* subgroupAdd .. subgroupXor, each calling __intrinsic_reduce_<op> */
+   SUBGROUP_ARITH(arithmetic, Inclusive, inclusive); /* subgroupInclusiveAdd .. subgroupInclusiveXor, each calling __intrinsic_inclusive_<op> */
+   SUBGROUP_ARITH(arithmetic, Exclusive, exclusive); /* subgroupExclusiveAdd .. subgroupExclusiveXor, each calling __intrinsic_exclusive_<op> */
+
 #undef F
 #undef FI
 #undef FIUDHF_VEC
@@ -9249,6 +9327,29 @@ builtin_builder::_shuffle_down(const glsl_type *type)
    return sig;
 }
 
+ir_function_signature *
+builtin_builder::_subgroup_arithmetic_intrinsic(const glsl_type *type, enum ir_intrinsic_id id) /* intrinsic signature for `id` with a single "value" parameter of `type` */
+{
+   ir_variable *value = in_var(type, "value");
+   MAKE_INTRINSIC(type, id,
+                  glsl_type_is_double(type) ? subgroup_arithmetic_and_fp64 : subgroup_arithmetic, /* double-based types are additionally gated on fp64 */
+                  1, value);
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_subgroup_arithmetic(const glsl_type *type, const char *intrinsic_name) /* builtin whose body forwards its "value" parameter to the intrinsic named intrinsic_name */
+{
+   ir_variable *value = in_var(type, "value");
+   MAKE_SIG(type, glsl_type_is_double(type) ? subgroup_arithmetic_and_fp64 : subgroup_arithmetic, /* same gating as the intrinsic signature */
+            1, value);
+
+   ir_variable *retval = body.make_temp(type, "retval");
+   body.emit(call(shader->symbols->get_function(intrinsic_name), retval, sig->parameters)); /* NOTE(review): lookup result is used unchecked; presumably the intrinsic is always registered first - confirm */
+   body.emit(ret(retval));
+   return sig;
+}
+
 /** @} */
 
 /******************************************************************************/
diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp
index c99a1e3975a..ec2443aa7c1 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -849,6 +849,48 @@ deref_get_qualifier(nir_deref_instr *deref)
    return (gl_access_qualifier) qualifiers;
 }
 
+static nir_op
+get_reduction_op(enum ir_intrinsic_id id, const glsl_type *type) /* choose the nir_op for a reduce/inclusive/exclusive intrinsic id from the operand's base type */
+{
+#define IR_CASE(op) /* the reduce, inclusive and exclusive ids of one operation all return the same CONV_OP(op) */ \
+   case ir_intrinsic_reduce_##op: \
+   case ir_intrinsic_inclusive_##op: \
+   case ir_intrinsic_exclusive_##op: \
+      return CONV_OP(op);
+
+   switch (id) {
+
+#define CONV_OP(op) /* add, mul: int and uint -> nir_op_i<op>, any other base type -> nir_op_f<op> */ \
+   type->base_type == GLSL_TYPE_INT || type->base_type == GLSL_TYPE_UINT ? \
+      nir_op_i##op : nir_op_f##op
+
+   IR_CASE(add)
+   IR_CASE(mul)
+
+#undef CONV_OP
+#define CONV_OP(op) /* min, max: int -> nir_op_i<op>, uint -> nir_op_u<op>, any other base type -> nir_op_f<op> */ \
+   type->base_type == GLSL_TYPE_INT ? nir_op_i##op : \
+      (type->base_type == GLSL_TYPE_UINT ? nir_op_u##op : nir_op_f##op)
+
+   IR_CASE(min)
+   IR_CASE(max)
+
+#undef CONV_OP
+#define CONV_OP(op) nir_op_i##op /* and, or, xor: always nir_op_i<op>, whatever the base type */
+
+   IR_CASE(and)
+   IR_CASE(or)
+   IR_CASE(xor)
+
+#undef CONV_OP
+
+   default:
+      unreachable("not reached"); /* id is not one of the 21 reduce/inclusive/exclusive intrinsics */
+   }
+
+#undef IR_CASE
+}
+
 void
 nir_visitor::visit(ir_call *ir)
 {
@@ -1098,6 +1140,33 @@ nir_visitor::visit(ir_call *ir)
       case ir_intrinsic_shuffle_down:
          op = nir_intrinsic_shuffle_down;
          break;
+      case ir_intrinsic_reduce_add:
+      case ir_intrinsic_reduce_mul:
+      case ir_intrinsic_reduce_min:
+      case ir_intrinsic_reduce_max:
+      case ir_intrinsic_reduce_and:
+      case ir_intrinsic_reduce_or:
+      case ir_intrinsic_reduce_xor:
+         op = nir_intrinsic_reduce; /* all seven reduce_* ids share this NIR intrinsic */
+         break;
+      case ir_intrinsic_inclusive_add:
+      case ir_intrinsic_inclusive_mul:
+      case ir_intrinsic_inclusive_min:
+      case ir_intrinsic_inclusive_max:
+      case ir_intrinsic_inclusive_and:
+      case ir_intrinsic_inclusive_or:
+      case ir_intrinsic_inclusive_xor:
+         op = nir_intrinsic_inclusive_scan; /* all seven inclusive_* ids share this NIR intrinsic */
+         break;
+      case ir_intrinsic_exclusive_add:
+      case ir_intrinsic_exclusive_mul:
+      case ir_intrinsic_exclusive_min:
+      case ir_intrinsic_exclusive_max:
+      case ir_intrinsic_exclusive_and:
+      case ir_intrinsic_exclusive_or:
+      case ir_intrinsic_exclusive_xor:
+         op = nir_intrinsic_exclusive_scan; /* all seven exclusive_* ids share this NIR intrinsic */
+         break;
       default:
          unreachable("not reached");
       }
@@ -1471,6 +1540,22 @@ nir_visitor::visit(ir_call *ir)
          nir_builder_instr_insert(&b, &instr->instr);
          break;
       }
+      case nir_intrinsic_reduce:
+      case nir_intrinsic_inclusive_scan:
+      case nir_intrinsic_exclusive_scan: {
+         const glsl_type *type = ir->return_deref->type; /* result type; also drives the nir_op choice below */
+         nir_def_init(&instr->instr, &instr->def, glsl_get_vector_elements(type),
+                      glsl_get_bit_size(type)); /* def sized from the result type's vector elements and bit size */
+         instr->num_components = instr->def.num_components;
+
+         ir_rvalue *value = (ir_rvalue *)ir->actual_parameters.get_head(); /* first actual parameter is the only source */
+         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));
+
+         nir_intrinsic_set_reduction_op(instr, get_reduction_op(ir->callee->intrinsic_id, type)); /* operation comes from the callee's intrinsic id and the result type */
+
+         nir_builder_instr_insert(&b, &instr->instr);
+         break;
+      }
       case nir_intrinsic_shader_clock:
          nir_intrinsic_set_memory_scope(instr, SCOPE_SUBGROUP);
          FALLTHROUGH;
diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
index dec285c7f36..5fa4c116605 100644
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -1151,6 +1151,30 @@ enum ir_intrinsic_id {
    ir_intrinsic_shuffle_xor,
    ir_intrinsic_shuffle_up,
    ir_intrinsic_shuffle_down,
+
+   ir_intrinsic_reduce_add, /* reduce_*: translated to nir_intrinsic_reduce in glsl_to_nir.cpp */
+   ir_intrinsic_reduce_mul,
+   ir_intrinsic_reduce_min,
+   ir_intrinsic_reduce_max,
+   ir_intrinsic_reduce_and,
+   ir_intrinsic_reduce_or,
+   ir_intrinsic_reduce_xor,
+
+   ir_intrinsic_inclusive_add, /* inclusive_*: translated to nir_intrinsic_inclusive_scan in glsl_to_nir.cpp */
+   ir_intrinsic_inclusive_mul,
+   ir_intrinsic_inclusive_min,
+   ir_intrinsic_inclusive_max,
+   ir_intrinsic_inclusive_and,
+   ir_intrinsic_inclusive_or,
+   ir_intrinsic_inclusive_xor,
+
+   ir_intrinsic_exclusive_add, /* exclusive_*: translated to nir_intrinsic_exclusive_scan in glsl_to_nir.cpp */
+   ir_intrinsic_exclusive_mul,
+   ir_intrinsic_exclusive_min,
+   ir_intrinsic_exclusive_max,
+   ir_intrinsic_exclusive_and,
+   ir_intrinsic_exclusive_or,
+   ir_intrinsic_exclusive_xor,
 };
 
 /*@{*/