glsl: add KHR_shader_subgroup_arithmetic builtin functions

Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30610>
This commit is contained in:
Qiang Yu 2024-08-02 15:58:24 +08:00
parent ac0bbb5b3f
commit db41c09e17
3 changed files with 210 additions and 0 deletions

View file

@ -1081,6 +1081,18 @@ subgroup_shuffle_relative_and_fp64(const _mesa_glsl_parse_state *state)
return subgroup_shuffle_relative(state) && fp64(state);
}
/**
 * Predicate for the subgroup arithmetic built-ins.
 *
 * Reports whether KHR_shader_subgroup_arithmetic is enabled in \p state.
 */
static bool
subgroup_arithmetic(const _mesa_glsl_parse_state *state)
{
   const bool enabled = state->KHR_shader_subgroup_arithmetic_enable;
   return enabled;
}
/**
 * Predicate for the double-precision subgroup arithmetic built-ins.
 *
 * True only when subgroup_arithmetic() and fp64() both hold for \p state;
 * fp64() is not consulted when subgroup_arithmetic() already fails.
 */
static bool
subgroup_arithmetic_and_fp64(const _mesa_glsl_parse_state *state)
{
   if (!subgroup_arithmetic(state))
      return false;

   return fp64(state);
}
/** @} */
/******************************************************************************/
@ -1509,6 +1521,11 @@ private:
ir_function_signature *_shuffle_down_intrinsic(const glsl_type *type);
ir_function_signature *_shuffle_down(const glsl_type *type);
/* Builds the signature of a subgroup arithmetic intrinsic taking one
 * "value" parameter of the given type; id is forwarded to MAKE_INTRINSIC. */
ir_function_signature *_subgroup_arithmetic_intrinsic(const glsl_type *type,
enum ir_intrinsic_id id);
/* Builds a subgroup arithmetic built-in for the given type whose body calls
 * the function looked up by intrinsic_name and returns its result. */
ir_function_signature *_subgroup_arithmetic(const glsl_type *type,
const char *intrinsic_name);
#undef B0
#undef B1
#undef B2
@ -1648,6 +1665,13 @@ builtin_builder::create_shader()
func(&glsl_type_builtin_bvec3, ##__VA_ARGS__), \
func(&glsl_type_builtin_bvec4, ##__VA_ARGS__)
/* Expands to everything FIU(func, ...) produces, followed by func applied to
 * the double, dvec2, dvec3 and dvec4 types. Extra arguments are forwarded
 * unchanged to every func invocation. */
#define FIUD(func, ...) \
FIU(func, ##__VA_ARGS__), \
func(&glsl_type_builtin_double, ##__VA_ARGS__), \
func(&glsl_type_builtin_dvec2, ##__VA_ARGS__), \
func(&glsl_type_builtin_dvec3, ##__VA_ARGS__), \
func(&glsl_type_builtin_dvec4, ##__VA_ARGS__)
#define FIUBD(func, ...) \
FIUB(func, ##__VA_ARGS__), \
func(&glsl_type_builtin_double, ##__VA_ARGS__), \
@ -1662,6 +1686,20 @@ builtin_builder::create_shader()
func(&glsl_type_builtin_dvec3, avail##_and_fp64, ##__VA_ARGS__), \
func(&glsl_type_builtin_dvec4, avail##_and_fp64, ##__VA_ARGS__)
/* Expands to a comma-separated list of func invocations, one for each signed
 * integer, unsigned integer and boolean scalar/vector type (no floating-point
 * types). Extra arguments are forwarded unchanged to every invocation. */
#define IUB(func, ...) \
func(&glsl_type_builtin_int, ##__VA_ARGS__), \
func(&glsl_type_builtin_ivec2, ##__VA_ARGS__), \
func(&glsl_type_builtin_ivec3, ##__VA_ARGS__), \
func(&glsl_type_builtin_ivec4, ##__VA_ARGS__), \
func(&glsl_type_builtin_uint, ##__VA_ARGS__), \
func(&glsl_type_builtin_uvec2, ##__VA_ARGS__), \
func(&glsl_type_builtin_uvec3, ##__VA_ARGS__), \
func(&glsl_type_builtin_uvec4, ##__VA_ARGS__), \
func(&glsl_type_builtin_bool, ##__VA_ARGS__), \
func(&glsl_type_builtin_bvec2, ##__VA_ARGS__), \
func(&glsl_type_builtin_bvec3, ##__VA_ARGS__), \
func(&glsl_type_builtin_bvec4, ##__VA_ARGS__)
/**
* Create ir_function and ir_function_signature objects for each
* intrinsic.
@ -1931,6 +1969,26 @@ builtin_builder::create_intrinsics()
add_function("__intrinsic_shuffle_up", FIUBD(_shuffle_up_intrinsic), NULL);
add_function("__intrinsic_shuffle_down", FIUBD(_shuffle_down_intrinsic), NULL);
/* Registers the seven "__intrinsic_<group>_<op>" functions for one group.
 *
 * ext   selects the signature builder, _subgroup_<ext>_intrinsic.
 * group is pasted into both the function name and the
 *       ir_intrinsic_<group>_<op> id handed to the builder.
 *
 * add/mul/min/max are instantiated through FIUD, and/or/xor through IUB.
 * The final add_function() has no trailing semicolon; the invocation site
 * supplies it. */
#define SUBGROUP_ARITH_INTRINSICS(ext, group) \
add_function("__intrinsic_" #group "_add", \
FIUD(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_add), NULL); \
add_function("__intrinsic_" #group "_mul", \
FIUD(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_mul), NULL); \
add_function("__intrinsic_" #group "_min", \
FIUD(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_min), NULL); \
add_function("__intrinsic_" #group "_max", \
FIUD(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_max), NULL); \
add_function("__intrinsic_" #group "_and", \
IUB(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_and), NULL); \
add_function("__intrinsic_" #group "_or", \
IUB(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_or), NULL); \
add_function("__intrinsic_" #group "_xor", \
IUB(_subgroup_##ext##_intrinsic, ir_intrinsic_##group##_xor), NULL)
/* One set of intrinsics each for the reduce, inclusive and exclusive groups. */
SUBGROUP_ARITH_INTRINSICS(arithmetic, reduce);
SUBGROUP_ARITH_INTRINSICS(arithmetic, inclusive);
SUBGROUP_ARITH_INTRINSICS(arithmetic, exclusive);
}
/**
@ -5848,6 +5906,26 @@ builtin_builder::create_builtins()
add_function("subgroupShuffleDown", FIUBD(_shuffle_down), NULL);
/* Registers the seven "subgroup<group1><Op>" built-ins for one group.
 *
 * ext    selects the signature builder, _subgroup_<ext>.
 * group1 is stringified into the built-in name; it may be empty, which
 *        yields e.g. "subgroupAdd".
 * group2 is stringified into the "__intrinsic_<group2>_<op>" name passed to
 *        the builder.
 *
 * Add/Mul/Min/Max are instantiated through FIUD, And/Or/Xor through IUB.
 * The final add_function() has no trailing semicolon; the invocation site
 * supplies it. */
#define SUBGROUP_ARITH(ext, group1, group2) \
add_function("subgroup" #group1 "Add", \
FIUD(_subgroup_##ext, "__intrinsic_" #group2 "_add"), NULL); \
add_function("subgroup" #group1 "Mul", \
FIUD(_subgroup_##ext, "__intrinsic_" #group2 "_mul"), NULL); \
add_function("subgroup" #group1 "Min", \
FIUD(_subgroup_##ext, "__intrinsic_" #group2 "_min"), NULL); \
add_function("subgroup" #group1 "Max", \
FIUD(_subgroup_##ext, "__intrinsic_" #group2 "_max"), NULL); \
add_function("subgroup" #group1 "And", \
IUB(_subgroup_##ext, "__intrinsic_" #group2 "_and"), NULL); \
add_function("subgroup" #group1 "Or", \
IUB(_subgroup_##ext, "__intrinsic_" #group2 "_or"), NULL); \
add_function("subgroup" #group1 "Xor", \
IUB(_subgroup_##ext, "__intrinsic_" #group2 "_xor"), NULL)
/* subgroupAdd..., subgroupInclusiveAdd..., subgroupExclusiveAdd... */
SUBGROUP_ARITH(arithmetic, /* empty */, reduce);
SUBGROUP_ARITH(arithmetic, Inclusive, inclusive);
SUBGROUP_ARITH(arithmetic, Exclusive, exclusive);
#undef F
#undef FI
#undef FIUDHF_VEC
@ -9249,6 +9327,29 @@ builtin_builder::_shuffle_down(const glsl_type *type)
return sig;
}
/**
 * Build the signature of one subgroup arithmetic intrinsic.
 *
 * \param type  type of the single "value" parameter; also forwarded to
 *              MAKE_INTRINSIC as its first argument
 * \param id    intrinsic id forwarded to MAKE_INTRINSIC
 *
 * Double-typed variants are gated on subgroup_arithmetic_and_fp64, all
 * other types on subgroup_arithmetic.
 */
ir_function_signature *
builtin_builder::_subgroup_arithmetic_intrinsic(const glsl_type *type, enum ir_intrinsic_id id)
{
   ir_variable *operand = in_var(type, "value");

   bool (*const is_available)(const _mesa_glsl_parse_state *) =
      glsl_type_is_double(type) ? subgroup_arithmetic_and_fp64
                                : subgroup_arithmetic;

   /* MAKE_INTRINSIC declares the local `sig` returned below. */
   MAKE_INTRINSIC(type, id, is_available, 1, operand);
   return sig;
}
/**
 * Build one subgroup arithmetic built-in that wraps an intrinsic.
 *
 * \param type            type of the single "value" parameter and of the
 *                        temporary that receives the intrinsic's result
 * \param intrinsic_name  name of the intrinsic function to call, looked up
 *                        in the shader's symbol table
 *
 * Double-typed variants are gated on subgroup_arithmetic_and_fp64, all
 * other types on subgroup_arithmetic.
 */
ir_function_signature *
builtin_builder::_subgroup_arithmetic(const glsl_type *type, const char *intrinsic_name)
{
   ir_variable *operand = in_var(type, "value");

   bool (*const is_available)(const _mesa_glsl_parse_state *) =
      glsl_type_is_double(type) ? subgroup_arithmetic_and_fp64
                                : subgroup_arithmetic;

   /* MAKE_SIG declares the locals `sig` and `body` used below. */
   MAKE_SIG(type, is_available, 1, operand);

   ir_variable *result = body.make_temp(type, "retval");
   ir_function *intrinsic = shader->symbols->get_function(intrinsic_name);

   /* Forward the built-in's own parameter list to the intrinsic. */
   body.emit(call(intrinsic, result, sig->parameters));
   body.emit(ret(result));

   return sig;
}
/** @} */
/******************************************************************************/

View file

@ -849,6 +849,48 @@ deref_get_qualifier(nir_deref_instr *deref)
return (gl_access_qualifier) qualifiers;
}
/**
 * Pick the NIR ALU opcode for a subgroup reduce/inclusive/exclusive intrinsic.
 *
 * \param id    one of the ir_intrinsic_{reduce,inclusive,exclusive}_* ids
 * \param type  operand type; only its base type is examined, and only for
 *              add/mul/min/max
 *
 * The reduce, inclusive and exclusive flavours of an operation all map to
 * the same opcode.
 */
static nir_op
get_reduction_op(enum ir_intrinsic_id id, const glsl_type *type)
{
   switch (id) {
   /* add/mul: signed and unsigned integers share the integer opcode,
    * every other base type gets the float opcode.
    */
   case ir_intrinsic_reduce_add:
   case ir_intrinsic_inclusive_add:
   case ir_intrinsic_exclusive_add:
      if (type->base_type == GLSL_TYPE_INT ||
          type->base_type == GLSL_TYPE_UINT)
         return nir_op_iadd;
      return nir_op_fadd;

   case ir_intrinsic_reduce_mul:
   case ir_intrinsic_inclusive_mul:
   case ir_intrinsic_exclusive_mul:
      if (type->base_type == GLSL_TYPE_INT ||
          type->base_type == GLSL_TYPE_UINT)
         return nir_op_imul;
      return nir_op_fmul;

   /* min/max: signedness matters, so int and uint get distinct opcodes. */
   case ir_intrinsic_reduce_min:
   case ir_intrinsic_inclusive_min:
   case ir_intrinsic_exclusive_min:
      switch (type->base_type) {
      case GLSL_TYPE_INT:
         return nir_op_imin;
      case GLSL_TYPE_UINT:
         return nir_op_umin;
      default:
         return nir_op_fmin;
      }

   case ir_intrinsic_reduce_max:
   case ir_intrinsic_inclusive_max:
   case ir_intrinsic_exclusive_max:
      switch (type->base_type) {
      case GLSL_TYPE_INT:
         return nir_op_imax;
      case GLSL_TYPE_UINT:
         return nir_op_umax;
      default:
         return nir_op_fmax;
      }

   /* Bitwise/logical operations: one opcode regardless of type. */
   case ir_intrinsic_reduce_and:
   case ir_intrinsic_inclusive_and:
   case ir_intrinsic_exclusive_and:
      return nir_op_iand;

   case ir_intrinsic_reduce_or:
   case ir_intrinsic_inclusive_or:
   case ir_intrinsic_exclusive_or:
      return nir_op_ior;

   case ir_intrinsic_reduce_xor:
   case ir_intrinsic_inclusive_xor:
   case ir_intrinsic_exclusive_xor:
      return nir_op_ixor;

   default:
      unreachable("not reached");
   }
}
void
nir_visitor::visit(ir_call *ir)
{
@ -1098,6 +1140,33 @@ nir_visitor::visit(ir_call *ir)
case ir_intrinsic_shuffle_down:
op = nir_intrinsic_shuffle_down;
break;
/* Subgroup arithmetic: every reduce_* id becomes a single NIR reduce
 * intrinsic; the concrete operation is chosen later by get_reduction_op().
 */
case ir_intrinsic_reduce_add:
case ir_intrinsic_reduce_mul:
case ir_intrinsic_reduce_min:
case ir_intrinsic_reduce_max:
case ir_intrinsic_reduce_and:
case ir_intrinsic_reduce_or:
case ir_intrinsic_reduce_xor:
op = nir_intrinsic_reduce;
break;
/* Likewise, every inclusive_* id becomes an inclusive scan. */
case ir_intrinsic_inclusive_add:
case ir_intrinsic_inclusive_mul:
case ir_intrinsic_inclusive_min:
case ir_intrinsic_inclusive_max:
case ir_intrinsic_inclusive_and:
case ir_intrinsic_inclusive_or:
case ir_intrinsic_inclusive_xor:
op = nir_intrinsic_inclusive_scan;
break;
/* ...and every exclusive_* id becomes an exclusive scan. */
case ir_intrinsic_exclusive_add:
case ir_intrinsic_exclusive_mul:
case ir_intrinsic_exclusive_min:
case ir_intrinsic_exclusive_max:
case ir_intrinsic_exclusive_and:
case ir_intrinsic_exclusive_or:
case ir_intrinsic_exclusive_xor:
op = nir_intrinsic_exclusive_scan;
break;
default:
unreachable("not reached");
}
@ -1471,6 +1540,22 @@ nir_visitor::visit(ir_call *ir)
nir_builder_instr_insert(&b, &instr->instr);
break;
}
/* Subgroup reduce / scan: one source operand, result shaped like the call's
 * return type, plus a reduction_op index naming the ALU operation.
 */
case nir_intrinsic_reduce:
case nir_intrinsic_inclusive_scan:
case nir_intrinsic_exclusive_scan: {
const glsl_type *type = ir->return_deref->type;
/* Size the destination from the return type (components and bit size). */
nir_def_init(&instr->instr, &instr->def, glsl_get_vector_elements(type),
glsl_get_bit_size(type));
instr->num_components = instr->def.num_components;
/* The first actual parameter of the call is the value being reduced. */
ir_rvalue *value = (ir_rvalue *)ir->actual_parameters.get_head();
instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));
/* Derive the ALU opcode from the GLSL intrinsic id and the operand type. */
nir_intrinsic_set_reduction_op(instr, get_reduction_op(ir->callee->intrinsic_id, type));
nir_builder_instr_insert(&b, &instr->instr);
break;
}
case nir_intrinsic_shader_clock:
nir_intrinsic_set_memory_scope(instr, SCOPE_SUBGROUP);
FALLTHROUGH;

View file

@ -1151,6 +1151,30 @@ enum ir_intrinsic_id {
ir_intrinsic_shuffle_xor,
ir_intrinsic_shuffle_up,
ir_intrinsic_shuffle_down,
/* Subgroup arithmetic reductions (backing subgroupAdd, subgroupMul, ...). */
ir_intrinsic_reduce_add,
ir_intrinsic_reduce_mul,
ir_intrinsic_reduce_min,
ir_intrinsic_reduce_max,
ir_intrinsic_reduce_and,
ir_intrinsic_reduce_or,
ir_intrinsic_reduce_xor,
/* Subgroup arithmetic inclusive scans (backing subgroupInclusiveAdd, ...). */
ir_intrinsic_inclusive_add,
ir_intrinsic_inclusive_mul,
ir_intrinsic_inclusive_min,
ir_intrinsic_inclusive_max,
ir_intrinsic_inclusive_and,
ir_intrinsic_inclusive_or,
ir_intrinsic_inclusive_xor,
/* Subgroup arithmetic exclusive scans (backing subgroupExclusiveAdd, ...). */
ir_intrinsic_exclusive_add,
ir_intrinsic_exclusive_mul,
ir_intrinsic_exclusive_min,
ir_intrinsic_exclusive_max,
ir_intrinsic_exclusive_and,
ir_intrinsic_exclusive_or,
ir_intrinsic_exclusive_xor,
};
/*@{*/