mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
nir: add nir_lower_calls_to_builtins pass
nir_builder for the GPU Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32382>
This commit is contained in:
parent
879e8bfe6d
commit
d8ece9bf3a
3 changed files with 148 additions and 0 deletions
|
|
@ -124,6 +124,7 @@ files_libnir = files(
|
|||
'nir_lower_bool_to_bitsize.c',
|
||||
'nir_lower_bool_to_float.c',
|
||||
'nir_lower_bool_to_int32.c',
|
||||
'nir_lower_calls_to_builtins.c',
|
||||
'nir_lower_cl_images.c',
|
||||
'nir_lower_clamp_color_outputs.c',
|
||||
'nir_lower_clip.c',
|
||||
|
|
|
|||
|
|
@ -5517,6 +5517,7 @@ bool nir_inline_functions(nir_shader *shader);
|
|||
void nir_cleanup_functions(nir_shader *shader);
|
||||
bool nir_link_shader_functions(nir_shader *shader,
|
||||
const nir_shader *link_shader);
|
||||
bool nir_lower_calls_to_builtins(nir_shader *s);
|
||||
|
||||
void nir_find_inlinable_uniforms(nir_shader *shader);
|
||||
void nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms,
|
||||
|
|
|
|||
146
src/compiler/nir/nir_lower_calls_to_builtins.c
Normal file
146
src/compiler/nir/nir_lower_calls_to_builtins.c
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
/*
|
||||
* Copyright 2024 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
/*
|
||||
* Lower calls to functions prefixed "nir_*" to the NIR ALU instruction or
|
||||
* intrinsic represented. This matches functions of the form:
|
||||
*
|
||||
* nir_[op name](__optional mangling suffix)
|
||||
*
|
||||
* These functions return a value if the instruction has a destination. They
|
||||
* take all instruction sources as parameters, followed by parameters for each
|
||||
* ordered intrinsic index if any.
|
||||
*
|
||||
* Mangling allows for multiple definitions of the same instruction with
|
||||
* different vector lengths and bit sizes. This could be combined with
|
||||
 * __attribute__((overloadable)) for seamless overloads.
|
||||
*
|
||||
* In effect, this pass re-implements nir_builder dynamically. This exposes
|
||||
* low-level hardware intrinsics to internal driver programs. It is intended for
|
||||
* use with internal OpenCL but should theoretically work for GLSL too.
|
||||
*/
|
||||
|
||||
static void
|
||||
lower_builtin_alu(nir_builder *b, nir_call_instr *call, nir_op op)
|
||||
{
|
||||
const nir_op_info info = nir_op_infos[op];
|
||||
nir_def *srcs[NIR_ALU_MAX_INPUTS];
|
||||
|
||||
for (unsigned s = 0; s < info.num_inputs; ++s) {
|
||||
srcs[s] = call->params[1 + s].ssa;
|
||||
}
|
||||
|
||||
nir_def *res = nir_build_alu_src_arr(b, op, srcs);
|
||||
nir_store_deref(b, nir_src_as_deref(call->params[0]), res,
|
||||
nir_component_mask(res->num_components));
|
||||
}
|
||||
|
||||
/* Lower a call to a nir_<intrinsic> builtin into the intrinsic itself.
 *
 * Call parameter layout (see the file header): an optional return deref
 * first (only when the intrinsic has a destination), then one parameter per
 * intrinsic source, then one constant parameter per ordered intrinsic index.
 */
static void
lower_builtin_intr(nir_builder *b, nir_call_instr *call, nir_intrinsic_op op)
{
   nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader, op);
   const nir_intrinsic_info info = nir_intrinsic_infos[op];

   /* If there is a destination, the first parameter is the return deref */
   unsigned src = info.has_dest ? 1 : 0;
   assert(call->num_params == (src + info.num_srcs + info.num_indices));

   /* The next parameters are the intrinsic sources */
   for (unsigned s = 0; s < info.num_srcs; ++s) {
      intr->src[s] = nir_src_for_ssa(call->params[src++].ssa);
   }

   /* The remaining parameters are the intrinsic indices, extracted as
    * integer constants.
    * NOTE(review): index_map appears to store index+1 with 0 meaning
    * "absent", hence the "- 1" — confirm against nir_intrinsics.h.
    */
   for (unsigned s = 0; s < info.num_indices; ++s) {
      uint64_t val = nir_src_as_uint(call->params[src++]);
      intr->const_index[info.index_map[info.indices[s]] - 1] = val;
   }

   /* Some intrinsics must infer num_components from a particular source. */
   for (unsigned s = 0; s < info.num_srcs; ++s) {
      if (info.src_components[s] == 0) {
         intr->num_components = intr->src[s].ssa->num_components;
         break;
      }
   }

   /* Insert the instruction before any store_deref */
   nir_builder_instr_insert(b, &intr->instr);

   /* If there is a destination, plumb it through the return deref */
   if (info.has_dest) {
      nir_deref_instr *deref = nir_src_as_deref(call->params[0]);

      /* Destination bit size / component count come from the deref's type;
       * the MAX2 clamps the component count to at least 1 (presumably for
       * scalar types where glsl_get_length reports 0 — verify).
       */
      unsigned bit_size = glsl_get_bit_size(deref->type);
      unsigned num_components = MAX2(glsl_get_length(deref->type), 1);

      nir_def_init(&intr->instr, &intr->def, num_components, bit_size);
      nir_store_deref(b, deref, &intr->def, nir_component_mask(num_components));

      /* If neither the opcode info nor any source fixed num_components,
       * fall back to the return deref's component count.
       */
      if (info.dest_components == 0 && intr->num_components == 0) {
         intr->num_components = num_components;
      }
   }
}
|
||||
|
||||
static bool
|
||||
lower(nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
/* All builtins are exposed as function calls */
|
||||
if (instr->type != nir_instr_type_call)
|
||||
return false;
|
||||
|
||||
nir_call_instr *call = nir_instr_as_call(instr);
|
||||
nir_function *func = call->callee;
|
||||
|
||||
/* We reserve all functions prefixed nir_* as builtins needing lowering. */
|
||||
if (strncmp("nir_", func->name, strlen("nir_")) != 0)
|
||||
return false;
|
||||
|
||||
/* Strip the nir_ prefix to give the name of an ALU opcode or intrinsic. Also
|
||||
* strip the __* suffix if present: we don't need mangling information, we
|
||||
* can recover vector lengths / bit sizes from the NIR. This implements a
|
||||
* crude form of function overloading.
|
||||
*/
|
||||
const char *intr_name = func->name + strlen("nir_");
|
||||
const char *suffix = strstr(intr_name, "__");
|
||||
unsigned len = (suffix != NULL) ? (suffix - intr_name) : strlen(intr_name);
|
||||
|
||||
/* From this point on, we must not fail. Remove the call. */
|
||||
b->cursor = nir_instr_remove(&call->instr);
|
||||
|
||||
/* Look for an ALU opcode */
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(nir_op_infos); ++i) {
|
||||
if (strncmp(intr_name, nir_op_infos[i].name, len) == 0 &&
|
||||
strlen(nir_op_infos[i].name) == len) {
|
||||
|
||||
lower_builtin_alu(b, call, i);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Look for an intrinsic */
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(nir_intrinsic_infos); ++i) {
|
||||
if (strncmp(intr_name, nir_intrinsic_infos[i].name, len) == 0 &&
|
||||
strlen(nir_intrinsic_infos[i].name) == len) {
|
||||
|
||||
lower_builtin_intr(b, call, i);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* We must have matched something! */
|
||||
fprintf(stderr, "unknown opcode %s\n", func->name);
|
||||
unreachable("invalid nir opcode/intrinsic");
|
||||
}
|
||||
|
||||
bool
|
||||
nir_lower_calls_to_builtins(nir_shader *s)
|
||||
{
|
||||
return nir_shader_instructions_pass(s, lower, nir_metadata_none, NULL);
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue