template = """\
/* Copyright (C) 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef _NIR_BUILDER_OPCODES_
#define _NIR_BUILDER_OPCODES_

<%
def src_decl_list(num_srcs):
   return ', '.join('nir_def *src' + str(i) for i in range(num_srcs))

def src_list(num_srcs):
   return ', '.join('src' + str(i) for i in range(num_srcs))

def needs_num_components(opcode):
   return "replicated" in opcode.name

def intrinsic_prefix(name):
   if name in build_prefixed_intrinsics:
      return 'nir_build'
   else:
      return 'nir'
%>
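## A quick sketch of what the helpers above return, derived directly from
## their definitions (written as Mako comments, so none of this reaches the
## generated header):
##    src_decl_list(2) -> "nir_def *src0, nir_def *src1"
##    src_list(2)      -> "src0, src1"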

% for name, opcode in sorted(opcodes.items()):
% if not needs_num_components(opcode):
static inline nir_def *
nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
{
% if opcode.is_conversion and \
     type_base_type(opcode.output_type) == opcode.input_types[0]:
   if (src0->bit_size == ${type_size(opcode.output_type)})
      return src0;
%endif
% if opcode.num_inputs <= 4:
   return nir_build_alu${opcode.num_inputs}(build, nir_op_${name}, ${src_list(opcode.num_inputs)});
% else:
   nir_def *srcs[${opcode.num_inputs}] = {${src_list(opcode.num_inputs)}};
   return nir_build_alu_src_arr(build, nir_op_${name}, srcs);
% endif
}
% endif
% endfor
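## Worked example: mechanically expanding the loop above for the two-source
## opcode fadd (not a conversion, no "replicated" in the name) yields:
##
##    static inline nir_def *
##    nir_fadd(nir_builder *build, nir_def *src0, nir_def *src1)
##    {
##       return nir_build_alu2(build, nir_op_fadd, src0, src1);
##    }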

% for name, opcode in sorted(INTR_OPCODES.items()):
% if opcode.indices:
struct _nir_${name}_indices {
   int _; /* exists to avoid empty initializers */
% for index in opcode.indices:
   ${index.c_data_type} ${index.name};
% endfor
};
% endif
% endfor
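## Sketch of a generated indices struct for a hypothetical intrinsic
## store_foo carrying base and write_mask indices; the real field names and
## C types come from nir_intrinsics.py, so treat this as illustrative only:
##
##    struct _nir_store_foo_indices {
##       int _; /* exists to avoid empty initializers */
##       int base;
##       unsigned write_mask;
##    };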

<%
def intrinsic_decl_list(opcode):
   need_components = opcode.dest_components == 0 and \
                     0 not in opcode.src_components

   res = ''
   if (opcode.has_dest or opcode.num_srcs) and need_components:
      res += ', unsigned num_components'
   if opcode.has_dest and len(opcode.bit_sizes) != 1 and opcode.bit_size_src == -1:
      res += ', unsigned bit_size'
   for i in range(opcode.num_srcs):
      res += ', nir_def *src' + str(i)
   if opcode.indices:
      res += ', struct _nir_' + opcode.name + '_indices indices'
   return res

def intrinsic_macro_list(opcode):
   need_components = opcode.dest_components == 0 and \
                     0 not in opcode.src_components

   res = ''
   if (opcode.has_dest or opcode.num_srcs) and need_components:
      res += ', num_components'
   if opcode.has_dest and len(opcode.bit_sizes) != 1 and opcode.bit_size_src == -1:
      res += ', bit_size'
   for i in range(opcode.num_srcs):
      res += ', src' + str(i)
   return res

def get_intrinsic_bitsize(opcode):
   if len(opcode.bit_sizes) == 1:
      return str(opcode.bit_sizes[0])
   elif opcode.bit_size_src != -1:
      return 'src' + str(opcode.bit_size_src) + '->bit_size'
   else:
      return 'bit_size'
%>
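## Tracing intrinsic_decl_list for a hypothetical load_foo with a
## variable-sized destination (dest_components == 0), all fixed-size
## sources, several legal bit sizes not tied to a source
## (bit_size_src == -1), one source, and indices gives the parameter tail
##    ", unsigned num_components, unsigned bit_size, nir_def *src0, struct _nir_load_foo_indices indices"
## intrinsic_macro_list drops the types (and the indices, which the macros
## below pass via __VA_ARGS__): ", num_components, bit_size, src0".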

% for name, opcode in sorted(INTR_OPCODES.items()):
% if opcode.has_dest:
static inline nir_def *
% else:
static inline nir_intrinsic_instr *
% endif
_nir_build_${name}(nir_builder *build${intrinsic_decl_list(opcode)})
{
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(
      build->shader, nir_intrinsic_${name});

% if 0 in opcode.src_components:
   intrin->num_components = src${opcode.src_components.index(0)}->num_components;
% elif opcode.dest_components == 0:
   intrin->num_components = num_components;
% endif
% if opcode.has_dest:
% if opcode.dest_components == 0:
   nir_def_init(&intrin->instr, &intrin->dest.ssa, intrin->num_components, ${get_intrinsic_bitsize(opcode)});
% else:
   nir_def_init(&intrin->instr, &intrin->dest.ssa, ${opcode.dest_components}, ${get_intrinsic_bitsize(opcode)});
% endif
% endif
% for i in range(opcode.num_srcs):
   intrin->src[${i}] = nir_src_for_ssa(src${i});
% endfor
% if WRITE_MASK in opcode.indices and 0 in opcode.src_components:
   if (!indices.write_mask)
      indices.write_mask = BITFIELD_MASK(intrin->num_components);
% endif
% if ALIGN_MUL in opcode.indices and 0 in opcode.src_components:
   if (!indices.align_mul)
      indices.align_mul = src${opcode.src_components.index(0)}->bit_size / 8u;
% elif ALIGN_MUL in opcode.indices and opcode.dest_components == 0:
   if (!indices.align_mul)
      indices.align_mul = intrin->dest.ssa.bit_size / 8u;
% endif
% for index in opcode.indices:
   nir_intrinsic_set_${index.name}(intrin, indices.${index.name});
% endfor

   nir_builder_instr_insert(build, &intrin->instr);
% if opcode.has_dest:
   return &intrin->dest.ssa;
% else:
   return intrin;
% endif
}
% endfor

% for name, opcode in sorted(INTR_OPCODES.items()):
% if opcode.indices:
#ifdef __cplusplus
#define ${intrinsic_prefix(name)}_${name}(build${intrinsic_macro_list(opcode)}, ...) ${'\\\\'}
_nir_build_${name}(build${intrinsic_macro_list(opcode)}, _nir_${name}_indices{0, __VA_ARGS__})
#else
#define ${intrinsic_prefix(name)}_${name}(build${intrinsic_macro_list(opcode)}, ...) ${'\\\\'}
_nir_build_${name}(build${intrinsic_macro_list(opcode)}, (struct _nir_${name}_indices){0, __VA_ARGS__})
#endif
% else:
#define nir_${name} _nir_build_${name}
% endif
% if name in build_prefixed_intrinsics:
#define nir_${name} nir_build_${name}
% endif
% endfor
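## Usage sketch for the variadic macros, with a hypothetical intrinsic foo
## that has one source and a base index: callers set any subset of the
## indices by designated initializer and the rest stay zero (the int _
## member is what keeps the indices-free call a valid initializer):
##
##    nir_foo(b, src0, .base = 16);
##    nir_foo(b, src0);             /* all indices default to 0 */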

% for name in ['flt', 'fge', 'feq', 'fneu']:
static inline nir_def *
nir_${name}_imm(nir_builder *build, nir_def *src1, double src2)
{
   return nir_${name}(build, src1, nir_imm_floatN_t(build, src2, src1->bit_size));
}
% endfor

% for name in ['ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']:
static inline nir_def *
nir_${name}_imm(nir_builder *build, nir_def *src1, uint64_t src2)
{
   return nir_${name}(build, src1, nir_imm_intN_t(build, src2, src1->bit_size));
}
% endfor
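## e.g. nir_ieq_imm(b, x, 0) expands to
## nir_ieq(b, x, nir_imm_intN_t(b, 0, x->bit_size)): the immediate is
## materialized at the bit size of the non-constant operand.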

## NIR only has lt/ge comparison opcodes; gt/le are built by swapping the
## operands of lt/ge.
% for prefix in ['i', 'u']:
static inline nir_def *
nir_${prefix}gt_imm(nir_builder *build, nir_def *src1, uint64_t src2)
{
   return nir_${prefix}lt(build, nir_imm_intN_t(build, src2, src1->bit_size), src1);
}

static inline nir_def *
nir_${prefix}le_imm(nir_builder *build, nir_def *src1, uint64_t src2)
{
   return nir_${prefix}ge(build, nir_imm_intN_t(build, src2, src1->bit_size), src1);
}
% endfor

#endif /* _NIR_BUILDER_OPCODES_ */"""

from nir_opcodes import opcodes, type_size, type_base_type
from nir_intrinsics import INTR_OPCODES, WRITE_MASK, ALIGN_MUL
from mako.template import Template

# List of intrinsics that also need a nir_build_ prefixed factory macro.
build_prefixed_intrinsics = [
   "load_deref",
   "store_deref",
   "copy_deref",
   "memcpy_deref",

   "load_param",

   "load_global",
   "load_global_constant",
   "store_global",

   "load_reg",
   "store_reg",

   "deref_mode_is",
]
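# Note: the macro loop in the template still maps nir_<name> onto
# nir_build_<name> for these ("#define nir_${name} nir_build_${name}"),
# so both spellings resolve to the same generated builder.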

print(Template(template).render(opcodes=opcodes,
                                type_size=type_size,
                                type_base_type=type_base_type,
                                INTR_OPCODES=INTR_OPCODES,
                                WRITE_MASK=WRITE_MASK,
                                ALIGN_MUL=ALIGN_MUL,
                                build_prefixed_intrinsics=build_prefixed_intrinsics))