nir: Add load_typed_buffer_amd intrinsic.

This new intrinsic maps to the MTBUF instruction format on AMD GPUs
and represents a typed buffer load in NIR.

Also add an unsigned upper bound for the new intrinsic.
The code for that was ported from aco_instruction_selection_setup.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16805>
This commit is contained in:
Timur Kristóf 2023-02-02 10:47:58 +01:00 committed by Marge Bot
parent 7d63d8882a
commit 022e55557b
3 changed files with 33 additions and 0 deletions

View file

@ -407,6 +407,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
case nir_intrinsic_load_kernel_input:
case nir_intrinsic_load_task_payload:
case nir_intrinsic_load_buffer_amd:
case nir_intrinsic_load_typed_buffer_amd:
case nir_intrinsic_image_samples:
case nir_intrinsic_image_deref_samples:
case nir_intrinsic_bindless_image_samples:

View file

@ -182,6 +182,7 @@ index("enum glsl_sampler_dim", "image_dim")
index("bool", "image_array")
# Image format for image intrinsics
# Vertex buffer format for load_typed_buffer_amd
index("enum pipe_format", "format")
# Access qualifiers for image and memory access intrinsics. ACCESS_RESTRICT is
@ -1331,6 +1332,18 @@ intrinsic("load_buffer_amd", src_comp=[4, 1, 1, 1], dest_comp=0, indices=[BASE,
# src[] = { store value, descriptor, vector byte offset, scalar byte offset, index offset }
intrinsic("store_buffer_amd", src_comp=[0, 4, 1, 1, 1], indices=[BASE, WRITE_MASK, MEMORY_MODES, ACCESS])
# Typed buffer load of arbitrary length, using a specified format.
# src[] = { descriptor, vector byte offset, scalar byte offset, index offset }
#
# The compiler backend is responsible for emitting correct HW instructions according to alignment, range, etc.
# Users of this intrinsic must ensure that the first component being loaded is really the first component
# of the specified format, because range analysis assumes this.
# The size of the specified format also determines the memory range that this instruction is allowed to access.
#
# The index offset is multiplied by the stride in the descriptor, if any.
# The vector/scalar offsets are in bytes, BASE is a constant byte offset.
intrinsic("load_typed_buffer_amd", src_comp=[4, 1, 1, 1], dest_comp=0, indices=[BASE, MEMORY_MODES, ACCESS, FORMAT, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
# src[] = { address, unsigned 32-bit offset }.
load("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
# src[] = { value, address, unsigned 32-bit offset }.

View file

@ -25,6 +25,7 @@
#include "nir.h"
#include "nir_range_analysis.h"
#include "util/hash_table.h"
#include "util/u_math.h"
/**
* Analyzes a sequence of operations to determine some aspects of the range of
@ -1469,6 +1470,24 @@ nir_unsigned_upper_bound_impl(nir_shader *shader, struct hash_table *range_ht,
/* Very generous maximum: TCS/TES executed by largest possible workgroup */
res = config->max_workgroup_invocations / MAX2(shader->info.tess.tcs_vertices_out, 1u);
break;
case nir_intrinsic_load_typed_buffer_amd: {
const enum pipe_format format = nir_intrinsic_format(intrin);
if (format == PIPE_FORMAT_NONE)
break;
const struct util_format_description* desc = util_format_description(format);
if (desc->channel[scalar.comp].type != UTIL_FORMAT_TYPE_UNSIGNED)
break;
if (desc->channel[scalar.comp].normalized) {
res = fui(1.0);
break;
}
const uint32_t chan_max = u_uintN_max(desc->channel[scalar.comp].size);
res = desc->channel[scalar.comp].pure_integer ? chan_max : fui(chan_max);
break;
}
case nir_intrinsic_load_scalar_arg_amd:
case nir_intrinsic_load_vector_arg_amd: {
uint32_t upper_bound = nir_intrinsic_arg_upper_bound_u32_amd(intrin);