nir: Add intrinsics for lowering UBOs/VBOs on AGX

We'll use formatted loads and some system values to lower UBOs and VBOs to
global memory in NIR, using the AGX-specific format support and addressing
arithmetic to optimize the emitted code.

Add the intrinsics and teach nir_opt_preamble how to move them so we don't
regress UBO pushing.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Acked-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19996>
This commit is contained in:
Alyssa Rosenzweig 2022-11-24 20:40:42 -05:00 committed by Marge Bot
parent a855bdbf47
commit 0af08acca5
2 changed files with 28 additions and 0 deletions

View file

@ -278,6 +278,9 @@ index("bool", "synchronous")
# Value ID to identify SSA value loaded/stored on the stack
index("unsigned", "value_id")
# Whether to sign-extend offsets in address arithmetic (else zero extend)
index("bool", "sign_extend")
intrinsic("nop", flags=[CAN_ELIMINATE])
intrinsic("convert_alu_types", dest_comp=0, src_comp=[0],
@ -1591,6 +1594,22 @@ store("local_pixel_agx", [1], [BASE, WRITE_MASK, FORMAT], [CAN_REORDER])
intrinsic("block_image_store_agx", [1, 1], bit_sizes=[32, 16],
indices=[FORMAT, IMAGE_DIM], flags=[CAN_REORDER])
# Formatted loads. The format is the pipe_format in memory (see
# agx_internal_formats.h for the supported list). This accesses:
#
# address + extend(index) << (format shift + shift)
#
# The nir_intrinsic_base() index encodes the shift. The sign_extend index
# determines whether sign- or zero-extension is used for the index.
#
# All loads on AGX use these hardware instructions, so while these are
# logically load_global_agx (etc), the _global is omitted as it adds nothing.
#
# src[] = { address, index }.
load("agx", [1, 1], [ACCESS, BASE, FORMAT, SIGN_EXTEND], [CAN_ELIMINATE])
load("constant_agx", [1, 1], [ACCESS, BASE, FORMAT, SIGN_EXTEND],
[CAN_ELIMINATE, CAN_REORDER])
# Logical complement of load_front_face, mapping to an AGX system value
system_value("back_face_agx", 1, bit_sizes=[1, 32])
@ -1598,6 +1617,12 @@ system_value("back_face_agx", 1, bit_sizes=[1, 32])
# the referenced array has stride 24.
system_value("texture_base_agx", 1, bit_sizes=[64])
# Load the base address of an indexed UBO/VBO (for lowering UBOs/VBOs)
intrinsic("load_ubo_base_agx", src_comp=[1], dest_comp=1, bit_sizes=[64],
flags=[CAN_ELIMINATE, CAN_REORDER])
intrinsic("load_vbo_base_agx", src_comp=[1], dest_comp=1, bit_sizes=[64],
flags=[CAN_ELIMINATE, CAN_REORDER])
# Intel-specific query for loading from the brw_image_param struct passed
# into the shader as a uniform. The variable is a deref to the image
# variable. The const index specifies which of the six parameters to load.

View file

@ -153,6 +153,8 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
case nir_intrinsic_load_cull_any_enabled_amd:
case nir_intrinsic_load_cull_small_prim_precision_amd:
case nir_intrinsic_load_texture_base_agx:
case nir_intrinsic_load_ubo_base_agx:
case nir_intrinsic_load_vbo_base_agx:
return true;
/* Intrinsics which can be moved depending on hardware */
@ -195,6 +197,7 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
case nir_intrinsic_masked_swizzle_amd:
case nir_intrinsic_load_ssbo_address:
case nir_intrinsic_bindless_resource_ir3:
case nir_intrinsic_load_constant_agx:
return can_move_srcs(&instr->instr, ctx);
/* Image/SSBO loads can be moved if they are CAN_REORDER and their