intel/cl: switch to SPIRV as shader storage

This effectively makes intel-clc unnecessary.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Dylan Baker <dylan@pnwbakers.com>
Tested-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Dylan Baker <None>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33014>
This commit is contained in:
Lionel Landwerlin 2025-01-14 16:09:53 +02:00 committed by Marge Bot
parent b3033dc633
commit db11165c07
16 changed files with 181 additions and 167 deletions

View file

@ -137,11 +137,11 @@ debian-testing-asan:
-D b_sanitize=address
-D valgrind=disabled
-D tools=dlclose-skip
-D intel-clc=system
-D mesa-clc=system
S3_ARTIFACT_NAME: ""
ARTIFACTS_DEBUG_SYMBOLS: 1
# Do a host build for intel-clc (asan complains not being loaded
# as the first library)
# Do a host build for mesa-clc (asan complains about not being loaded
# as the first library)
HOST_BUILD_OPTIONS: >
-D build-tests=false
-D enable-glcpp-tests=false
@ -151,8 +151,8 @@ debian-testing-asan:
-D video-codecs=
-D glx=disabled
-D platforms=
-D intel-clc=enabled
-D install-intel-clc=true
-D mesa-clc=enabled
-D install-mesa-clc=true
debian-testing-msan:
# https://github.com/google/sanitizers/wiki/MemorySanitizerLibcxxHowTo
@ -166,7 +166,7 @@ debian-testing-msan:
EXTRA_OPTION:
-D b_sanitize=memory
-D b_lundef=false
-D intel-clc=system
-D mesa-clc=system
S3_ARTIFACT_NAME: ""
ARTIFACTS_DEBUG_SYMBOLS: 1
# Don't run all the tests yet:
@ -175,8 +175,8 @@ debian-testing-msan:
MESON_TEST_ARGS: "--suite glcpp --suite format"
GALLIUM_DRIVERS: "freedreno,iris,nouveau,r300,r600,llvmpipe,softpipe,svga,v3d,vc4,virgl,etnaviv,panfrost,lima,zink,radeonsi,tegra,d3d12,crocus"
VULKAN_DRIVERS: intel,amd,broadcom,virtio
# Do a host build for intel-clc (msan complains about
# uninitialized values in the LLVM libs)
# Do a host build for mesa-clc (msan complains about uninitialized
# values in the LLVM libs)
HOST_BUILD_OPTIONS: >
-D build-tests=false
-D enable-glcpp-tests=false
@ -186,8 +186,8 @@ debian-testing-msan:
-D video-codecs=
-D glx=disabled
-D platforms=
-D intel-clc=enabled
-D install-intel-clc=true
-D mesa-clc=enabled
-D install-mesa-clc=true
debian-testing-ubsan:
extends:
@ -202,7 +202,7 @@ debian-testing-ubsan:
-Wno-error=array-bounds
EXTRA_OPTION: >
-D b_sanitize=undefined
-D intel-clc=system
-D mesa-clc=system
S3_ARTIFACT_NAME: ""
ARTIFACTS_DEBUG_SYMBOLS: 1
HOST_BUILD_OPTIONS: >
@ -214,8 +214,8 @@ debian-testing-ubsan:
-D video-codecs=
-D glx=disabled
-D platforms=
-D intel-clc=enabled
-D install-intel-clc=true
-D mesa-clc=enabled
-D install-mesa-clc=true
debian-build-testing:
extends: .meson-build
@ -311,7 +311,7 @@ debian-release:
-D spirv-to-dxil=true
-D osmesa=true
-D tools=all
-D intel-clc=enabled
-D mesa-clc=enabled
-D intel-rt=enabled
-D imagination-srv=true
BUILDTYPE: "release"
@ -444,7 +444,7 @@ debian-android:
-D cpp_rtti=false
-D valgrind=disabled
-D android-libbacktrace=disabled
-D intel-clc=system
-D mesa-clc=system
GALLIUM_ST: >
-D gallium-vdpau=disabled
-D gallium-va=disabled
@ -462,8 +462,8 @@ debian-android:
-D video-codecs=
-D glx=disabled
-D platforms=
-D intel-clc=enabled
-D install-intel-clc=true
-D mesa-clc=enabled
-D install-mesa-clc=true
ARTIFACTS_DEBUG_SYMBOLS: 1
S3_ARTIFACT_NAME: mesa-x86_64-android-${BUILDTYPE}
script:
@ -710,7 +710,7 @@ debian-clang:
-D tools=drm-shim,etnaviv,freedreno,glsl,intel,intel-ui,nir,nouveau,lima,panfrost,asahi,imagination
-D vulkan-layers=device-select,overlay
-D build-aco-tests=true
-D intel-clc=enabled
-D mesa-clc=enabled
-D intel-rt=enabled
-D imagination-srv=true
-D teflon=true
@ -806,7 +806,7 @@ debian-x86_32:
-D glvnd=disabled
EXTRA_OPTION: >
-D vulkan-layers=device-select,overlay
-D intel-clc=system
-D mesa-clc=system
HOST_BUILD_OPTIONS: >
-D build-tests=false
-D enable-glcpp-tests=false
@ -816,8 +816,8 @@ debian-x86_32:
-D video-codecs=
-D glx=disabled
-D platforms=
-D intel-clc=enabled
-D install-intel-clc=true
-D mesa-clc=enabled
-D install-mesa-clc=true
# While s390 is dead, s390x is very much alive, and one of the last major
# big-endian platforms, so it provides useful coverage.

View file

@ -35,8 +35,8 @@ variables:
DEBIAN_PYUTILS_IMAGE: "debian/x86_64_pyutils"
DEBIAN_PYUTILS_TAG: "20241223-pyutils"
ALPINE_X86_64_BUILD_TAG: "20241122-sections"
ALPINE_X86_64_LAVA_SSH_TAG: "20241122-sections"
ALPINE_X86_64_BUILD_TAG: "20250124-spirv-tools"
ALPINE_X86_64_LAVA_SSH_TAG: "20250124-spirv-tools"
FEDORA_X86_64_BUILD_TAG: "20241122-sections"
KERNEL_TAG: "v6.13-rc4-mesa-5e77"

View file

@ -44,19 +44,26 @@
#include "libintel_shaders.h"
#if GFX_VERx10 == 80
# include "intel_gfx8_shaders_code.h"
# include "intel_gfx80_shaders_spv.h"
# include "intel_gfx80_shaders_binding.h"
#elif GFX_VERx10 == 90
# include "intel_gfx9_shaders_code.h"
# include "intel_gfx90_shaders_spv.h"
# include "intel_gfx90_shaders_binding.h"
#elif GFX_VERx10 == 110
# include "intel_gfx11_shaders_code.h"
# include "intel_gfx110_shaders_spv.h"
# include "intel_gfx110_shaders_binding.h"
#elif GFX_VERx10 == 120
# include "intel_gfx12_shaders_code.h"
# include "intel_gfx120_shaders_spv.h"
# include "intel_gfx120_shaders_binding.h"
#elif GFX_VERx10 == 125
# include "intel_gfx125_shaders_code.h"
# include "intel_gfx125_shaders_spv.h"
# include "intel_gfx125_shaders_binding.h"
#elif GFX_VERx10 == 200
# include "intel_gfx20_shaders_code.h"
# include "intel_gfx200_shaders_spv.h"
# include "intel_gfx200_shaders_binding.h"
#elif GFX_VERx10 == 300
# include "intel_gfx30_shaders_code.h"
# include "intel_gfx300_shaders_spv.h"
# include "intel_gfx300_shaders_binding.h"
#else
# error "Unsupported generation"
#endif
@ -75,20 +82,11 @@ load_fragment_index(nir_builder *b)
nir_channel(b, pos_in, 0));
}
static nir_shader *
load_shader_lib(struct iris_screen *screen, void *mem_ctx)
static const uint32_t *
load_shader_lib_spv(uint32_t *out_size)
{
const nir_shader_compiler_options *nir_options =
#if GFX_VER >= 9
screen->brw->nir_options[MESA_SHADER_KERNEL];
#else
screen->elk->nir_options[MESA_SHADER_KERNEL];
#endif
struct blob_reader blob;
blob_reader_init(&blob, (void *)genX(intel_shaders_nir),
sizeof(genX(intel_shaders_nir)));
return nir_deserialize(mem_ctx, nir_options, &blob);
*out_size = sizeof(genX(shaders_spv));
return genX(shaders_spv);
}
static unsigned
@ -114,7 +112,7 @@ iris_call_generation_shader(struct iris_screen *screen, nir_builder *b)
void
genX(init_screen_gen_state)(struct iris_screen *screen)
{
screen->vtbl.load_shader_lib = load_shader_lib;
screen->vtbl.load_shader_lib_spv = load_shader_lib_spv;
screen->vtbl.call_generation_shader = iris_call_generation_shader;
}

View file

@ -324,8 +324,13 @@ iris_destroy_program_cache(struct iris_context *ice)
}
static void
link_libintel_shaders(nir_shader *nir, const nir_shader *libintel)
link_libintel_shaders(nir_shader *nir,
const struct intel_device_info *devinfo,
const uint32_t *spv_code, uint32_t spv_size)
{
nir_shader *libintel = brw_nir_from_spirv(nir, devinfo->ver,
spv_code, spv_size, true);
nir_link_shader_functions(nir, libintel);
NIR_PASS_V(nir, nir_inline_functions);
NIR_PASS_V(nir, nir_remove_non_entrypoints);
@ -368,9 +373,12 @@ iris_ensure_indirect_generation_shader(struct iris_batch *batch)
uint32_t uniform_size =
screen->vtbl.call_generation_shader(screen, &b);
uint32_t spv_size;
const uint32_t *spv_code = screen->vtbl.load_shader_lib_spv(&spv_size);
nir_shader *nir = b.shader;
link_libintel_shaders(nir, screen->vtbl.load_shader_lib(screen, nir));
link_libintel_shaders(nir, screen->devinfo, spv_code, spv_size);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_opt_cse);

View file

@ -159,7 +159,7 @@ struct iris_vtable {
void (*lost_genx_state)(struct iris_context *ice, struct iris_batch *batch);
void (*disable_rhwo_optimization)(struct iris_batch *batch, bool disable);
nir_shader *(*load_shader_lib)(struct iris_screen *screen, void *mem_ctx);
const uint32_t *(*load_shader_lib_spv)(uint32_t *out_size);
unsigned (*call_generation_shader)(struct iris_screen *screen, nir_builder *b);
};

View file

@ -243,4 +243,3 @@ elk_disasm_tool = executable(
)
endif

View file

@ -5,33 +5,9 @@
#ifndef __GENX_CL_HELPERS_H__
#define __GENX_CL_HELPERS_H__
#define ALWAYS_INLINE inline __attribute__((always_inline))
#include "compiler/libcl/libcl.h"
#define UNUSED
#define BITFIELD64_MASK(bits) ((1ul << bits) - 1)
#define CLAMP( X, MIN, MAX ) ( (X)>(MIN) ? ((X)>(MAX) ? (MAX) : (X)) : (MIN) )
#define INT64_MAX (0x7FFFFFFFFFFFFFFFL)
/**
 * Largest value representable by an unsigned integer that is `bits` wide.
 *
 * Widths of 64 or more yield an all-ones 64-bit value. The plain
 * (1 << bits) - 1 form cannot express that case: shifting a 64-bit value by
 * 64 is undefined in C, and in OpenCL C the shift count is reduced modulo
 * the type width, which would make the result 0.
 */
ALWAYS_INLINE static uint64_t
u_uintN_max(uint32_t bits)
{
   if (bits >= 64)
      return ~(uint64_t)0;

   return ((uint64_t)1 << bits) - 1;
}
/* Largest value representable by a signed integer that is bit_size bits
 * wide, obtained by shifting INT64_MAX right by the number of unused high
 * bits.
 */
ALWAYS_INLINE static int64_t
u_intN_max(uint32_t bit_size)
{
   const uint32_t unused_bits = 64 - bit_size;

   return INT64_MAX >> unused_bits;
}
/* Smallest value representable by a signed integer that is bit_size bits
 * wide.
 */
ALWAYS_INLINE static int64_t
u_intN_min(uint32_t bit_size)
{
   /* On two's complement platforms, which is every platform Mesa is likely
    * to ever worry about, stdint.h generally calculates INT##_MIN in this
    * manner.
    */
   const int64_t max_value = u_intN_max(bit_size);

   return -max_value - 1;
}
ALWAYS_INLINE static uint64_t
util_bitpack_uint(uint64_t v, uint32_t start, UNUSED uint32_t end)

View file

@ -9,39 +9,19 @@
#ifndef __OPENCL_VERSION__
#include <stdint.h>
#include <vulkan/vulkan_core.h>
#include "util/macros.h"
#else
#define PRAGMA_POISON(param)
#define BITFIELD_BIT(i) (1u << i)
typedef ulong uint64_t;
typedef uint uint32_t;
typedef ushort uint16_t;
typedef uchar uint8_t;
typedef long int64_t;
typedef int int32_t;
typedef short int16_t;
typedef char int8_t;
typedef struct VkDrawIndexedIndirectCommand {
uint32_t indexCount;
uint32_t instanceCount;
uint32_t firstIndex;
int32_t vertexOffset;
uint32_t firstInstance;
} VkDrawIndexedIndirectCommand __attribute__((aligned(4)));
typedef struct VkDrawIndirectCommand {
uint32_t vertexCount;
uint32_t instanceCount;
uint32_t firstVertex;
uint32_t firstInstance;
} VkDrawIndirectCommand __attribute__((aligned(4)));
#define _MESA_LIBCL_ASSERT_IGNORE 1
#include "libcl_vk.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_cl_pack.h"
#define PRAGMA_POISON(param)
#endif
/**
@ -127,6 +107,7 @@ void genX(write_draw)(global uint32_t *dst_ptr,
bool uses_draw_id,
uint32_t mocs);
void genX(copy_data)(global void *dst_ptr,
global void *src_ptr,
uint32_t size);

View file

@ -1,23 +0,0 @@
/* Copyright © 2023 Intel Corporation
* SPDX-License-Identifier: MIT
*/
/* Copy up to four dwords from src_base to dst_base, starting dword_offset
 * dwords into both buffers. The widest vector access that still fits within
 * num_dwords is used; nothing is copied when dword_offset is at or past
 * num_dwords.
 */
void
genX(libanv_memcpy)(global void *dst_base,
                    global void *src_base,
                    uint num_dwords,
                    uint dword_offset)
{
   const uint byte_offset = 4 * dword_offset;
   global void *dst = dst_base + byte_offset;
   global void *src = src_base + byte_offset;

   if (dword_offset + 4 <= num_dwords) {
      *(global uint4 *)dst = *(global uint4 *)src;
      return;
   }

   if (dword_offset + 3 <= num_dwords) {
      *(global uint3 *)dst = *(global uint3 *)src;
      return;
   }

   if (dword_offset + 2 <= num_dwords) {
      *(global uint2 *)dst = *(global uint2 *)src;
      return;
   }

   if (dword_offset + 1 <= num_dwords)
      *(global uint *)dst = *(global uint *)src;
}

View file

@ -24,7 +24,6 @@ intel_shader_files = files(
'generate.cl',
'generate_draws.cl',
'generate_draws_iris.cl',
'memcpy.cl',
'query_copy.cl',
'util.cl',
)
@ -34,18 +33,6 @@ foreach input_arg : intel_shader_files
prepended_input_args += ['--in', input_arg]
endforeach
intel_shaders_clc_wa_args = []
if with_intel_clc
if chosen_llvm_version_major >= 17
intel_shaders_clc_wa_args += ['--llvm17-wa']
endif
else
_intel_clc_llvm_version = run_command(prog_intel_clc, '-M')
if _intel_clc_llvm_version.stdout().strip().version_compare('>= 17.0')
intel_shaders_clc_wa_args += ['--llvm17-wa']
endif
endif
intel_shaders_gens = [ [ 80, 8],
[ 90, 9],
[110, 11],
@ -55,25 +42,38 @@ intel_shaders_gens = [ [ 80, 8],
[300, 30] ]
intel_shaders = []
foreach gen : intel_shaders_gens
intel_shaders += custom_target(
'intel_gfx@0@_shaders_code.h'.format(gen[1]),
intel_shaders_spv = custom_target(
input : intel_shader_files,
output : 'intel_gfx@0@_shaders_code.h'.format(gen[1]),
output : 'intel_gfx@0@_shaders.spv'.format(gen[0]),
command : [
prog_intel_clc, intel_shaders_clc_wa_args, '--nir',
'--gfx-version=@0@'.format(gen[0] / 10),
'--prefix', 'gfx@0@_intel_shaders'.format(gen[1]),
prog_mesa_clc,
prepended_input_args, '-o', '@OUTPUT@', '--',
'-cl-std=cl2.0', '-D__OPENCL_VERSION__=200',
'-DNDEBUG=1',
'-DGFX_VERx10=@0@'.format(gen[0]),
'-I' + join_paths(meson.current_source_dir(), '.'),
'-I' + join_paths(dir_source_root, 'src/compiler/libcl'),
'-I' + join_paths(dir_source_root, 'src'),
'-I' + join_paths(dir_source_root, 'src/intel'),
'-I' + join_paths(meson.project_build_root(), 'src/intel'),
'-I' + join_paths(dir_source_root, 'src/intel/genxml'),
],
env: ['MESA_SHADER_CACHE_DISABLE=true'],
depends : [dep_prog_intel_clc, gen_cl_xml_pack],
depends : [gen_cl_xml_pack],
)
intel_shaders += custom_target(
input : ['spv2hex.py', intel_shaders_spv],
output : 'intel_gfx@0@_shaders_spv.h'.format(gen[0]),
command : [
prog_python, '@INPUT@', '--output', '@OUTPUT@',
'--prefix', 'gfx@0@_shaders_spv'.format(gen[1]),
],
)
intel_shaders += custom_target(
input : intel_shaders_spv,
output : 'intel_gfx@0@_shaders_binding.h'.format(gen[0]),
command : [prog_vtn_bindgen, intel_shaders_spv, '@OUTPUT@'],
)
endforeach

View file

@ -2,6 +2,8 @@
* SPDX-License-Identifier: MIT
*/
#include "libintel_shaders.h"
void
genX(libanv_query_copy)(global void *destination_base,
uint32_t destination_stride,

View file

@ -0,0 +1,43 @@
#!/usr/bin/env python3
# Copyright © 2025 Intel Corporation
# SPDX-License-Identifier: MIT
from __future__ import annotations
import argparse
import binascii
def main() -> None:
    """Turn a SPIR-V binary into a C header defining a uint32_t array.

    Command line:
      --output  path of the header to write (required)
      --prefix  name of the generated ``const uint32_t`` array (required)
      SPIRV     one or more input binaries

    Each input is read four bytes at a time, every chunk is interpreted as a
    little-endian unsigned integer and emitted in hexadecimal, eight values
    per line.
    """
    p = argparse.ArgumentParser()
    p.add_argument('--output', dest='output', action='store',
                   help='Output file', required=True)
    p.add_argument('--prefix', action='store',
                   help='Prefix string to use', required=True)
    p.add_argument('inputs', metavar='SPIRV', nargs='+')
    args = p.parse_args()

    # NOTE(review): the output file is reopened in 'w' mode for every input,
    # so when several inputs are given only the last one ends up in the
    # header.
    for f in args.inputs:
        with open(f, 'rb') as fin:
            with open(args.output, 'w') as fout:
                # Include guard. This used to be spelled "#pragma one", which
                # is not a known pragma and therefore guarded nothing.
                fout.write("#pragma once\n")

                fout.write("const uint32_t {0}[] = {{".format(args.prefix))

                count = 0
                while True:
                    dword = fin.read(4)
                    if not dword:
                        break
                    # Start a new output line every eight values.
                    if count % 8 == 0:
                        fout.write("\n ")
                    fout.write('{:#x}, '.format(int.from_bytes(dword, byteorder='little', signed=False)))
                    count += 1

                fout.write("\n")
                fout.write("};\n")


if __name__ == '__main__':
    main()

View file

@ -4,6 +4,26 @@
#include "libintel_shaders.h"
/* Memcpy data using multiple lanes.
 *
 * Copies up to four dwords from src_base to dst_base, starting dword_offset
 * dwords into both buffers. The widest vector access that still fits within
 * num_dwords is used; nothing is copied when dword_offset is at or past
 * num_dwords.
 */
void genX(libanv_memcpy)(global void *dst_base,
                         global void *src_base,
                         uint num_dwords,
                         uint dword_offset)
{
   const uint byte_offset = 4 * dword_offset;
   global void *dst = dst_base + byte_offset;
   global void *src = src_base + byte_offset;

   if (dword_offset + 4 <= num_dwords) {
      *(global uint4 *)dst = *(global uint4 *)src;
      return;
   }

   if (dword_offset + 3 <= num_dwords) {
      *(global uint3 *)dst = *(global uint3 *)src;
      return;
   }

   if (dword_offset + 2 <= num_dwords) {
      *(global uint2 *)dst = *(global uint2 *)src;
      return;
   }

   if (dword_offset + 1 <= num_dwords)
      *(global uint *)dst = *(global uint *)src;
}
/* Copy size from src_ptr to dst_ptr using a single lane, with size a
 * multiple of 4.
 */

View file

@ -52,7 +52,7 @@ VkResult genX(init_device_state)(struct anv_device *device);
void genX(init_cps_device_state)(struct anv_device *device);
nir_shader *genX(load_libanv_shader)(struct anv_device *device, void *mem_ctx);
const uint32_t *genX(libanv_spv)(uint32_t *out_size);
uint32_t genX(call_internal_shader)(nir_builder *b,
enum anv_internal_kernel_name shader_name);

View file

@ -48,6 +48,17 @@ lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
return true;
}
/* Fetch the SPIR-V blob for this device's generation through libanv_spv and
 * hand it to brw_nir_from_spirv(), returning the resulting shader.
 *
 * NOTE(review): the ralloc context created here is only passed as the
 * context argument of brw_nir_from_spirv() and is never freed in this
 * function; confirm that a caller freeing just the returned shader does not
 * leak it.
 */
static nir_shader *
load_libanv(struct anv_device *device)
{
   uint32_t spirv_size;
   const uint32_t *spirv = anv_genX(device->info, libanv_spv)(&spirv_size);
   void *lib_ctx = ralloc_context(NULL);

   return brw_nir_from_spirv(lib_ctx, device->info->ver,
                             spirv, spirv_size, true);
}
static void
link_libanv(nir_shader *nir, const nir_shader *libanv)
{
@ -337,10 +348,7 @@ anv_device_get_internal_shader(struct anv_device *device,
return VK_SUCCESS;
}
void *mem_ctx = ralloc_context(NULL);
nir_shader *libanv_shaders =
anv_genX(device->info, load_libanv_shader)(device, mem_ctx);
nir_shader *libanv_shaders = load_libanv(device);
bin = compile_shader(device,
libanv_shaders,
@ -350,6 +358,7 @@ anv_device_get_internal_shader(struct anv_device *device,
&internal_kernels[name].key,
sizeof(internal_kernels[name].key),
internal_kernels[name].send_count);
ralloc_free(libanv_shaders);
if (bin == NULL)
return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY,
"Unable to compiler internal kernel");

View file

@ -9,17 +9,23 @@
#include "compiler/nir/nir_serialize.h"
#if GFX_VERx10 == 90
# include "intel_gfx9_shaders_code.h"
# include "intel_gfx90_shaders_spv.h"
# include "intel_gfx90_shaders_binding.h"
#elif GFX_VERx10 == 110
# include "intel_gfx11_shaders_code.h"
# include "intel_gfx110_shaders_spv.h"
# include "intel_gfx110_shaders_binding.h"
#elif GFX_VERx10 == 120
# include "intel_gfx12_shaders_code.h"
# include "intel_gfx120_shaders_spv.h"
# include "intel_gfx120_shaders_binding.h"
#elif GFX_VERx10 == 125
# include "intel_gfx125_shaders_code.h"
# include "intel_gfx125_shaders_spv.h"
# include "intel_gfx125_shaders_binding.h"
#elif GFX_VERx10 == 200
# include "intel_gfx20_shaders_code.h"
# include "intel_gfx200_shaders_spv.h"
# include "intel_gfx200_shaders_binding.h"
#elif GFX_VERx10 == 300
# include "intel_gfx30_shaders_code.h"
# include "intel_gfx300_shaders_spv.h"
# include "intel_gfx300_shaders_binding.h"
#else
# error "Unsupported generation"
#endif
@ -31,6 +37,13 @@
.base = offsetof(struct_name, field_name), \
.range = bit_size / 8)
/* Return the generation-specific shaders_spv array and store its sizeof()
 * in *out_size.
 */
const uint32_t *
genX(libanv_spv)(uint32_t *out_size)
{
   const uint32_t *spv_words = genX(shaders_spv);

   *out_size = sizeof(genX(shaders_spv));
   return spv_words;
}
static nir_def *
load_fragment_index(nir_builder *b)
{
@ -46,18 +59,6 @@ load_compute_index(nir_builder *b)
return nir_channel(b, nir_load_global_invocation_id(b, 32), 0);
}
nir_shader *
genX(load_libanv_shader)(struct anv_device *device, void *mem_ctx)
{
const nir_shader_compiler_options *nir_options =
device->physical->compiler->nir_options[MESA_SHADER_KERNEL];
struct blob_reader blob;
blob_reader_init(&blob, (void *)genX(intel_shaders_nir),
sizeof(genX(intel_shaders_nir)));
return nir_deserialize(mem_ctx, nir_options, &blob);
}
uint32_t
genX(call_internal_shader)(nir_builder *b, enum anv_internal_kernel_name shader_name)
{