mesa/src/compiler/clc/nir_load_libclc.c

/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_clc_helpers.h"
#include "nir_serialize.h"
#include "nir_spirv.h"
#include "util/mesa-sha1.h"

#ifdef DYNAMIC_LIBCLC_PATH
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#endif

#ifdef HAVE_STATIC_LIBCLC_ZSTD
#include <zstd.h>
#endif

#ifdef HAVE_STATIC_LIBCLC_SPIRV
#include "spirv-mesa3d-.spv.h"
#endif

#ifdef HAVE_STATIC_LIBCLC_SPIRV64
#include "spirv64-mesa3d-.spv.h"
#endif

/* Describes where one libclc SPIR-V binary can be obtained from. */
struct clc_file {
   /* Pointer size, in bits, that this libclc build targets (32 or 64). */
   unsigned bit_size;
   /* Copy of the binary embedded in this library, or NULL if none was
    * compiled in.  When HAVE_STATIC_LIBCLC_ZSTD is defined, map_clc_data()
    * treats this blob as a zstd frame and decompresses it.
    */
   const char *static_data;
   /* Size of static_data in bytes. */
   size_t static_data_size;
   /* Path of the binary on disk; only set when DYNAMIC_LIBCLC_PATH is
    * defined, NULL otherwise.
    */
   const char *sys_path;
};

/* Table of the known libclc binaries.
 *
 * The order matters: get_libclc_file() indexes this array with
 * ptr_bit_size / 64, so the 32-bit entry must stay at index 0 and the 64-bit
 * entry at index 1.  Fields whose build option is disabled are left
 * zero-initialized (NULL / 0).
 */
static const struct clc_file libclc_files[] = {
   {
      .bit_size = 32,
#ifdef HAVE_STATIC_LIBCLC_SPIRV
      .static_data = libclc_spirv_mesa3d_spv,
      .static_data_size = sizeof(libclc_spirv_mesa3d_spv),
#endif
#ifdef DYNAMIC_LIBCLC_PATH
      .sys_path = DYNAMIC_LIBCLC_PATH "spirv-mesa3d-.spv",
#endif
   },
   {
      .bit_size = 64,
#ifdef HAVE_STATIC_LIBCLC_SPIRV64
      .static_data = libclc_spirv64_mesa3d_spv,
      .static_data_size = sizeof(libclc_spirv64_mesa3d_spv),
#endif
#ifdef DYNAMIC_LIBCLC_PATH
      .sys_path = DYNAMIC_LIBCLC_PATH "spirv64-mesa3d-.spv",
#endif
   },
};

/* Returns the libclc_files entry for the given pointer size.
 *
 * Only 32 and 64 are accepted (asserted).
 */
static const struct clc_file *
get_libclc_file(unsigned ptr_bit_size)
{
   assert(ptr_bit_size == 32 || ptr_bit_size == 64);

   /* 32 / 64 == 0 selects the 32-bit entry, 64 / 64 == 1 the 64-bit one. */
   const unsigned table_index = ptr_bit_size / 64;
   return libclc_files + table_index;
}

/* State of one libclc binary while it is being used.
 *
 * Filled in by open_clc_data(), completed by map_clc_data() and released by
 * close_clc_data().
 */
struct clc_data {
   /* Entry of libclc_files this handle refers to. */
   const struct clc_file *file;

   /* Key identifying this binary for the shader cache; written by
    * open_clc_data().
    */
   unsigned char cache_key[20];

   /* Descriptor of the on-disk binary, or -1 when no file is open. */
   int fd;
   /* SPIR-V bytes and their length; set by map_clc_data(). */
   const void *data;
   size_t size;
};

/* Locates the libclc binary for ptr_bit_size and computes its cache key.
 *
 * *clc is reset first.  A compiled-in binary takes precedence: its cache key
 * is the text "libclc-spirv<bits>" and no file is touched.  Otherwise, when
 * DYNAMIC_LIBCLC_PATH is defined, the on-disk file is opened and the cache key
 * becomes the SHA-1 of its path and modification time.
 *
 * Returns true on success; on success the caller must eventually call
 * close_clc_data().  Returns false when no binary is available.
 */
static bool
open_clc_data(struct clc_data *clc, unsigned ptr_bit_size)
{
   memset(clc, 0, sizeof(*clc));
   clc->fd = -1;
   clc->file = get_libclc_file(ptr_bit_size);

   if (clc->file->static_data) {
      snprintf((char *)clc->cache_key, sizeof(clc->cache_key),
               "libclc-spirv%d", ptr_bit_size);
      return true;
   }

#ifdef DYNAMIC_LIBCLC_PATH
   const char *path = clc->file->sys_path;
   if (path == NULL)
      return false;

   int file_fd = open(path, O_RDONLY);
   if (file_fd < 0)
      return false;

   struct stat st;
   if (fstat(file_fd, &st) < 0) {
      fprintf(stderr, "fstat failed on %s: %m\n", path);
      close(file_fd);
      return false;
   }

   /* Hash the path together with the mtime so that a replaced libclc file
    * yields a different key.
    */
   struct mesa_sha1 sha;
   _mesa_sha1_init(&sha);
   _mesa_sha1_update(&sha, path, strlen(path));
#if defined(__APPLE__) || defined(__MACOSX)
   _mesa_sha1_update(&sha, &st.st_mtime, sizeof(st.st_mtime));
#else
   _mesa_sha1_update(&sha, &st.st_mtim, sizeof(st.st_mtim));
#endif
   _mesa_sha1_final(&sha, clc->cache_key);

   clc->fd = file_fd;
   return true;
#else
   return false;
#endif
}

#define SPIRV_WORD_SIZE 4

/* Makes the SPIR-V of an opened libclc handle available in clc->data and
 * clc->size.
 *
 * For a compiled-in binary the data is either used in place or, with
 * HAVE_STATIC_LIBCLC_ZSTD, decompressed into a heap buffer that
 * close_clc_data() frees.  For an on-disk binary the file is mmap'ed
 * read-only.
 *
 * Returns false (after printing a diagnostic to stderr) on failure.  On
 * failure clc->data is left NULL so that close_clc_data() has nothing to
 * unmap or free.
 */
static bool
map_clc_data(struct clc_data *clc)
{
   if (clc->file->static_data) {
#ifdef HAVE_STATIC_LIBCLC_ZSTD
      unsigned long long cmp_size =
         ZSTD_getFrameContentSize(clc->file->static_data,
                                  clc->file->static_data_size);
      if (cmp_size == ZSTD_CONTENTSIZE_UNKNOWN ||
          cmp_size == ZSTD_CONTENTSIZE_ERROR) {
         fprintf(stderr, "Could not determine the decompressed size of the "
                         "libclc SPIR-V\n");
         return false;
      }

      size_t frame_size =
         ZSTD_findFrameCompressedSize(clc->file->static_data,
                                      clc->file->static_data_size);
      if (ZSTD_isError(frame_size)) {
         fprintf(stderr, "Could not determine the size of the first ZSTD frame "
                         "when decompressing libclc SPIR-V: %s\n",
                 ZSTD_getErrorName(frame_size));
         return false;
      }

      void *dest = malloc(cmp_size + 1);
      if (dest == NULL) {
         fprintf(stderr, "Out of memory while decompressing libclc SPIR-V\n");
         return false;
      }

      size_t size = ZSTD_decompress(dest, cmp_size, clc->file->static_data,
                                    frame_size);
      if (ZSTD_isError(size)) {
         free(dest);
         fprintf(stderr, "Error decompressing libclc SPIR-V: %s\n",
                 ZSTD_getErrorName(size));
         return false;
      }

      clc->data = dest;
      clc->size = size;
#else
      clc->data = clc->file->static_data;
      clc->size = clc->file->static_data_size;
#endif
      return true;
   }

#ifdef DYNAMIC_LIBCLC_PATH
   if (clc->file->sys_path != NULL) {
      off_t len = lseek(clc->fd, 0, SEEK_END);
      if (len < 0) {
         fprintf(stderr, "Failed to determine the size of libclc SPIR-V: %m\n");
         return false;
      }
      if (len % SPIRV_WORD_SIZE != 0) {
         fprintf(stderr, "File length isn't a multiple of the word size\n");
         return false;
      }

      /* Only publish the mapping once it is known to be valid; MAP_FAILED is
       * non-NULL and must never reach close_clc_data()'s munmap().
       */
      void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, clc->fd, 0);
      if (map == MAP_FAILED) {
         fprintf(stderr, "Failed to mmap libclc SPIR-V: %m\n");
         return false;
      }

      clc->data = map;
      clc->size = len;

      return true;
   }
#endif

   /* Neither source is available.  open_clc_data() fails in that case, so
    * this is not expected to be reached with a successfully opened handle.
    */
   return true;
}

/* Releases everything open_clc_data() / map_clc_data() acquired for clc.
 *
 * Safe to call whether or not map_clc_data() was called or succeeded: the
 * heap buffer is only freed in the zstd configuration, the mapping is only
 * removed when it is a real mapping, and the descriptor is only closed when
 * it is valid.  The released fields are reset so a second call is harmless.
 */
static void
close_clc_data(struct clc_data *clc)
{
   if (clc->file->static_data) {
#ifdef HAVE_STATIC_LIBCLC_ZSTD
      /* With zstd the data is a malloc'ed decompression buffer (or NULL). */
      free((void *)clc->data);
      clc->data = NULL;
#endif
      return;
   }

#ifdef DYNAMIC_LIBCLC_PATH
   if (clc->file->sys_path != NULL) {
      /* A failed mmap() may have left MAP_FAILED here, which is non-NULL. */
      if (clc->data != NULL && clc->data != MAP_FAILED)
         munmap((void *)clc->data, clc->size);
      clc->data = NULL;

      if (clc->fd >= 0)
         close(clc->fd);
      clc->fd = -1;
   }
#endif
}

/** Returns true if libclc is found
 *
 * If libclc is compiled in statically, this always returns true.  If we
 * depend on a dynamic libclc, this opens and tries to stat the file.
 */
bool
nir_can_find_libclc(unsigned ptr_bit_size)
{
   struct clc_data clc;
   if (open_clc_data(&clc, ptr_bit_size)) {
      close_clc_data(&clc);
      return true;
   } else {
      return false;
   }
}

/** Adds generic pointer variants of libclc functions
 *
 * Libclc currently doesn't contain generic variants for a bunch of functions
 * like `frexp` but the OpenCL spec with generic pointers requires them.  We
 * really should fix libclc but, in the mean time, we can easily duplicate
 * every function that works on global memory and make it also work on generic
 * memory.
 *
 * For every function whose name contains "U3AS1", a clone named with "U3AS4"
 * in its place is created (unless one already exists) and the derefs in the
 * clone are switched from nir_var_mem_global to nir_var_mem_generic.
 *
 * Returns true if at least one function was added.
 */
static bool
libclc_add_generic_variants(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(func, shader) {
      /* These don't need generic variants */
      if (strstr(func->name, "async_work_group_strided_copy"))
         continue;

      char *U3AS1 = strstr(func->name, "U3AS1");
      if (U3AS1 == NULL)
         continue;

      /* Index of the trailing '1' of the first "U3AS1" in the name. */
      ptrdiff_t offset_1 = U3AS1 - func->name + 4;
      assert(offset_1 < strlen(func->name) && func->name[offset_1] == '1');

      char *generic_name = ralloc_strdup(shader, func->name);
      assert(generic_name[offset_1] == '1');
      generic_name[offset_1] = '4';

      if (nir_shader_get_function_for_name(shader, generic_name))
         continue;

      /* Create the generic twin with the same parameter list and a cloned
       * body.
       */
      nir_function *gfunc = nir_function_create(shader, generic_name);
      gfunc->num_params = func->num_params;
      gfunc->params = ralloc_array(shader, nir_parameter, gfunc->num_params);
      for (unsigned i = 0; i < gfunc->num_params; i++)
         gfunc->params[i] = func->params[i];

      nir_function_set_impl(gfunc, nir_function_impl_clone(shader, func->impl));

      /* Rewrite any global pointers to generic */
      nir_foreach_block(block, gfunc->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (!nir_deref_mode_may_be(deref, nir_var_mem_global))
               continue;

            assert(deref->type != nir_deref_type_var);
            assert(nir_deref_mode_is(deref, nir_var_mem_global));

            deref->modes = nir_var_mem_generic;
         }
      }

      progress = true;

      /* The clone is the impl that was modified; the source function is
       * untouched and gets its metadata preserved by the loop below.
       */
      nir_progress(true, gfunc->impl, nir_metadata_none);
   }

   if (progress) {
      nir_foreach_function_impl(impl, shader) {
         if (impl->valid_metadata & nir_metadata_not_properly_reset) {
            /* Preserve all metadata for functions that we didn't modify. */
            nir_no_progress(impl);
         }
      }
   }

   return progress;
}

/* Per-ALU-instruction callback for nir_shader_alu_pass(): sets the exact bit
 * on the instruction.  Always returns true, i.e. it reports every visited
 * instruction as changed.  The builder and the user data are unused.
 */
static bool
mark_exact(nir_builder *b, nir_alu_instr *alu, UNUSED void *_)
{
   alu->exact = true;
   return true;
}

/** Loads libclc as a NIR library shader
 *
 * \param ptr_bit_size   pointer size of the libclc build to load; must match
 *                       the bit size of spirv_options->global_addr_format
 *                       (asserted)
 * \param disk_cache     optional shader cache; when non-NULL and
 *                       ENABLE_SHADER_CACHE is defined, a previously
 *                       serialized result is returned if present and a
 *                       freshly built one is stored
 * \param spirv_options  options for spirv_to_nir; a copy with create_library
 *                       set is used
 * \param nir_options    compiler options for the resulting shader
 * \param optimize       run the generic optimization loop on the result
 *
 * \return the libclc shader, or NULL if libclc could not be found, mapped or
 *         translated to NIR.
 *
 * NOTE(review): the cache lookup is keyed on clc.cache_key only as far as this
 * function is concerned; `optimize` is not mixed in here.
 */
nir_shader *
nir_load_libclc_shader(unsigned ptr_bit_size,
                       struct disk_cache *disk_cache,
                       const struct spirv_to_nir_options *spirv_options,
                       const nir_shader_compiler_options *nir_options,
                       bool optimize)
{
   assert(ptr_bit_size ==
          nir_address_format_bit_size(spirv_options->global_addr_format));

   struct clc_data clc;
   if (!open_clc_data(&clc, ptr_bit_size))
      return NULL;

#ifdef ENABLE_SHADER_CACHE
   cache_key cache_key;
   if (disk_cache) {
      disk_cache_compute_key(disk_cache, clc.cache_key,
                             sizeof(clc.cache_key), cache_key);

      size_t buffer_size;
      uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
      if (buffer) {
         /* Cache hit: no need to map or translate the SPIR-V at all. */
         struct blob_reader blob;
         blob_reader_init(&blob, buffer, buffer_size);
         nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
         free(buffer);
         close_clc_data(&clc);
         return nir;
      }
   }
#endif

   if (!map_clc_data(&clc)) {
      close_clc_data(&clc);
      return NULL;
   }

   struct spirv_to_nir_options spirv_lib_options = *spirv_options;
   spirv_lib_options.create_library = true;

   assert(clc.size % SPIRV_WORD_SIZE == 0);
   nir_shader *nir = spirv_to_nir(clc.data, clc.size / SPIRV_WORD_SIZE,
                                  NULL, 0, MESA_SHADER_KERNEL, NULL,
                                  &spirv_lib_options, nir_options);
   if (nir == NULL) {
      /* Translation failed; release the binary instead of dereferencing a
       * NULL shader below.
       */
      close_clc_data(&clc);
      return NULL;
   }
   nir_validate_shader(nir, "after nir_load_clc_shader");

   /* nir_inline_libclc will assume that the functions in this shader are
    * already ready to lower.  This means we need to inline any function_temp
    * initializers and lower any early returns.
    */
   nir->info.internal = true;
   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS(_, nir, nir_lower_returns);

   NIR_PASS(_, nir, libclc_add_generic_variants);

   /* libclc relies on precise floating point behaviour to meet CL precision
    * requirements, but the SPIR-V does not disable contractions etc. Forcing
    * the exact bit across libclc effectively compiles libclc without fast-math,
    * which works around a large class of (current and future) libclc bugs.
    *
    * Kernels using CL are unaffected, this only affects the high-precision
    * floating point routines inside libclc. Fast variants bypass libclc anyway.
    */
   NIR_PASS(_, nir, nir_shader_alu_pass, mark_exact, nir_metadata_all, NULL);

   /* Run some optimization passes. Those used here should be considered safe
    * for all use cases and drivers.
    */
   if (optimize) {
      NIR_PASS(_, nir, nir_split_var_copies);

      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_if, false);
         NIR_PASS(progress, nir, nir_opt_dead_cf);
         NIR_PASS(progress, nir, nir_opt_cse);
         /* drivers run this pass, so don't be too aggressive. More aggressive
          * values only increase effectiveness by <5%
          */
         nir_opt_peephole_select_options peephole_select_options = {
            .limit = 0,
         };
         NIR_PASS(progress, nir, nir_opt_peephole_select, &peephole_select_options);
         NIR_PASS(progress, nir, nir_opt_algebraic);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_deref);
      } while(progress);

      nir_sweep(nir);
   }

#ifdef ENABLE_SHADER_CACHE
   if (disk_cache) {
      /* Store the finished library so later loads can skip all of the above. */
      struct blob blob;
      blob_init(&blob);
      nir_serialize(&blob, nir, false);
      disk_cache_put(disk_cache, cache_key, blob.data, blob.size, NULL);
      blob_finish(&blob);
   }
#endif

   close_clc_data(&clc);
   return nir;
}