mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-21 13:18:09 +02:00
The smp emission code will be moved back to pco_nir_tex following the addition of the updated tq shader gen code. Signed-off-by: Simon Perretta <simon.perretta@imgtec.com> Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
395 lines
13 KiB
C
395 lines
13 KiB
C
/*
|
|
* Copyright © 2023 Imagination Technologies Ltd.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
#include <vulkan/vulkan_core.h>
|
|
|
|
#include "hwdef/rogue_hw_utils.h"
|
|
#include "pvr_bo.h"
|
|
#include "pvr_common.h"
|
|
#include "pvr_device_info.h"
|
|
#include "pvr_job_transfer.h"
|
|
#include "pvr_pds.h"
|
|
#include "pvr_private.h"
|
|
#include "pvr_transfer_frag_store.h"
|
|
#include "pvr_types.h"
|
|
#include "pvr_usc.h"
|
|
#include "usc/pvr_uscgen.h"
|
|
#include "util/hash_table.h"
|
|
#include "util/macros.h"
|
|
#include "util/ralloc.h"
|
|
#include "util/u_dynarray.h"
|
|
#include "util/u_math.h"
|
|
#include "vk_log.h"
|
|
|
|
/* Bound used to size the byte_unwind bit field of the transfer fragment shader
 * key.
 * NOTE(review): presumably byte_unwind values are expected to be strictly below
 * this bound - confirm against the producers of
 * pvr_tq_layer_properties::byte_unwind.
 */
#define PVR_TRANSFER_BYTE_UNWIND_MAX 16U
|
|
|
|
struct pvr_transfer_frag_store_entry_data {
|
|
pvr_dev_addr_t kick_usc_pds_offset;
|
|
struct pvr_bo *kick_usc_pds_upload;
|
|
|
|
struct pvr_suballoc_bo *usc_upload;
|
|
struct pvr_tq_frag_sh_reg_layout sh_reg_layout;
|
|
};
|
|
|
|
/* Retrieves the store payload from a hash table entry. The constness of the
 * entry pointer carries over to the returned payload pointer.
 */
#define to_pvr_entry_data(_entry)                                            \
   _Generic((_entry),                                                        \
      struct hash_entry *:                                                   \
         (struct pvr_transfer_frag_store_entry_data *)((_entry)->data),      \
      const struct hash_entry *:                                             \
         (const struct pvr_transfer_frag_store_entry_data *)((_entry)->data))
|
|
|
|
VkResult pvr_transfer_frag_store_init(struct pvr_device *device,
|
|
struct pvr_transfer_frag_store *store)
|
|
{
|
|
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
|
|
|
*store = (struct pvr_transfer_frag_store){
|
|
.max_multisample = PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 1U),
|
|
.hash_table = _mesa_hash_table_create_u32_keys(NULL),
|
|
};
|
|
|
|
if (!store->hash_table)
|
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
/**
|
|
* \brief Returns a key based on shader properties.
|
|
*
|
|
* Returns a unique key that can be used to uniquely identify a transfer
|
|
* fragment shader based on the provided shader properties.
|
|
*
|
|
* Make sure that the non valid parts of shader_props are memset to 0. Otherwise
|
|
* these bits might appear in the key as uninitialized data and might not
|
|
* match a key for the same shader.
|
|
*/
|
|
static uint32_t pvr_transfer_frag_shader_key(
|
|
uint32_t max_multisample,
|
|
const struct pvr_tq_shader_properties *shader_props)
|
|
{
|
|
const struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
|
|
uint32_t resolve_op_num = max_multisample + PVR_RESOLVE_SAMPLE0;
|
|
|
|
uint32_t num_layers_bits = util_logbase2_ceil(PVR_TRANSFER_MAX_LAYERS + 1U);
|
|
uint32_t layer_float_bits = util_logbase2_ceil(PVR_INT_COORD_SET_FLOATS_NUM);
|
|
uint32_t pixel_src_bits = util_logbase2_ceil(PVR_TRANSFER_PBE_PIXEL_SRC_NUM);
|
|
uint32_t byte_unwind_bits = util_logbase2_ceil(PVR_TRANSFER_BYTE_UNWIND_MAX);
|
|
uint32_t resolve_op_bits = util_logbase2_ceil(resolve_op_num);
|
|
uint32_t sample_cnt_bits = util_last_bit(util_logbase2(max_multisample));
|
|
uint32_t hash = 0U;
|
|
|
|
#if MESA_DEBUG
|
|
uint32_t max_shift = 0U;
|
|
# define shift_hash(hash, num) \
|
|
do { \
|
|
max_shift += (num); \
|
|
assert(max_shift <= 32U); \
|
|
\
|
|
(hash) <<= (num); \
|
|
} while (0U)
|
|
#else
|
|
# define shift_hash(hash, num) hash <<= (num)
|
|
#endif
|
|
|
|
/* Hash layer info. */
|
|
|
|
shift_hash(hash, layer_float_bits);
|
|
hash |= (uint32_t)shader_props->layer_props.layer_floats;
|
|
|
|
shift_hash(hash, 1U);
|
|
hash |= layer->sample;
|
|
|
|
shift_hash(hash, 1U);
|
|
hash |= (uint32_t) false;
|
|
|
|
shift_hash(hash, 1U);
|
|
hash |= (uint32_t) false;
|
|
|
|
shift_hash(hash, pixel_src_bits);
|
|
hash |= (uint32_t)layer->pbe_format;
|
|
|
|
shift_hash(hash, resolve_op_bits);
|
|
hash |= (uint32_t)layer->resolve_op;
|
|
|
|
assert(util_is_power_of_two_nonzero(layer->sample_count));
|
|
shift_hash(hash, sample_cnt_bits);
|
|
hash |= (uint32_t)util_logbase2(layer->sample_count);
|
|
|
|
shift_hash(hash, 1U);
|
|
hash |= (uint32_t)layer->msaa;
|
|
|
|
shift_hash(hash, byte_unwind_bits);
|
|
hash |= layer->byte_unwind;
|
|
|
|
shift_hash(hash, 1U);
|
|
hash |= (uint32_t)layer->linear;
|
|
|
|
/* End layer info. */
|
|
|
|
shift_hash(hash, 1U);
|
|
hash |= (uint32_t)shader_props->full_rate;
|
|
|
|
shift_hash(hash, 1U);
|
|
hash |= (uint32_t)shader_props->iterated;
|
|
|
|
shift_hash(hash, 1U);
|
|
hash |= (uint32_t)shader_props->pick_component;
|
|
|
|
shift_hash(hash, num_layers_bits);
|
|
/* Just 1 layer. */
|
|
hash |= 1;
|
|
|
|
shift_hash(hash, 3U);
|
|
/* alpha type none */
|
|
hash |= 0;
|
|
|
|
#undef shift_hash
|
|
|
|
return hash;
|
|
}
|
|
|
|
/* Converts an integer shader key into the pointer-typed key taken by the hash
 * table search/insert calls.
 */
#define to_hash_table_key(_key) ((void *)(uintptr_t)(_key))
|
|
|
|
static VkResult pvr_transfer_frag_store_entry_data_compile(
|
|
struct pvr_device *device,
|
|
struct pvr_transfer_frag_store_entry_data *const entry_data,
|
|
const struct pvr_tq_shader_properties *shader_props,
|
|
uint32_t *const num_usc_temps_out)
|
|
{
|
|
const uint32_t image_desc_offset =
|
|
offsetof(struct pvr_combined_image_sampler_descriptor, image) / 4;
|
|
const uint32_t sampler_desc_offset =
|
|
offsetof(struct pvr_combined_image_sampler_descriptor, sampler) / 4;
|
|
|
|
const uint32_t cache_line_size =
|
|
rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
|
|
|
|
struct pvr_tq_frag_sh_reg_layout *sh_reg_layout = &entry_data->sh_reg_layout;
|
|
uint32_t next_free_sh_reg = 0;
|
|
VkResult result;
|
|
|
|
/* TODO: Allocate all combined image samplers if needed? Otherwise change the
|
|
* array to a single descriptor.
|
|
*/
|
|
sh_reg_layout->combined_image_samplers.offsets[0].image =
|
|
next_free_sh_reg + image_desc_offset;
|
|
sh_reg_layout->combined_image_samplers.offsets[0].sampler =
|
|
next_free_sh_reg + sampler_desc_offset;
|
|
sh_reg_layout->combined_image_samplers.count = 1;
|
|
next_free_sh_reg += sizeof(struct pvr_combined_image_sampler_descriptor) / 4;
|
|
|
|
/* TODO: Handle dynamic_const_regs used for PVR_INT_COORD_SET_FLOATS_{4,6}, Z
|
|
* position, texel unwind, etc. when compiler adds support for them.
|
|
*/
|
|
sh_reg_layout->dynamic_consts.offset = next_free_sh_reg;
|
|
sh_reg_layout->dynamic_consts.count = 0;
|
|
|
|
sh_reg_layout->driver_total = next_free_sh_reg;
|
|
|
|
pco_shader *tq =
|
|
pvr_uscgen_tq(device->pdevice->pco_ctx, shader_props, sh_reg_layout);
|
|
|
|
*num_usc_temps_out = pco_shader_data(tq)->common.temps;
|
|
|
|
result = pvr_gpu_upload_usc(device,
|
|
pco_shader_binary_data(tq),
|
|
pco_shader_binary_size(tq),
|
|
cache_line_size,
|
|
&entry_data->usc_upload);
|
|
|
|
ralloc_free(tq);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult pvr_transfer_frag_store_entry_data_create(
|
|
struct pvr_device *device,
|
|
struct pvr_transfer_frag_store *store,
|
|
const struct pvr_tq_shader_properties *shader_props,
|
|
const struct pvr_transfer_frag_store_entry_data **const entry_data_out)
|
|
{
|
|
struct pvr_pds_kickusc_program kick_usc_pds_prog = { 0 };
|
|
struct pvr_transfer_frag_store_entry_data *entry_data;
|
|
pvr_dev_addr_t dev_addr;
|
|
uint32_t num_usc_temps;
|
|
VkResult result;
|
|
|
|
entry_data = ralloc(store->hash_table, __typeof__(*entry_data));
|
|
if (!entry_data)
|
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
result = pvr_transfer_frag_store_entry_data_compile(device,
|
|
entry_data,
|
|
shader_props,
|
|
&num_usc_temps);
|
|
if (result != VK_SUCCESS)
|
|
goto err_free_entry;
|
|
|
|
dev_addr = entry_data->usc_upload->dev_addr;
|
|
dev_addr.addr -= device->heaps.usc_heap->base_addr.addr;
|
|
|
|
pvr_pds_setup_doutu(&kick_usc_pds_prog.usc_task_control,
|
|
dev_addr.addr,
|
|
num_usc_temps,
|
|
shader_props->full_rate
|
|
? ROGUE_PDSINST_DOUTU_SAMPLE_RATE_FULL
|
|
: ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
|
|
false);
|
|
|
|
pvr_pds_kick_usc(&kick_usc_pds_prog, NULL, 0U, false, PDS_GENERATE_SIZES);
|
|
|
|
result = pvr_bo_alloc(device,
|
|
device->heaps.pds_heap,
|
|
PVR_DW_TO_BYTES(kick_usc_pds_prog.data_size +
|
|
kick_usc_pds_prog.code_size),
|
|
16,
|
|
PVR_BO_ALLOC_FLAG_CPU_MAPPED,
|
|
&entry_data->kick_usc_pds_upload);
|
|
if (result != VK_SUCCESS)
|
|
goto err_free_usc_upload;
|
|
|
|
pvr_pds_kick_usc(&kick_usc_pds_prog,
|
|
entry_data->kick_usc_pds_upload->bo->map,
|
|
0U,
|
|
false,
|
|
PDS_GENERATE_CODEDATA_SEGMENTS);
|
|
|
|
dev_addr = entry_data->kick_usc_pds_upload->vma->dev_addr;
|
|
dev_addr.addr -= device->heaps.pds_heap->base_addr.addr;
|
|
entry_data->kick_usc_pds_offset = dev_addr;
|
|
|
|
*entry_data_out = entry_data;
|
|
|
|
return VK_SUCCESS;
|
|
|
|
err_free_usc_upload:
|
|
pvr_bo_suballoc_free(entry_data->usc_upload);
|
|
|
|
err_free_entry:
|
|
ralloc_free(entry_data);
|
|
|
|
return result;
|
|
}
|
|
|
|
static void inline pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(
|
|
struct pvr_device *device,
|
|
const struct pvr_transfer_frag_store_entry_data *entry_data)
|
|
{
|
|
pvr_bo_free(device, entry_data->kick_usc_pds_upload);
|
|
pvr_bo_suballoc_free(entry_data->usc_upload);
|
|
}
|
|
|
|
/**
 * \brief Fully destroys a store entry payload.
 *
 * Releases the payload's device allocations and then frees the payload
 * structure itself.
 *
 * \param[in] device     Device the allocations were made on.
 * \param[in] entry_data Payload to destroy; it must not be used afterwards.
 */
static inline void pvr_transfer_frag_store_entry_data_destroy(
   struct pvr_device *device,
   const struct pvr_transfer_frag_store_entry_data *entry_data)
{
   pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(device,
                                                             entry_data);

   /* Casting away the const :( */
   ralloc_free((void *)entry_data);
}
|
|
|
|
static VkResult pvr_transfer_frag_store_get_entry(
|
|
struct pvr_device *device,
|
|
struct pvr_transfer_frag_store *store,
|
|
const struct pvr_tq_shader_properties *shader_props,
|
|
const struct pvr_transfer_frag_store_entry_data **const entry_data_out)
|
|
{
|
|
const uint32_t key =
|
|
pvr_transfer_frag_shader_key(store->max_multisample, shader_props);
|
|
const struct hash_entry *entry;
|
|
VkResult result;
|
|
|
|
entry = _mesa_hash_table_search(store->hash_table, to_hash_table_key(key));
|
|
if (!entry) {
|
|
/* Init so that gcc stops complaining. */
|
|
const struct pvr_transfer_frag_store_entry_data *entry_data = NULL;
|
|
|
|
result = pvr_transfer_frag_store_entry_data_create(device,
|
|
store,
|
|
shader_props,
|
|
&entry_data);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
assert(entry_data);
|
|
|
|
entry = _mesa_hash_table_insert(store->hash_table,
|
|
to_hash_table_key(key),
|
|
(void *)entry_data);
|
|
if (!entry) {
|
|
pvr_transfer_frag_store_entry_data_destroy(device, entry_data);
|
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
}
|
|
}
|
|
|
|
*entry_data_out = to_pvr_entry_data(entry);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
VkResult pvr_transfer_frag_store_get_shader_info(
|
|
struct pvr_device *device,
|
|
struct pvr_transfer_frag_store *store,
|
|
const struct pvr_tq_shader_properties *shader_props,
|
|
pvr_dev_addr_t *const pds_dev_addr_out,
|
|
const struct pvr_tq_frag_sh_reg_layout **const reg_layout_out)
|
|
{
|
|
/* Init so that gcc stops complaining. */
|
|
const struct pvr_transfer_frag_store_entry_data *entry_data = NULL;
|
|
VkResult result;
|
|
|
|
result = pvr_transfer_frag_store_get_entry(device,
|
|
store,
|
|
shader_props,
|
|
&entry_data);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
*pds_dev_addr_out = entry_data->kick_usc_pds_offset;
|
|
*reg_layout_out = &entry_data->sh_reg_layout;
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
void pvr_transfer_frag_store_fini(struct pvr_device *device,
|
|
struct pvr_transfer_frag_store *store)
|
|
{
|
|
hash_table_foreach_remove(store->hash_table, entry)
|
|
{
|
|
/* ralloc_free() in _mesa_hash_table_destroy() will free each entry's
|
|
* memory so let's not waste extra time freeing them one by one and
|
|
* unliking.
|
|
*/
|
|
pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(
|
|
device,
|
|
to_pvr_entry_data(entry));
|
|
}
|
|
|
|
_mesa_hash_table_destroy(store->hash_table, NULL);
|
|
}
|