mesa/src/gallium/drivers/iris/iris_resource.c

2685 lines
89 KiB
C
Raw Normal View History

/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file iris_resource.c
*
* Resources are images, buffers, and other objects used by the GPU.
*
* XXX: explain resources
*/
#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
2018-05-28 17:14:43 -07:00
#include "util/os_memory.h"
#include "util/u_cpu_detect.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"
#include "util/u_memory.h"
#include "util/u_threaded_context.h"
2018-06-07 01:25:35 -07:00
#include "util/u_transfer.h"
#include "util/u_transfer_helper.h"
#include "util/u_upload_mgr.h"
#include "util/ralloc.h"
#include "iris_batch.h"
#include "iris_context.h"
#include "iris_resource.h"
#include "iris_screen.h"
#include "intel/common/intel_aux_map.h"
#include "intel/dev/intel_debug.h"
2018-05-28 17:14:43 -07:00
#include "isl/isl.h"
#include "drm-uapi/drm_fourcc.h"
#include "drm-uapi/i915_drm.h"
enum modifier_priority {
MODIFIER_PRIORITY_INVALID = 0,
MODIFIER_PRIORITY_LINEAR,
MODIFIER_PRIORITY_X,
MODIFIER_PRIORITY_Y,
MODIFIER_PRIORITY_Y_CCS,
MODIFIER_PRIORITY_Y_GFX12_RC_CCS,
MODIFIER_PRIORITY_Y_GFX12_RC_CCS_CC,
MODIFIER_PRIORITY_4,
MODIFIER_PRIORITY_4_DG2_RC_CCS,
MODIFIER_PRIORITY_4_DG2_RC_CCS_CC,
};
static const uint64_t priority_to_modifier[] = {
[MODIFIER_PRIORITY_INVALID] = DRM_FORMAT_MOD_INVALID,
[MODIFIER_PRIORITY_LINEAR] = DRM_FORMAT_MOD_LINEAR,
[MODIFIER_PRIORITY_X] = I915_FORMAT_MOD_X_TILED,
[MODIFIER_PRIORITY_Y] = I915_FORMAT_MOD_Y_TILED,
[MODIFIER_PRIORITY_Y_CCS] = I915_FORMAT_MOD_Y_TILED_CCS,
[MODIFIER_PRIORITY_Y_GFX12_RC_CCS] = I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS,
[MODIFIER_PRIORITY_Y_GFX12_RC_CCS_CC] = I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC,
[MODIFIER_PRIORITY_4] = I915_FORMAT_MOD_4_TILED,
[MODIFIER_PRIORITY_4_DG2_RC_CCS] = I915_FORMAT_MOD_4_TILED_DG2_RC_CCS,
[MODIFIER_PRIORITY_4_DG2_RC_CCS_CC] = I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC,
};
static bool
modifier_is_supported(const struct intel_device_info *devinfo,
enum pipe_format pfmt, unsigned bind,
uint64_t modifier)
{
/* Check for basic device support. */
switch (modifier) {
case DRM_FORMAT_MOD_LINEAR:
case I915_FORMAT_MOD_X_TILED:
break;
case I915_FORMAT_MOD_Y_TILED:
if (devinfo->ver <= 8 && (bind & PIPE_BIND_SCANOUT))
return false;
if (devinfo->verx10 >= 125)
return false;
break;
case I915_FORMAT_MOD_Y_TILED_CCS:
if (devinfo->ver <= 8 || devinfo->ver >= 12)
return false;
break;
case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC:
if (devinfo->verx10 != 120)
return false;
break;
case I915_FORMAT_MOD_4_TILED:
case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS:
case I915_FORMAT_MOD_4_TILED_DG2_MC_CCS:
case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC:
if (devinfo->verx10 < 125)
return false;
break;
case DRM_FORMAT_MOD_INVALID:
default:
return false;
}
/* Check remaining requirements. */
switch (modifier) {
case I915_FORMAT_MOD_4_TILED_DG2_MC_CCS:
case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
if (INTEL_DEBUG(DEBUG_NO_CCS))
return false;
if (pfmt != PIPE_FORMAT_BGRA8888_UNORM &&
pfmt != PIPE_FORMAT_RGBA8888_UNORM &&
pfmt != PIPE_FORMAT_BGRX8888_UNORM &&
pfmt != PIPE_FORMAT_RGBX8888_UNORM &&
pfmt != PIPE_FORMAT_NV12 &&
pfmt != PIPE_FORMAT_P010 &&
pfmt != PIPE_FORMAT_P012 &&
pfmt != PIPE_FORMAT_P016 &&
pfmt != PIPE_FORMAT_YUYV &&
pfmt != PIPE_FORMAT_UYVY) {
return false;
}
break;
case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC:
case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS:
case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC:
case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
case I915_FORMAT_MOD_Y_TILED_CCS: {
if (INTEL_DEBUG(DEBUG_NO_CCS))
return false;
enum isl_format rt_format =
iris_format_for_usage(devinfo, pfmt,
ISL_SURF_USAGE_RENDER_TARGET_BIT).fmt;
if (rt_format == ISL_FORMAT_UNSUPPORTED ||
!isl_format_supports_ccs_e(devinfo, rt_format))
return false;
break;
}
default:
break;
}
return true;
}
static uint64_t
select_best_modifier(struct intel_device_info *devinfo,
const struct pipe_resource *templ,
const uint64_t *modifiers,
int count)
{
enum modifier_priority prio = MODIFIER_PRIORITY_INVALID;
for (int i = 0; i < count; i++) {
if (!modifier_is_supported(devinfo, templ->format, templ->bind,
modifiers[i]))
continue;
switch (modifiers[i]) {
case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC:
prio = MAX2(prio, MODIFIER_PRIORITY_4_DG2_RC_CCS_CC);
break;
case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS:
prio = MAX2(prio, MODIFIER_PRIORITY_4_DG2_RC_CCS);
break;
case I915_FORMAT_MOD_4_TILED:
prio = MAX2(prio, MODIFIER_PRIORITY_4);
break;
case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC:
prio = MAX2(prio, MODIFIER_PRIORITY_Y_GFX12_RC_CCS_CC);
break;
case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
prio = MAX2(prio, MODIFIER_PRIORITY_Y_GFX12_RC_CCS);
break;
case I915_FORMAT_MOD_Y_TILED_CCS:
prio = MAX2(prio, MODIFIER_PRIORITY_Y_CCS);
break;
case I915_FORMAT_MOD_Y_TILED:
prio = MAX2(prio, MODIFIER_PRIORITY_Y);
break;
case I915_FORMAT_MOD_X_TILED:
prio = MAX2(prio, MODIFIER_PRIORITY_X);
break;
case DRM_FORMAT_MOD_LINEAR:
prio = MAX2(prio, MODIFIER_PRIORITY_LINEAR);
break;
case DRM_FORMAT_MOD_INVALID:
default:
break;
}
}
return priority_to_modifier[prio];
}
static inline bool is_modifier_external_only(enum pipe_format pfmt,
uint64_t modifier)
{
/* Only allow external usage for the following cases: YUV formats
* and the media-compression modifier. The render engine lacks
* support for rendering to a media-compressed surface if the
* compression ratio is large enough. By requiring external usage
* of media-compressed surfaces, resolves are avoided.
*/
return util_format_is_yuv(pfmt) ||
isl_drm_modifier_get_info(modifier)->aux_usage == ISL_AUX_USAGE_MC;
}
static void
iris_query_dmabuf_modifiers(struct pipe_screen *pscreen,
enum pipe_format pfmt,
int max,
uint64_t *modifiers,
unsigned int *external_only,
int *count)
{
struct iris_screen *screen = (void *) pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
uint64_t all_modifiers[] = {
DRM_FORMAT_MOD_LINEAR,
I915_FORMAT_MOD_X_TILED,
I915_FORMAT_MOD_4_TILED,
I915_FORMAT_MOD_4_TILED_DG2_RC_CCS,
I915_FORMAT_MOD_4_TILED_DG2_MC_CCS,
I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC,
I915_FORMAT_MOD_Y_TILED,
I915_FORMAT_MOD_Y_TILED_CCS,
I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS,
I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS,
I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC,
};
int supported_mods = 0;
for (int i = 0; i < ARRAY_SIZE(all_modifiers); i++) {
if (!modifier_is_supported(devinfo, pfmt, 0, all_modifiers[i]))
continue;
if (supported_mods < max) {
if (modifiers)
modifiers[supported_mods] = all_modifiers[i];
if (external_only) {
external_only[supported_mods] =
is_modifier_external_only(pfmt, all_modifiers[i]);
}
}
supported_mods++;
}
*count = supported_mods;
}
static bool
iris_is_dmabuf_modifier_supported(struct pipe_screen *pscreen,
uint64_t modifier, enum pipe_format pfmt,
bool *external_only)
{
struct iris_screen *screen = (void *) pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
if (modifier_is_supported(devinfo, pfmt, 0, modifier)) {
if (external_only)
*external_only = is_modifier_external_only(pfmt, modifier);
return true;
}
return false;
}
static unsigned int
iris_get_dmabuf_modifier_planes(struct pipe_screen *pscreen, uint64_t modifier,
enum pipe_format format)
{
unsigned int planes = util_format_get_num_planes(format);
switch (modifier) {
case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC:
return 3;
case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC:
case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
case I915_FORMAT_MOD_Y_TILED_CCS:
return 2 * planes;
case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS:
case I915_FORMAT_MOD_4_TILED_DG2_MC_CCS:
default:
return planes;
}
}
enum isl_format
iris_image_view_get_format(struct iris_context *ice,
const struct pipe_image_view *img)
{
struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
const struct intel_device_info *devinfo = &screen->devinfo;
isl_surf_usage_flags_t usage = ISL_SURF_USAGE_STORAGE_BIT;
enum isl_format isl_fmt =
iris_format_for_usage(devinfo, img->format, usage).fmt;
if (img->shader_access & PIPE_IMAGE_ACCESS_READ) {
/* On Gfx8, try to use typed surfaces reads (which support a
* limited number of formats), and if not possible, fall back
* to untyped reads.
*/
if (devinfo->ver == 8 &&
!isl_has_matching_typed_storage_image_format(devinfo, isl_fmt))
return ISL_FORMAT_RAW;
else
return isl_lower_storage_image_format(devinfo, isl_fmt);
}
return isl_fmt;
}
static struct pipe_memory_object *
iris_memobj_create_from_handle(struct pipe_screen *pscreen,
struct winsys_handle *whandle,
bool dedicated)
{
struct iris_screen *screen = (struct iris_screen *)pscreen;
struct iris_memory_object *memobj = CALLOC_STRUCT(iris_memory_object);
struct iris_bo *bo;
if (!memobj)
return NULL;
switch (whandle->type) {
case WINSYS_HANDLE_TYPE_SHARED:
bo = iris_bo_gem_create_from_name(screen->bufmgr, "winsys image",
whandle->handle);
break;
case WINSYS_HANDLE_TYPE_FD:
iris: Only use SET/GET_TILING when exporting/importing BOs In the past, we tracked bo->tiling_mode and bo->stride, and used GEM_{GET,SET}_TILING on all buffers we allocated. This made more sense in the old days (long before iris even existed) when we used GTT maps to detile resources. However, that support is now gone, and we never used it in iris anyway. We don't need to do this in most cases anymore. We are trying to deprecate these kernel APIs. They have many issues. One is having a global tiling mode for a buffer when userspace may want to suballocate multiple resources with different tiling modes from the same object. Another is...what if processes want to interpret the data differently, and hot-swap the tiling mode out from under another process? Another is the fundamental race conditions. There are many reasons not to use these APIs. Unfortunately, there is still one case where it's used: when importing and exporting DMABUFs, we have to communicate the tiling somehow. The right way to do that is using modifiers, but those didn't always exist, and aren't always enabled (maybe aren't even commonly enabled). So we use GET/SET_TILING as a poor-man's IPC mechanism of sorts. This patch stops calling those APIs in general but continues doing so for imported/exported objects when we don't have modifiers. We eliminate iris_bo_alloc_tiled entirely. There is now only one! One small behavioral change snuck in: iris_memobj_create_from_handle now aligns the virtual address to 64K rather than 1B when modifiers aren't present. This should be harmless, and lets us delete a whole bunch of code. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11169>
2021-06-02 15:21:35 -07:00
bo = iris_bo_import_dmabuf(screen->bufmgr, whandle->handle);
break;
default:
unreachable("invalid winsys handle type");
}
if (!bo) {
free(memobj);
return NULL;
}
memobj->b.dedicated = dedicated;
memobj->bo = bo;
memobj->format = whandle->format;
memobj->stride = whandle->stride;
return &memobj->b;
}
static void
iris_memobj_destroy(struct pipe_screen *pscreen,
struct pipe_memory_object *pmemobj)
{
struct iris_memory_object *memobj = (struct iris_memory_object *)pmemobj;
iris_bo_unreference(memobj->bo);
free(memobj);
}
2018-07-30 15:08:02 -07:00
struct pipe_resource *
iris_resource_get_separate_stencil(struct pipe_resource *p_res)
{
/* For packed depth-stencil, we treat depth as the primary resource
* and store S8 as the "second plane" resource.
*/
if (p_res->next && p_res->next->format == PIPE_FORMAT_S8_UINT)
return p_res->next;
return NULL;
2018-07-30 15:08:02 -07:00
}
static void
iris_resource_set_separate_stencil(struct pipe_resource *p_res,
struct pipe_resource *stencil)
{
assert(util_format_has_depth(util_format_description(p_res->format)));
pipe_resource_reference(&p_res->next, stencil);
}
2018-08-03 16:18:09 -07:00
void
iris_get_depth_stencil_resources(struct pipe_resource *res,
struct iris_resource **out_z,
struct iris_resource **out_s)
{
if (!res) {
*out_z = NULL;
*out_s = NULL;
return;
}
if (res->format != PIPE_FORMAT_S8_UINT) {
2018-08-03 16:18:09 -07:00
*out_z = (void *) res;
*out_s = (void *) iris_resource_get_separate_stencil(res);
} else {
*out_z = NULL;
*out_s = (void *) res;
}
}
2018-12-07 11:54:16 -08:00
void
iris_resource_disable_aux(struct iris_resource *res)
{
iris_bo_unreference(res->aux.bo);
iris_bo_unreference(res->aux.clear_color_bo);
free(res->aux.state);
res->aux.usage = ISL_AUX_USAGE_NONE;
res->aux.surf.size_B = 0;
res->aux.bo = NULL;
res->aux.extra_aux.surf.size_B = 0;
res->aux.clear_color_bo = NULL;
res->aux.state = NULL;
}
static uint32_t
iris_resource_alloc_flags(const struct iris_screen *screen,
const struct pipe_resource *templ,
enum isl_aux_usage aux_usage)
{
if (templ->flags & IRIS_RESOURCE_FLAG_DEVICE_MEM)
return 0;
uint32_t flags = 0;
switch (templ->usage) {
case PIPE_USAGE_STAGING:
flags |= BO_ALLOC_SMEM | BO_ALLOC_COHERENT;
break;
case PIPE_USAGE_STREAM:
flags |= BO_ALLOC_SMEM;
break;
case PIPE_USAGE_DYNAMIC:
case PIPE_USAGE_DEFAULT:
case PIPE_USAGE_IMMUTABLE:
/* Use LMEM for these if possible */
break;
}
if (templ->bind & PIPE_BIND_SCANOUT)
flags |= BO_ALLOC_SCANOUT;
if (templ->flags & (PIPE_RESOURCE_FLAG_MAP_COHERENT |
PIPE_RESOURCE_FLAG_MAP_PERSISTENT))
flags |= BO_ALLOC_SMEM;
if (screen->devinfo.verx10 >= 125 && isl_aux_usage_has_ccs(aux_usage))
flags |= BO_ALLOC_LMEM;
if ((templ->bind & PIPE_BIND_SHARED) ||
util_format_get_num_planes(templ->format) > 1)
flags |= BO_ALLOC_NO_SUBALLOC;
return flags;
}
static void
iris_resource_destroy(struct pipe_screen *screen,
struct pipe_resource *p_res)
{
struct iris_resource *res = (struct iris_resource *) p_res;
if (p_res->target == PIPE_BUFFER)
util_range_destroy(&res->valid_buffer_range);
iris_resource_disable_aux(res);
threaded_resource_deinit(p_res);
iris_bo_unreference(res->bo);
iris_pscreen_unref(res->orig_screen);
2018-06-16 09:56:59 -07:00
free(res);
}
static struct iris_resource *
iris_alloc_resource(struct pipe_screen *pscreen,
const struct pipe_resource *templ)
{
struct iris_resource *res = calloc(1, sizeof(struct iris_resource));
if (!res)
return NULL;
res->base.b = *templ;
res->base.b.screen = pscreen;
res->orig_screen = iris_pscreen_ref(pscreen);
pipe_reference_init(&res->base.b.reference, 1);
threaded_resource_init(&res->base.b, false);
if (templ->target == PIPE_BUFFER)
util_range_init(&res->valid_buffer_range);
return res;
}
2018-12-07 11:54:02 -08:00
unsigned
iris_get_num_logical_layers(const struct iris_resource *res, unsigned level)
{
if (res->surf.dim == ISL_SURF_DIM_3D)
return u_minify(res->surf.logical_level0_px.depth, level);
2018-12-07 11:54:02 -08:00
else
return res->surf.logical_level0_px.array_len;
}
static enum isl_aux_state **
create_aux_state_map(struct iris_resource *res, enum isl_aux_state initial)
{
assert(res->aux.state == NULL);
2018-12-07 11:54:02 -08:00
uint32_t total_slices = 0;
for (uint32_t level = 0; level < res->surf.levels; level++)
total_slices += iris_get_num_logical_layers(res, level);
const size_t per_level_array_size =
res->surf.levels * sizeof(enum isl_aux_state *);
/* We're going to allocate a single chunk of data for both the per-level
* reference array and the arrays of aux_state. This makes cleanup
* significantly easier.
*/
const size_t total_size =
per_level_array_size + total_slices * sizeof(enum isl_aux_state);
void *data = malloc(total_size);
if (!data)
return NULL;
enum isl_aux_state **per_level_arr = data;
enum isl_aux_state *s = data + per_level_array_size;
for (uint32_t level = 0; level < res->surf.levels; level++) {
per_level_arr[level] = s;
const unsigned level_layers = iris_get_num_logical_layers(res, level);
for (uint32_t a = 0; a < level_layers; a++)
*(s++) = initial;
}
assert((void *)s == data + total_size);
return per_level_arr;
}
static unsigned
iris_get_aux_clear_color_state_size(struct iris_screen *screen,
struct iris_resource *res)
{
if (!isl_aux_usage_has_fast_clears(res->aux.usage))
return 0;
assert(!isl_surf_usage_is_stencil(res->surf.usage));
/* Depth packets can't specify indirect clear values. The only time depth
* buffers can use indirect clear values is when they're accessed by the
* sampler via render surface state objects.
*/
if (isl_surf_usage_is_depth(res->surf.usage) &&
!iris_sample_with_depth_aux(&screen->devinfo, res))
return 0;
return screen->isl_dev.ss.clear_color_state_size;
}
static void
map_aux_addresses(struct iris_screen *screen, struct iris_resource *res,
enum pipe_format pfmt, unsigned plane)
{
void *aux_map_ctx = iris_bufmgr_get_aux_map_context(screen->bufmgr);
if (!aux_map_ctx)
return;
if (isl_aux_usage_has_ccs(res->aux.usage)) {
const unsigned aux_offset = res->aux.extra_aux.surf.size_B > 0 ?
res->aux.extra_aux.offset : res->aux.offset;
const enum isl_format format =
iris_format_for_usage(&screen->devinfo, pfmt, res->surf.usage).fmt;
const uint64_t format_bits =
intel_aux_map_format_bits(res->surf.tiling, format, plane);
intel_aux_map_add_mapping(aux_map_ctx, res->bo->address + res->offset,
res->aux.bo->address + aux_offset,
res->surf.size_B, format_bits);
res->bo->aux_map_address = res->aux.bo->address;
}
}
static bool
want_ccs_e_for_format(const struct intel_device_info *devinfo,
enum isl_format format)
{
if (!isl_format_supports_ccs_e(devinfo, format))
return false;
const struct isl_format_layout *fmtl = isl_format_get_layout(format);
/* Prior to TGL, CCS_E seems to significantly hurt performance with 32-bit
* floating point formats. For example, Paraview's "Wavelet Volume" case
* uses both R32_FLOAT and R32G32B32A32_FLOAT, and enabling CCS_E for those
* formats causes a 62% FPS drop.
*
* However, many benchmarks seem to use 16-bit float with no issues.
*/
if (devinfo->ver <= 11 &&
fmtl->channels.r.bits == 32 && fmtl->channels.r.type == ISL_SFLOAT)
return false;
return true;
}
static enum isl_surf_dim
target_to_isl_surf_dim(enum pipe_texture_target target)
{
switch (target) {
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
return ISL_SURF_DIM_1D;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_CUBE_ARRAY:
return ISL_SURF_DIM_2D;
case PIPE_TEXTURE_3D:
return ISL_SURF_DIM_3D;
case PIPE_MAX_TEXTURE_TYPES:
break;
}
unreachable("invalid texture type");
}
static bool
iris_resource_configure_main(const struct iris_screen *screen,
struct iris_resource *res,
const struct pipe_resource *templ,
uint64_t modifier, uint32_t row_pitch_B)
{
res->mod_info = isl_drm_modifier_get_info(modifier);
if (modifier != DRM_FORMAT_MOD_INVALID && res->mod_info == NULL)
return false;
isl_tiling_flags_t tiling_flags = 0;
if (res->mod_info != NULL) {
tiling_flags = 1 << res->mod_info->tiling;
} else if (templ->usage == PIPE_USAGE_STAGING ||
templ->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR)) {
tiling_flags = ISL_TILING_LINEAR_BIT;
} else if (templ->bind & PIPE_BIND_SCANOUT) {
tiling_flags = screen->devinfo.has_tiling_uapi ?
ISL_TILING_X_BIT : ISL_TILING_LINEAR_BIT;
} else {
tiling_flags = ISL_TILING_ANY_MASK;
}
isl_surf_usage_flags_t usage = 0;
if (res->mod_info && res->mod_info->aux_usage == ISL_AUX_USAGE_NONE)
usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT;
if (templ->usage == PIPE_USAGE_STAGING)
usage |= ISL_SURF_USAGE_STAGING_BIT;
if (templ->bind & PIPE_BIND_RENDER_TARGET)
usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
if (templ->bind & PIPE_BIND_SAMPLER_VIEW)
usage |= ISL_SURF_USAGE_TEXTURE_BIT;
if (templ->bind & PIPE_BIND_SHADER_IMAGE)
usage |= ISL_SURF_USAGE_STORAGE_BIT;
if (templ->bind & PIPE_BIND_SCANOUT)
usage |= ISL_SURF_USAGE_DISPLAY_BIT;
if (templ->target == PIPE_TEXTURE_CUBE ||
templ->target == PIPE_TEXTURE_CUBE_ARRAY) {
usage |= ISL_SURF_USAGE_CUBE_BIT;
}
if (templ->usage != PIPE_USAGE_STAGING &&
util_format_is_depth_or_stencil(templ->format)) {
/* Should be handled by u_transfer_helper */
assert(!util_format_is_depth_and_stencil(templ->format));
usage |= templ->format == PIPE_FORMAT_S8_UINT ?
ISL_SURF_USAGE_STENCIL_BIT : ISL_SURF_USAGE_DEPTH_BIT;
}
const enum isl_format format =
iris_format_for_usage(&screen->devinfo, templ->format, usage).fmt;
const struct isl_surf_init_info init_info = {
.dim = target_to_isl_surf_dim(templ->target),
.format = format,
.width = templ->width0,
.height = templ->height0,
.depth = templ->depth0,
.levels = templ->last_level + 1,
.array_len = templ->array_size,
.samples = MAX2(templ->nr_samples, 1),
.min_alignment_B = 0,
.row_pitch_B = row_pitch_B,
.usage = usage,
.tiling_flags = tiling_flags
};
if (!isl_surf_init_s(&screen->isl_dev, &res->surf, &init_info))
return false;
res->internal_format = templ->format;
return true;
}
static bool
iris_get_ccs_surf_or_support(const struct isl_device *dev,
const struct isl_surf *surf,
struct isl_surf *aux_surf,
struct isl_surf *extra_aux_surf)
{
assert(extra_aux_surf->size_B == 0);
struct isl_surf *ccs_surf;
const struct isl_surf *hiz_or_mcs_surf;
if (aux_surf->size_B > 0) {
assert(aux_surf->usage & (ISL_SURF_USAGE_HIZ_BIT |
ISL_SURF_USAGE_MCS_BIT));
hiz_or_mcs_surf = aux_surf;
ccs_surf = extra_aux_surf;
} else {
hiz_or_mcs_surf = NULL;
ccs_surf = aux_surf;
}
if (dev->info->verx10 >= 125) {
/* CCS doesn't require VMA on XeHP. So, instead of creating a separate
* surface, we can just return whether CCS is supported for the given
* input surfaces.
*/
return isl_surf_supports_ccs(dev, surf, hiz_or_mcs_surf);
} else {
return isl_surf_get_ccs_surf(dev, surf, hiz_or_mcs_surf, ccs_surf, 0);
}
}
2018-12-07 11:54:02 -08:00
/**
* Configure aux for the resource, but don't allocate it. For images which
* might be shared with modifiers, we must allocate the image and aux data in
* a single bo.
*
* Returns false on unexpected error (e.g. allocation failed, or invalid
* configuration result).
2018-12-07 11:54:02 -08:00
*/
static bool
iris_resource_configure_aux(struct iris_screen *screen,
struct iris_resource *res, bool imported)
2018-12-07 11:54:02 -08:00
{
const struct intel_device_info *devinfo = &screen->devinfo;
const bool has_mcs =
isl_surf_get_mcs_surf(&screen->isl_dev, &res->surf, &res->aux.surf);
const bool has_hiz = !INTEL_DEBUG(DEBUG_NO_HIZ) &&
isl_surf_get_hiz_surf(&screen->isl_dev, &res->surf, &res->aux.surf);
2018-12-07 11:54:02 -08:00
const bool has_ccs = !INTEL_DEBUG(DEBUG_NO_CCS) &&
iris_get_ccs_surf_or_support(&screen->isl_dev, &res->surf,
&res->aux.surf, &res->aux.extra_aux.surf);
if (has_mcs) {
assert(!res->mod_info);
assert(!has_hiz);
if (has_ccs) {
res->aux.usage = ISL_AUX_USAGE_MCS_CCS;
} else {
res->aux.usage = ISL_AUX_USAGE_MCS;
}
} else if (has_hiz) {
assert(!res->mod_info);
assert(!has_mcs);
if (!has_ccs) {
res->aux.usage = ISL_AUX_USAGE_HIZ;
} else if (res->surf.samples == 1 &&
(res->surf.usage & ISL_SURF_USAGE_TEXTURE_BIT)) {
/* If this resource is single-sampled and will be used as a texture,
* put the HiZ surface in write-through mode so that we can sample
* from it.
*/
res->aux.usage = ISL_AUX_USAGE_HIZ_CCS_WT;
} else {
res->aux.usage = ISL_AUX_USAGE_HIZ_CCS;
}
} else if (has_ccs) {
if (res->mod_info) {
res->aux.usage = res->mod_info->aux_usage;
} else if (isl_surf_usage_is_stencil(res->surf.usage)) {
res->aux.usage = ISL_AUX_USAGE_STC_CCS;
} else if (want_ccs_e_for_format(devinfo, res->surf.format)) {
res->aux.usage = devinfo->ver < 12 ?
ISL_AUX_USAGE_CCS_E : ISL_AUX_USAGE_GFX12_CCS_E;
} else {
assert(isl_format_supports_ccs_d(devinfo, res->surf.format));
res->aux.usage = ISL_AUX_USAGE_CCS_D;
}
}
enum isl_aux_state initial_state;
2018-12-07 11:54:02 -08:00
switch (res->aux.usage) {
case ISL_AUX_USAGE_NONE:
/* Having no aux buffer is only okay if there's no modifier with aux. */
return !res->mod_info || res->mod_info->aux_usage == ISL_AUX_USAGE_NONE;
2018-12-07 11:54:02 -08:00
case ISL_AUX_USAGE_HIZ:
case ISL_AUX_USAGE_HIZ_CCS:
case ISL_AUX_USAGE_HIZ_CCS_WT:
2018-12-07 11:54:02 -08:00
initial_state = ISL_AUX_STATE_AUX_INVALID;
break;
case ISL_AUX_USAGE_MCS:
case ISL_AUX_USAGE_MCS_CCS:
2018-12-07 11:54:02 -08:00
/* The Ivybridge PRM, Vol 2 Part 1 p326 says:
*
* "When MCS buffer is enabled and bound to MSRT, it is required
* that it is cleared prior to any rendering."
*
* Since we only use the MCS buffer for rendering, we just clear it
* immediately on allocation. The clear value for MCS buffers is all
* 1's, so we simply memset it to 0xff.
*/
initial_state = ISL_AUX_STATE_CLEAR;
break;
case ISL_AUX_USAGE_CCS_D:
case ISL_AUX_USAGE_CCS_E:
case ISL_AUX_USAGE_GFX12_CCS_E:
case ISL_AUX_USAGE_STC_CCS:
case ISL_AUX_USAGE_MC:
if (imported) {
assert(res->aux.usage != ISL_AUX_USAGE_STC_CCS);
initial_state =
isl_drm_modifier_get_default_aux_state(res->mod_info->modifier);
} else if (devinfo->verx10 >= 125) {
assert(res->aux.surf.size_B == 0);
/* From Bspec 47709, "MCS/CCS Buffers for Render Target(s)":
*
* "CCS surface does not require initialization. Illegal CCS
* [values] are treated as uncompressed memory."
*
* Even if we wanted to, we can't initialize the CCS via CPU map. So,
* we choose an aux state which describes the current state and helps
* avoid ambiguating (something not currently supported for STC_CCS).
*/
assert(isl_aux_usage_has_compression(res->aux.usage));
initial_state = isl_aux_usage_has_fast_clears(res->aux.usage) ?
ISL_AUX_STATE_COMPRESSED_CLEAR :
ISL_AUX_STATE_COMPRESSED_NO_CLEAR;
} else {
assert(res->aux.surf.size_B > 0);
/* When CCS is used, we need to ensure that it starts off in a valid
* state. From the Sky Lake PRM, "MCS Buffer for Render Target(s)":
*
* "If Software wants to enable Color Compression without Fast
* clear, Software needs to initialize MCS with zeros."
*
* A CCS surface initialized to zero is in the pass-through state.
* This state can avoid the need to ambiguate in some cases. We'll
* map and zero the CCS later on in iris_resource_init_aux_buf.
*/
initial_state = ISL_AUX_STATE_PASS_THROUGH;
}
2018-12-07 11:54:02 -08:00
break;
default:
unreachable("Unsupported aux mode");
2018-12-07 11:54:02 -08:00
}
/* Create the aux_state for the auxiliary buffer. */
res->aux.state = create_aux_state_map(res, initial_state);
if (!res->aux.state)
return false;
2018-12-07 11:54:02 -08:00
return true;
}
/**
* Initialize the aux buffer contents.
*
* Returns false on unexpected error (e.g. mapping a BO failed).
*/
static bool
iris_resource_init_aux_buf(struct iris_screen *screen,
struct iris_resource *res)
{
void *map = iris_bo_map(NULL, res->bo, MAP_WRITE | MAP_RAW);
if (!map)
return false;
2018-12-07 11:54:02 -08:00
if (iris_resource_get_aux_state(res, 0, 0) != ISL_AUX_STATE_AUX_INVALID) {
/* See iris_resource_configure_aux for the memset_value rationale. */
uint8_t memset_value = isl_aux_usage_has_mcs(res->aux.usage) ? 0xFF : 0;
memset((char*)map + res->aux.offset, memset_value,
res->aux.surf.size_B);
}
memset((char*)map + res->aux.extra_aux.offset,
0, res->aux.extra_aux.surf.size_B);
/* Zero the indirect clear color to match ::fast_clear_color. */
memset((char *)map + res->aux.clear_color_offset, 0,
iris_get_aux_clear_color_state_size(screen, res));
iris_bo_unmap(res->bo);
2018-12-07 11:54:02 -08:00
if (res->aux.surf.size_B > 0) {
res->aux.bo = res->bo;
iris_bo_reference(res->aux.bo);
map_aux_addresses(screen, res, res->internal_format, 0);
}
if (iris_get_aux_clear_color_state_size(screen, res) > 0) {
res->aux.clear_color_bo = res->bo;
iris_bo_reference(res->aux.clear_color_bo);
}
return true;
}
2018-12-10 00:35:48 -08:00
static void
import_aux_info(struct iris_resource *res,
const struct iris_resource *aux_res)
{
assert(aux_res->aux.surf.row_pitch_B && aux_res->aux.offset);
assert(res->bo == aux_res->aux.bo);
assert(res->aux.surf.row_pitch_B == aux_res->aux.surf.row_pitch_B);
assert(res->bo->size >= aux_res->aux.offset + res->aux.surf.size_B);
iris_bo_reference(aux_res->aux.bo);
res->aux.bo = aux_res->aux.bo;
res->aux.offset = aux_res->aux.offset;
}
static void
iris_resource_finish_aux_import(struct pipe_screen *pscreen,
struct iris_resource *res)
{
struct iris_screen *screen = (struct iris_screen *)pscreen;
/* Create an array of resources. Combining main and aux planes is easier
* with indexing as opposed to scanning the linked list.
*/
struct iris_resource *r[4] = { NULL, };
unsigned num_planes = 0;
unsigned num_main_planes = 0;
for (struct pipe_resource *p_res = &res->base.b; p_res; p_res = p_res->next) {
r[num_planes] = (struct iris_resource *)p_res;
num_main_planes += r[num_planes++]->bo != NULL;
}
/* Combine main and aux plane information. */
switch (res->mod_info->modifier) {
case I915_FORMAT_MOD_Y_TILED_CCS:
case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
assert(num_main_planes == 1 && num_planes == 2);
import_aux_info(r[0], r[1]);
map_aux_addresses(screen, r[0], res->external_format, 0);
/* Add on a clear color BO.
*
* Also add some padding to make sure the fast clear color state buffer
* starts at a 4K alignment to avoid some unknown issues. See the
* matching comment in iris_resource_create_with_modifiers().
*/
if (iris_get_aux_clear_color_state_size(screen, res) > 0) {
res->aux.clear_color_bo =
iris_bo_alloc(screen->bufmgr, "clear color_buffer",
iris_get_aux_clear_color_state_size(screen, res),
4096, IRIS_MEMZONE_OTHER, BO_ALLOC_ZEROED);
}
break;
case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS:
assert(num_main_planes == 1);
assert(num_planes == 1);
res->aux.clear_color_bo =
iris_bo_alloc(screen->bufmgr, "clear color_buffer",
iris_get_aux_clear_color_state_size(screen, res),
4096, IRIS_MEMZONE_OTHER, BO_ALLOC_ZEROED);
break;
case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC:
assert(num_main_planes == 1 && num_planes == 3);
import_aux_info(r[0], r[1]);
map_aux_addresses(screen, r[0], res->external_format, 0);
/* Import the clear color BO. */
iris_bo_reference(r[2]->aux.clear_color_bo);
r[0]->aux.clear_color_bo = r[2]->aux.clear_color_bo;
r[0]->aux.clear_color_offset = r[2]->aux.clear_color_offset;
r[0]->aux.clear_color_unknown = true;
break;
case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC:
assert(num_main_planes == 1);
assert(num_planes == 2);
/* Import the clear color BO. */
iris_bo_reference(r[1]->aux.clear_color_bo);
r[0]->aux.clear_color_bo = r[1]->aux.clear_color_bo;
r[0]->aux.clear_color_offset = r[1]->aux.clear_color_offset;
r[0]->aux.clear_color_unknown = true;
break;
case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
if (num_main_planes == 1 && num_planes == 2) {
import_aux_info(r[0], r[1]);
map_aux_addresses(screen, r[0], res->external_format, 0);
} else {
assert(num_main_planes == 2 && num_planes == 4);
import_aux_info(r[0], r[2]);
import_aux_info(r[1], r[3]);
map_aux_addresses(screen, r[0], res->external_format, 0);
map_aux_addresses(screen, r[1], res->external_format, 1);
}
assert(!isl_aux_usage_has_fast_clears(res->mod_info->aux_usage));
break;
case I915_FORMAT_MOD_4_TILED_DG2_MC_CCS:
assert(!isl_aux_usage_has_fast_clears(res->mod_info->aux_usage));
break;
default:
assert(res->mod_info->aux_usage == ISL_AUX_USAGE_NONE);
break;
}
}
static struct pipe_resource *
iris_resource_create_for_buffer(struct pipe_screen *pscreen,
const struct pipe_resource *templ)
{
struct iris_screen *screen = (struct iris_screen *)pscreen;
struct iris_resource *res = iris_alloc_resource(pscreen, templ);
assert(templ->target == PIPE_BUFFER);
assert(templ->height0 <= 1);
assert(templ->depth0 <= 1);
assert(templ->format == PIPE_FORMAT_NONE ||
util_format_get_blocksize(templ->format) == 1);
res->internal_format = templ->format;
res->surf.tiling = ISL_TILING_LINEAR;
enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER;
const char *name = templ->target == PIPE_BUFFER ? "buffer" : "miptree";
if (templ->flags & IRIS_RESOURCE_FLAG_SHADER_MEMZONE) {
memzone = IRIS_MEMZONE_SHADER;
name = "shader kernels";
} else if (templ->flags & IRIS_RESOURCE_FLAG_SURFACE_MEMZONE) {
memzone = IRIS_MEMZONE_SURFACE;
name = "surface state";
} else if (templ->flags & IRIS_RESOURCE_FLAG_DYNAMIC_MEMZONE) {
memzone = IRIS_MEMZONE_DYNAMIC;
name = "dynamic state";
} else if (templ->flags & IRIS_RESOURCE_FLAG_BINDLESS_MEMZONE) {
memzone = IRIS_MEMZONE_BINDLESS;
name = "bindless surface state";
}
unsigned flags = iris_resource_alloc_flags(screen, templ, res->aux.usage);
res->bo =
iris_bo_alloc(screen->bufmgr, name, templ->width0, 1, memzone, flags);
if (!res->bo) {
iris_resource_destroy(pscreen, &res->base.b);
return NULL;
}
if (templ->bind & PIPE_BIND_SHARED) {
iris_bo_mark_exported(res->bo);
res->base.is_shared = true;
}
return &res->base.b;
}
static struct pipe_resource *
iris_resource_create_with_modifiers(struct pipe_screen *pscreen,
const struct pipe_resource *templ,
const uint64_t *modifiers,
int modifiers_count)
{
struct iris_screen *screen = (struct iris_screen *)pscreen;
struct intel_device_info *devinfo = &screen->devinfo;
struct iris_resource *res = iris_alloc_resource(pscreen, templ);
2018-07-30 15:08:02 -07:00
if (!res)
return NULL;
uint64_t modifier =
select_best_modifier(devinfo, templ, modifiers, modifiers_count);
if (modifier == DRM_FORMAT_MOD_INVALID && modifiers_count > 0) {
fprintf(stderr, "Unsupported modifier, resource creation failed.\n");
goto fail;
2018-07-30 15:08:02 -07:00
}
2018-04-25 15:25:33 -07:00
2018-04-24 23:38:10 -07:00
UNUSED const bool isl_surf_created_successfully =
iris_resource_configure_main(screen, res, templ, modifier, 0);
2018-04-24 23:38:10 -07:00
assert(isl_surf_created_successfully);
if (!iris_resource_configure_aux(screen, res, false))
goto fail;
const char *name = "miptree";
enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER;
unsigned flags = iris_resource_alloc_flags(screen, templ, res->aux.usage);
/* These are for u_upload_mgr buffers only */
assert(!(templ->flags & (IRIS_RESOURCE_FLAG_SHADER_MEMZONE |
IRIS_RESOURCE_FLAG_SURFACE_MEMZONE |
IRIS_RESOURCE_FLAG_DYNAMIC_MEMZONE |
IRIS_RESOURCE_FLAG_BINDLESS_MEMZONE)));
/* Modifiers require the aux data to be in the same buffer as the main
* surface, but we combine them even when a modifier is not being used.
*/
uint64_t bo_size = res->surf.size_B;
/* Allocate space for the aux buffer. */
if (res->aux.surf.size_B > 0) {
res->aux.offset = ALIGN(bo_size, res->aux.surf.alignment_B);
bo_size = res->aux.offset + res->aux.surf.size_B;
}
/* Allocate space for the extra aux buffer. */
if (res->aux.extra_aux.surf.size_B > 0) {
res->aux.extra_aux.offset =
ALIGN(bo_size, res->aux.extra_aux.surf.alignment_B);
bo_size = res->aux.extra_aux.offset + res->aux.extra_aux.surf.size_B;
}
/* Allocate space for the indirect clear color.
*
* Also add some padding to make sure the fast clear color state buffer
* starts at a 4K alignment. We believe that 256B might be enough, but due
* to lack of testing we will leave this as 4K for now.
*/
if (iris_get_aux_clear_color_state_size(screen, res) > 0) {
res->aux.clear_color_offset = ALIGN(bo_size, 4096);
bo_size = res->aux.clear_color_offset +
iris_get_aux_clear_color_state_size(screen, res);
}
uint32_t alignment = MAX2(4096, res->surf.alignment_B);
iris: Only use SET/GET_TILING when exporting/importing BOs In the past, we tracked bo->tiling_mode and bo->stride, and used GEM_{GET,SET}_TILING on all buffers we allocated. This made more sense in the old days (long before iris even existed) when we used GTT maps to detile resources. However, that support is now gone, and we never used it in iris anyway. We don't need to do this in most cases anymore. We are trying to deprecate these kernel APIs. They have many issues. One is having a global tiling mode for a buffer when userspace may want to suballocate multiple resources with different tiling modes from the same object. Another is...what if processes want to interpret the data differently, and hot-swap the tiling mode out from under another process? Another is the fundamental race conditions. There are many reasons not to use these APIs. Unfortunately, there is still one case where it's used: when importing and exporting DMABUFs, we have to communicate the tiling somehow. The right way to do that is using modifiers, but those didn't always exist, and aren't always enabled (maybe aren't even commonly enabled). So we use GET/SET_TILING as a poor-man's IPC mechanism of sorts. This patch stops calling those APIs in general but continues doing so for imported/exported objects when we don't have modifiers. We eliminate iris_bo_alloc_tiled entirely. There is now only one! One small behavioral change snuck in: iris_memobj_create_from_handle now aligns the virtual address to 64K rather than 1B when modifiers aren't present. This should be harmless, and lets us delete a whole bunch of code. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11169>
2021-06-02 15:21:35 -07:00
res->bo =
iris_bo_alloc(screen->bufmgr, name, bo_size, alignment, memzone, flags);
2018-12-07 11:54:02 -08:00
if (!res->bo)
goto fail;
if (res->aux.usage != ISL_AUX_USAGE_NONE &&
!iris_resource_init_aux_buf(screen, res))
goto fail;
if (templ->bind & PIPE_BIND_SHARED) {
iris_bo_mark_exported(res->bo);
res->base.is_shared = true;
}
return &res->base.b;
2018-12-07 11:54:02 -08:00
fail:
fprintf(stderr, "XXX: resource creation failed\n");
iris_resource_destroy(pscreen, &res->base.b);
2018-12-07 11:54:02 -08:00
return NULL;
}
static struct pipe_resource *
iris_resource_create(struct pipe_screen *pscreen,
const struct pipe_resource *templ)
{
if (templ->target == PIPE_BUFFER)
return iris_resource_create_for_buffer(pscreen, templ);
else
return iris_resource_create_with_modifiers(pscreen, templ, NULL, 0);
}
2018-06-02 12:32:19 +01:00
static uint64_t
tiling_to_modifier(uint32_t tiling)
{
static const uint64_t map[] = {
[I915_TILING_NONE] = DRM_FORMAT_MOD_LINEAR,
[I915_TILING_X] = I915_FORMAT_MOD_X_TILED,
[I915_TILING_Y] = I915_FORMAT_MOD_Y_TILED,
};
assert(tiling < ARRAY_SIZE(map));
return map[tiling];
}
static struct pipe_resource *
iris_resource_from_user_memory(struct pipe_screen *pscreen,
const struct pipe_resource *templ,
void *user_memory)
{
struct iris_screen *screen = (struct iris_screen *)pscreen;
struct iris_bufmgr *bufmgr = screen->bufmgr;
struct iris_resource *res = iris_alloc_resource(pscreen, templ);
if (!res)
return NULL;
if (templ->target != PIPE_BUFFER &&
templ->target != PIPE_TEXTURE_1D &&
templ->target != PIPE_TEXTURE_2D)
return NULL;
if (templ->array_size > 1)
return NULL;
size_t res_size = templ->width0;
if (templ->target != PIPE_BUFFER) {
const uint32_t row_pitch_B =
templ->width0 * util_format_get_blocksize(templ->format);
res_size = templ->height0 * row_pitch_B;
if (!iris_resource_configure_main(screen, res, templ,
DRM_FORMAT_MOD_LINEAR,
row_pitch_B)) {
iris_resource_destroy(pscreen, &res->base.b);
return NULL;
}
assert(res->surf.size_B <= res_size);
}
/* The userptr ioctl only works on whole pages. Because we know that
* things will exist in memory at a page granularity, we can expand the
* range given by the client into the whole number of pages and use an
* offset on the resource to make it looks like it starts at the user's
* pointer.
*/
size_t page_size = getpagesize();
assert(util_is_power_of_two_nonzero(page_size));
size_t offset = (uintptr_t)user_memory & (page_size - 1);
void *mem_start = (char *)user_memory - offset;
size_t mem_size = offset + res_size;
mem_size = ALIGN_NPOT(mem_size, page_size);
res->internal_format = templ->format;
res->base.is_user_ptr = true;
res->bo = iris_bo_create_userptr(bufmgr, "user", mem_start, mem_size,
IRIS_MEMZONE_OTHER);
res->offset = offset;
if (!res->bo) {
iris_resource_destroy(pscreen, &res->base.b);
return NULL;
}
util_range_add(&res->base.b, &res->valid_buffer_range, 0, templ->width0);
return &res->base.b;
}
static bool
mod_plane_is_clear_color(uint64_t modifier, uint32_t plane)
{
ASSERTED const struct isl_drm_modifier_info *mod_info =
isl_drm_modifier_get_info(modifier);
assert(mod_info);
switch (modifier) {
case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC:
assert(mod_info->supports_clear_color);
return plane == 2;
case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC:
assert(mod_info->supports_clear_color);
return plane == 1;
default:
assert(!mod_info->supports_clear_color);
return false;
}
}
static unsigned
get_num_planes(const struct pipe_resource *resource)
{
unsigned count = 0;
for (const struct pipe_resource *cur = resource; cur; cur = cur->next)
count++;
return count;
}
static struct pipe_resource *
iris_resource_from_handle(struct pipe_screen *pscreen,
const struct pipe_resource *templ,
struct winsys_handle *whandle,
unsigned usage)
{
assert(templ->target != PIPE_BUFFER);
struct iris_screen *screen = (struct iris_screen *)pscreen;
struct iris_bufmgr *bufmgr = screen->bufmgr;
struct iris_resource *res = iris_alloc_resource(pscreen, templ);
if (!res)
return NULL;
switch (whandle->type) {
2018-06-02 12:32:19 +01:00
case WINSYS_HANDLE_TYPE_FD:
iris: Only use SET/GET_TILING when exporting/importing BOs In the past, we tracked bo->tiling_mode and bo->stride, and used GEM_{GET,SET}_TILING on all buffers we allocated. This made more sense in the old days (long before iris even existed) when we used GTT maps to detile resources. However, that support is now gone, and we never used it in iris anyway. We don't need to do this in most cases anymore. We are trying to deprecate these kernel APIs. They have many issues. One is having a global tiling mode for a buffer when userspace may want to suballocate multiple resources with different tiling modes from the same object. Another is...what if processes want to interpret the data differently, and hot-swap the tiling mode out from under another process? Another is the fundamental race conditions. There are many reasons not to use these APIs. Unfortunately, there is still one case where it's used: when importing and exporting DMABUFs, we have to communicate the tiling somehow. The right way to do that is using modifiers, but those didn't always exist, and aren't always enabled (maybe aren't even commonly enabled). So we use GET/SET_TILING as a poor-man's IPC mechanism of sorts. This patch stops calling those APIs in general but continues doing so for imported/exported objects when we don't have modifiers. We eliminate iris_bo_alloc_tiled entirely. There is now only one! One small behavioral change snuck in: iris_memobj_create_from_handle now aligns the virtual address to 64K rather than 1B when modifiers aren't present. This should be harmless, and lets us delete a whole bunch of code. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11169>
2021-06-02 15:21:35 -07:00
res->bo = iris_bo_import_dmabuf(bufmgr, whandle->handle);
break;
2018-06-02 12:32:19 +01:00
case WINSYS_HANDLE_TYPE_SHARED:
res->bo = iris_bo_gem_create_from_name(bufmgr, "winsys image",
whandle->handle);
break;
default:
unreachable("invalid winsys handle type");
}
2018-06-02 12:32:19 +01:00
if (!res->bo)
goto fail;
res->offset = whandle->offset;
res->external_format = whandle->format;
2018-10-07 20:31:09 -07:00
/* Create a surface for each plane specified by the external format. */
if (whandle->plane < util_format_get_num_planes(whandle->format)) {
iris: Only use SET/GET_TILING when exporting/importing BOs In the past, we tracked bo->tiling_mode and bo->stride, and used GEM_{GET,SET}_TILING on all buffers we allocated. This made more sense in the old days (long before iris even existed) when we used GTT maps to detile resources. However, that support is now gone, and we never used it in iris anyway. We don't need to do this in most cases anymore. We are trying to deprecate these kernel APIs. They have many issues. One is having a global tiling mode for a buffer when userspace may want to suballocate multiple resources with different tiling modes from the same object. Another is...what if processes want to interpret the data differently, and hot-swap the tiling mode out from under another process? Another is the fundamental race conditions. There are many reasons not to use these APIs. Unfortunately, there is still one case where it's used: when importing and exporting DMABUFs, we have to communicate the tiling somehow. The right way to do that is using modifiers, but those didn't always exist, and aren't always enabled (maybe aren't even commonly enabled). So we use GET/SET_TILING as a poor-man's IPC mechanism of sorts. This patch stops calling those APIs in general but continues doing so for imported/exported objects when we don't have modifiers. We eliminate iris_bo_alloc_tiled entirely. There is now only one! One small behavioral change snuck in: iris_memobj_create_from_handle now aligns the virtual address to 64K rather than 1B when modifiers aren't present. This should be harmless, and lets us delete a whole bunch of code. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11169>
2021-06-02 15:21:35 -07:00
uint64_t modifier = whandle->modifier;
iris: Only use SET/GET_TILING when exporting/importing BOs In the past, we tracked bo->tiling_mode and bo->stride, and used GEM_{GET,SET}_TILING on all buffers we allocated. This made more sense in the old days (long before iris even existed) when we used GTT maps to detile resources. However, that support is now gone, and we never used it in iris anyway. We don't need to do this in most cases anymore. We are trying to deprecate these kernel APIs. They have many issues. One is having a global tiling mode for a buffer when userspace may want to suballocate multiple resources with different tiling modes from the same object. Another is...what if processes want to interpret the data differently, and hot-swap the tiling mode out from under another process? Another is the fundamental race conditions. There are many reasons not to use these APIs. Unfortunately, there is still one case where it's used: when importing and exporting DMABUFs, we have to communicate the tiling somehow. The right way to do that is using modifiers, but those didn't always exist, and aren't always enabled (maybe aren't even commonly enabled). So we use GET/SET_TILING as a poor-man's IPC mechanism of sorts. This patch stops calling those APIs in general but continues doing so for imported/exported objects when we don't have modifiers. We eliminate iris_bo_alloc_tiled entirely. There is now only one! One small behavioral change snuck in: iris_memobj_create_from_handle now aligns the virtual address to 64K rather than 1B when modifiers aren't present. This should be harmless, and lets us delete a whole bunch of code. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11169>
2021-06-02 15:21:35 -07:00
if (whandle->modifier == DRM_FORMAT_MOD_INVALID) {
/* We don't have a modifier; match whatever GEM_GET_TILING says */
uint32_t tiling;
iris_gem_get_tiling(res->bo, &tiling);
modifier = tiling_to_modifier(tiling);
}
UNUSED const bool isl_surf_created_successfully =
iris_resource_configure_main(screen, res, templ, modifier,
whandle->stride);
assert(isl_surf_created_successfully);
UNUSED const bool ok = iris_resource_configure_aux(screen, res, true);
assert(ok);
/* The gallium dri layer will create a separate plane resource for the
* aux image. iris_resource_finish_aux_import will merge the separate aux
* parameters back into a single iris_resource.
*/
} else if (mod_plane_is_clear_color(whandle->modifier, whandle->plane)) {
res->aux.clear_color_offset = whandle->offset;
res->aux.clear_color_bo = res->bo;
res->bo = NULL;
} else {
/* Save modifier import information to reconstruct later. After import,
* this will be available under a second image accessible from the main
* image with res->base.next. See iris_resource_finish_aux_import.
*/
res->aux.surf.row_pitch_B = whandle->stride;
res->aux.offset = whandle->offset;
res->aux.bo = res->bo;
res->bo = NULL;
}
if (get_num_planes(&res->base.b) ==
iris_get_dmabuf_modifier_planes(pscreen, whandle->modifier,
whandle->format)) {
iris_resource_finish_aux_import(pscreen, res);
}
return &res->base.b;
fail:
iris_resource_destroy(pscreen, &res->base.b);
return NULL;
}
static struct pipe_resource *
iris_resource_from_memobj(struct pipe_screen *pscreen,
const struct pipe_resource *templ,
struct pipe_memory_object *pmemobj,
uint64_t offset)
{
struct iris_screen *screen = (struct iris_screen *)pscreen;
struct iris_memory_object *memobj = (struct iris_memory_object *)pmemobj;
struct iris_resource *res = iris_alloc_resource(pscreen, templ);
if (!res)
return NULL;
if (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) {
UNUSED const bool isl_surf_created_successfully =
iris_resource_configure_main(screen, res, templ, DRM_FORMAT_MOD_INVALID, 0);
assert(isl_surf_created_successfully);
}
res->bo = memobj->bo;
res->offset = offset;
res->external_format = memobj->format;
res->internal_format = templ->format;
iris_bo_reference(memobj->bo);
return &res->base.b;
}
/* Handle combined depth/stencil with memory objects.
*
* This function is modeled after u_transfer_helper_resource_create.
*/
static struct pipe_resource *
iris_resource_from_memobj_wrapper(struct pipe_screen *pscreen,
const struct pipe_resource *templ,
struct pipe_memory_object *pmemobj,
uint64_t offset)
{
enum pipe_format format = templ->format;
/* Normal case, no special handling: */
if (!(util_format_is_depth_and_stencil(format)))
return iris_resource_from_memobj(pscreen, templ, pmemobj, offset);
struct pipe_resource t = *templ;
t.format = util_format_get_depth_only(format);
struct pipe_resource *prsc =
iris_resource_from_memobj(pscreen, &t, pmemobj, offset);
if (!prsc)
return NULL;
struct iris_resource *res = (struct iris_resource *) prsc;
/* Stencil offset in the buffer without aux. */
uint64_t s_offset = offset +
ALIGN(res->surf.size_B, res->surf.alignment_B);
prsc->format = format; /* frob the format back to the "external" format */
t.format = PIPE_FORMAT_S8_UINT;
struct pipe_resource *stencil =
iris_resource_from_memobj(pscreen, &t, pmemobj, s_offset);
if (!stencil) {
iris_resource_destroy(pscreen, prsc);
return NULL;
}
iris_resource_set_separate_stencil(prsc, stencil);
return prsc;
}
static void
iris_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
{
struct iris_context *ice = (struct iris_context *)ctx;
struct iris_resource *res = (void *) resource;
const struct isl_drm_modifier_info *mod = res->mod_info;
iris_resource_prepare_access(ice, res,
0, INTEL_REMAINING_LEVELS,
0, INTEL_REMAINING_LAYERS,
mod ? mod->aux_usage : ISL_AUX_USAGE_NONE,
mod ? mod->supports_clear_color : false);
if (!res->mod_info && res->aux.usage != ISL_AUX_USAGE_NONE) {
/* flush_resource may be used to prepare an image for sharing external
* to the driver (e.g. via eglCreateImage). To account for this, make
* sure to get rid of any compression that a consumer wouldn't know how
* to handle.
*/
iris_foreach_batch(ice, batch) {
if (iris_batch_references(batch, res->bo))
iris_batch_flush(batch);
}
iris_resource_disable_aux(res);
}
}
iris: Move suballocated resources to a dedicated allocation on export We don't want to export suballocated resources to external consumers, for a variety of reasons. First of all, it would be exporting random other pieces of memory which we may not want those external consumers to have access to. Secondly, external clients wouldn't be aware of what buffers are packed together and busy-tracking implications there. Nor should they be. And those are just the obvious reasons. When we allocate a resource with the PIPE_BIND_SHARED flag, indicating that it's going to be used externally, we avoid suballocation. However, there are times when the client may suddenly decide to export a texture or buffer, without any prior warning. Since we had no idea this buffer would be exported, we suballocated it. Unfortunately, this means we need to transition it to a dedicated allocation on the fly, by allocating a new buffer and copying the contents over. Making things worse, this often happens in DRI hooks that don't have an associated context (which we need to say, run BLORP commands). We have to create an temporary context for this purpose, perform our blit, then destroy it. The radeonsi driver uses a permanent auxiliary context stored in the screen for this purpose, but we can't do that because it causes circular reference counting. radeonsi doesn't do the reference counting that we do, but also doesn't use u_transfer_helper, so they get lucky in avoiding stale resource->screen pointers. Other drivers don't create an auxiliary context, so they avoid this problem for now. For auxiliary data, rather than copying it over bit-for-bit, we simply copy over the underlying data using iris_copy_region (GPU memcpy), and take whatever the resulting aux state is from that operation. Assuming the copy operation compresses, the result will be compressed. v2: Stop using a screen->aux_context and just invent one on the fly to avoid circular reference counting issues. Acked-by: Paulo Zanoni <paulo.r.zanoni@intel.com> [v1] Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12623>
2021-09-16 11:38:03 -07:00
/**
* Reallocate a (non-external) resource into new storage, copying the data
* and modifying the original resource to point at the new storage.
*
* This is useful for e.g. moving a suballocated internal resource to a
* dedicated allocation that can be exported by itself.
*/
static void
iris_reallocate_resource_inplace(struct iris_context *ice,
struct iris_resource *old_res,
unsigned new_bind_flag)
{
struct pipe_screen *pscreen = ice->ctx.screen;
if (iris_bo_is_external(old_res->bo))
return;
assert(old_res->mod_info == NULL);
assert(old_res->bo == old_res->aux.bo || old_res->aux.bo == NULL);
assert(old_res->bo == old_res->aux.clear_color_bo ||
old_res->aux.clear_color_bo == NULL);
assert(old_res->external_format == PIPE_FORMAT_NONE);
struct pipe_resource templ = old_res->base.b;
templ.bind |= new_bind_flag;
struct iris_resource *new_res =
(void *) pscreen->resource_create(pscreen, &templ);
assert(iris_bo_is_real(new_res->bo));
struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
if (old_res->base.b.target == PIPE_BUFFER) {
struct pipe_box box = (struct pipe_box) {
.width = old_res->base.b.width0,
.height = 1,
};
iris_copy_region(&ice->blorp, batch, &new_res->base.b, 0, 0, 0, 0,
&old_res->base.b, 0, &box);
} else {
for (unsigned l = 0; l <= templ.last_level; l++) {
struct pipe_box box = (struct pipe_box) {
.width = u_minify(templ.width0, l),
.height = u_minify(templ.height0, l),
.depth = util_num_layers(&templ, l),
};
iris_copy_region(&ice->blorp, batch, &new_res->base.b, l, 0, 0, 0,
iris: Move suballocated resources to a dedicated allocation on export We don't want to export suballocated resources to external consumers, for a variety of reasons. First of all, it would be exporting random other pieces of memory which we may not want those external consumers to have access to. Secondly, external clients wouldn't be aware of what buffers are packed together and busy-tracking implications there. Nor should they be. And those are just the obvious reasons. When we allocate a resource with the PIPE_BIND_SHARED flag, indicating that it's going to be used externally, we avoid suballocation. However, there are times when the client may suddenly decide to export a texture or buffer, without any prior warning. Since we had no idea this buffer would be exported, we suballocated it. Unfortunately, this means we need to transition it to a dedicated allocation on the fly, by allocating a new buffer and copying the contents over. Making things worse, this often happens in DRI hooks that don't have an associated context (which we need to say, run BLORP commands). We have to create an temporary context for this purpose, perform our blit, then destroy it. The radeonsi driver uses a permanent auxiliary context stored in the screen for this purpose, but we can't do that because it causes circular reference counting. radeonsi doesn't do the reference counting that we do, but also doesn't use u_transfer_helper, so they get lucky in avoiding stale resource->screen pointers. Other drivers don't create an auxiliary context, so they avoid this problem for now. For auxiliary data, rather than copying it over bit-for-bit, we simply copy over the underlying data using iris_copy_region (GPU memcpy), and take whatever the resulting aux state is from that operation. Assuming the copy operation compresses, the result will be compressed. v2: Stop using a screen->aux_context and just invent one on the fly to avoid circular reference counting issues. Acked-by: Paulo Zanoni <paulo.r.zanoni@intel.com> [v1] Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12623>
2021-09-16 11:38:03 -07:00
&old_res->base.b, l, &box);
}
}
iris_flush_resource(&ice->ctx, &new_res->base.b);
struct iris_bo *old_bo = old_res->bo;
struct iris_bo *old_aux_bo = old_res->aux.bo;
struct iris_bo *old_clear_color_bo = old_res->aux.clear_color_bo;
/* Replace the structure fields with the new ones */
old_res->base.b.bind = templ.bind;
old_res->bo = new_res->bo;
old_res->aux.surf = new_res->aux.surf;
old_res->aux.bo = new_res->aux.bo;
old_res->aux.offset = new_res->aux.offset;
old_res->aux.extra_aux.surf = new_res->aux.extra_aux.surf;
old_res->aux.extra_aux.offset = new_res->aux.extra_aux.offset;
old_res->aux.clear_color_bo = new_res->aux.clear_color_bo;
old_res->aux.clear_color_offset = new_res->aux.clear_color_offset;
old_res->aux.usage = new_res->aux.usage;
if (new_res->aux.state) {
assert(old_res->aux.state);
for (unsigned l = 0; l <= templ.last_level; l++) {
unsigned layers = util_num_layers(&templ, l);
for (unsigned z = 0; z < layers; z++) {
enum isl_aux_state aux =
iris_resource_get_aux_state(new_res, l, z);
iris_resource_set_aux_state(ice, old_res, l, z, 1, aux);
}
}
}
/* old_res now points at the new BOs, make new_res point at the old ones
* so they'll be freed when we unreference the resource below.
*/
new_res->bo = old_bo;
new_res->aux.bo = old_aux_bo;
new_res->aux.clear_color_bo = old_clear_color_bo;
pipe_resource_reference((struct pipe_resource **)&new_res, NULL);
}
static void
iris_resource_disable_suballoc_on_first_query(struct pipe_screen *pscreen,
struct pipe_context *ctx,
struct iris_resource *res)
{
if (iris_bo_is_real(res->bo))
return;
assert(!(res->base.b.bind & PIPE_BIND_SHARED));
bool destroy_context;
if (ctx) {
ctx = threaded_context_unwrap_sync(ctx);
destroy_context = false;
} else {
/* We need to execute a blit on some GPU context, but the DRI layer
* often doesn't give us one. So we have to invent a temporary one.
*
* We can't store a permanent context in the screen, as it would cause
* circular refcounting where screens reference contexts that reference
* resources, while resources reference screens...causing nothing to be
* freed. So we just create and destroy a temporary one here.
*/
ctx = iris_create_context(pscreen, NULL, 0);
destroy_context = true;
}
struct iris_context *ice = (struct iris_context *)ctx;
iris_reallocate_resource_inplace(ice, res, PIPE_BIND_SHARED);
assert(res->base.b.bind & PIPE_BIND_SHARED);
if (destroy_context)
iris_destroy_context(ctx);
}
static void
iris_resource_disable_aux_on_first_query(struct pipe_resource *resource,
unsigned usage)
{
struct iris_resource *res = (struct iris_resource *)resource;
bool mod_with_aux =
res->mod_info && res->mod_info->aux_usage != ISL_AUX_USAGE_NONE;
/* Disable aux usage if explicit flush not set and this is the first time
* we are dealing with this resource and the resource was not created with
* a modifier with aux.
*/
if (!mod_with_aux &&
(!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && res->aux.usage != 0) &&
p_atomic_read(&resource->reference.count) == 1) {
iris_resource_disable_aux(res);
}
}
static bool
iris_resource_get_param(struct pipe_screen *pscreen,
iris: Move suballocated resources to a dedicated allocation on export We don't want to export suballocated resources to external consumers, for a variety of reasons. First of all, it would be exporting random other pieces of memory which we may not want those external consumers to have access to. Secondly, external clients wouldn't be aware of what buffers are packed together and busy-tracking implications there. Nor should they be. And those are just the obvious reasons. When we allocate a resource with the PIPE_BIND_SHARED flag, indicating that it's going to be used externally, we avoid suballocation. However, there are times when the client may suddenly decide to export a texture or buffer, without any prior warning. Since we had no idea this buffer would be exported, we suballocated it. Unfortunately, this means we need to transition it to a dedicated allocation on the fly, by allocating a new buffer and copying the contents over. Making things worse, this often happens in DRI hooks that don't have an associated context (which we need to say, run BLORP commands). We have to create an temporary context for this purpose, perform our blit, then destroy it. The radeonsi driver uses a permanent auxiliary context stored in the screen for this purpose, but we can't do that because it causes circular reference counting. radeonsi doesn't do the reference counting that we do, but also doesn't use u_transfer_helper, so they get lucky in avoiding stale resource->screen pointers. Other drivers don't create an auxiliary context, so they avoid this problem for now. For auxiliary data, rather than copying it over bit-for-bit, we simply copy over the underlying data using iris_copy_region (GPU memcpy), and take whatever the resulting aux state is from that operation. Assuming the copy operation compresses, the result will be compressed. v2: Stop using a screen->aux_context and just invent one on the fly to avoid circular reference counting issues. Acked-by: Paulo Zanoni <paulo.r.zanoni@intel.com> [v1] Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12623>
2021-09-16 11:38:03 -07:00
struct pipe_context *ctx,
struct pipe_resource *resource,
unsigned plane,
unsigned layer,
unsigned level,
enum pipe_resource_param param,
unsigned handle_usage,
uint64_t *value)
{
struct iris_screen *screen = (struct iris_screen *)pscreen;
struct iris_resource *res = (struct iris_resource *)resource;
bool mod_with_aux =
res->mod_info && res->mod_info->aux_usage != ISL_AUX_USAGE_NONE;
bool wants_aux = mod_with_aux && plane > 0;
bool wants_cc = mod_with_aux &&
mod_plane_is_clear_color(res->mod_info->modifier, plane);
bool result;
unsigned handle;
iris_resource_disable_aux_on_first_query(resource, handle_usage);
iris: Move suballocated resources to a dedicated allocation on export We don't want to export suballocated resources to external consumers, for a variety of reasons. First of all, it would be exporting random other pieces of memory which we may not want those external consumers to have access to. Secondly, external clients wouldn't be aware of what buffers are packed together and busy-tracking implications there. Nor should they be. And those are just the obvious reasons. When we allocate a resource with the PIPE_BIND_SHARED flag, indicating that it's going to be used externally, we avoid suballocation. However, there are times when the client may suddenly decide to export a texture or buffer, without any prior warning. Since we had no idea this buffer would be exported, we suballocated it. Unfortunately, this means we need to transition it to a dedicated allocation on the fly, by allocating a new buffer and copying the contents over. Making things worse, this often happens in DRI hooks that don't have an associated context (which we need to say, run BLORP commands). We have to create an temporary context for this purpose, perform our blit, then destroy it. The radeonsi driver uses a permanent auxiliary context stored in the screen for this purpose, but we can't do that because it causes circular reference counting. radeonsi doesn't do the reference counting that we do, but also doesn't use u_transfer_helper, so they get lucky in avoiding stale resource->screen pointers. Other drivers don't create an auxiliary context, so they avoid this problem for now. For auxiliary data, rather than copying it over bit-for-bit, we simply copy over the underlying data using iris_copy_region (GPU memcpy), and take whatever the resulting aux state is from that operation. Assuming the copy operation compresses, the result will be compressed. v2: Stop using a screen->aux_context and just invent one on the fly to avoid circular reference counting issues. Acked-by: Paulo Zanoni <paulo.r.zanoni@intel.com> [v1] Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12623>
2021-09-16 11:38:03 -07:00
iris_resource_disable_suballoc_on_first_query(pscreen, ctx, res);
struct iris_bo *bo = wants_cc ? res->aux.clear_color_bo :
wants_aux ? res->aux.bo : res->bo;
iris: Move suballocated resources to a dedicated allocation on export We don't want to export suballocated resources to external consumers, for a variety of reasons. First of all, it would be exporting random other pieces of memory which we may not want those external consumers to have access to. Secondly, external clients wouldn't be aware of what buffers are packed together and busy-tracking implications there. Nor should they be. And those are just the obvious reasons. When we allocate a resource with the PIPE_BIND_SHARED flag, indicating that it's going to be used externally, we avoid suballocation. However, there are times when the client may suddenly decide to export a texture or buffer, without any prior warning. Since we had no idea this buffer would be exported, we suballocated it. Unfortunately, this means we need to transition it to a dedicated allocation on the fly, by allocating a new buffer and copying the contents over. Making things worse, this often happens in DRI hooks that don't have an associated context (which we need to say, run BLORP commands). We have to create an temporary context for this purpose, perform our blit, then destroy it. The radeonsi driver uses a permanent auxiliary context stored in the screen for this purpose, but we can't do that because it causes circular reference counting. radeonsi doesn't do the reference counting that we do, but also doesn't use u_transfer_helper, so they get lucky in avoiding stale resource->screen pointers. Other drivers don't create an auxiliary context, so they avoid this problem for now. For auxiliary data, rather than copying it over bit-for-bit, we simply copy over the underlying data using iris_copy_region (GPU memcpy), and take whatever the resulting aux state is from that operation. Assuming the copy operation compresses, the result will be compressed. v2: Stop using a screen->aux_context and just invent one on the fly to avoid circular reference counting issues. Acked-by: Paulo Zanoni <paulo.r.zanoni@intel.com> [v1] Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12623>
2021-09-16 11:38:03 -07:00
assert(iris_bo_is_real(bo));
switch (param) {
case PIPE_RESOURCE_PARAM_NPLANES:
if (mod_with_aux) {
*value = iris_get_dmabuf_modifier_planes(pscreen,
res->mod_info->modifier,
res->external_format);
} else {
*value = get_num_planes(&res->base.b);
}
return true;
case PIPE_RESOURCE_PARAM_STRIDE:
*value = wants_cc ? 64 :
wants_aux ? res->aux.surf.row_pitch_B : res->surf.row_pitch_B;
/* Mesa's implementation of eglCreateImage rejects strides of zero (see
* dri2_check_dma_buf_attribs). Ensure we return a non-zero stride as
* this value may be queried from GBM and passed into EGL.
*
* Also, although modifiers which use a clear color plane specify that
* the plane's pitch should be ignored, some kernels have been found to
* require 64-byte alignment.
*/
assert(*value != 0 && (!wants_cc || *value % 64 == 0));
return true;
case PIPE_RESOURCE_PARAM_OFFSET:
*value = wants_cc ? res->aux.clear_color_offset :
wants_aux ? res->aux.offset : 0;
return true;
case PIPE_RESOURCE_PARAM_MODIFIER:
*value = res->mod_info ? res->mod_info->modifier :
iris: Only use SET/GET_TILING when exporting/importing BOs In the past, we tracked bo->tiling_mode and bo->stride, and used GEM_{GET,SET}_TILING on all buffers we allocated. This made more sense in the old days (long before iris even existed) when we used GTT maps to detile resources. However, that support is now gone, and we never used it in iris anyway. We don't need to do this in most cases anymore. We are trying to deprecate these kernel APIs. They have many issues. One is having a global tiling mode for a buffer when userspace may want to suballocate multiple resources with different tiling modes from the same object. Another is...what if processes want to interpret the data differently, and hot-swap the tiling mode out from under another process? Another is the fundamental race conditions. There are many reasons not to use these APIs. Unfortunately, there is still one case where it's used: when importing and exporting DMABUFs, we have to communicate the tiling somehow. The right way to do that is using modifiers, but those didn't always exist, and aren't always enabled (maybe aren't even commonly enabled). So we use GET/SET_TILING as a poor-man's IPC mechanism of sorts. This patch stops calling those APIs in general but continues doing so for imported/exported objects when we don't have modifiers. We eliminate iris_bo_alloc_tiled entirely. There is now only one! One small behavioral change snuck in: iris_memobj_create_from_handle now aligns the virtual address to 64K rather than 1B when modifiers aren't present. This should be harmless, and lets us delete a whole bunch of code. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11169>
2021-06-02 15:21:35 -07:00
tiling_to_modifier(isl_tiling_to_i915_tiling(res->surf.tiling));
return true;
case PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED:
iris: Only use SET/GET_TILING when exporting/importing BOs In the past, we tracked bo->tiling_mode and bo->stride, and used GEM_{GET,SET}_TILING on all buffers we allocated. This made more sense in the old days (long before iris even existed) when we used GTT maps to detile resources. However, that support is now gone, and we never used it in iris anyway. We don't need to do this in most cases anymore. We are trying to deprecate these kernel APIs. They have many issues. One is having a global tiling mode for a buffer when userspace may want to suballocate multiple resources with different tiling modes from the same object. Another is...what if processes want to interpret the data differently, and hot-swap the tiling mode out from under another process? Another is the fundamental race conditions. There are many reasons not to use these APIs. Unfortunately, there is still one case where it's used: when importing and exporting DMABUFs, we have to communicate the tiling somehow. The right way to do that is using modifiers, but those didn't always exist, and aren't always enabled (maybe aren't even commonly enabled). So we use GET/SET_TILING as a poor-man's IPC mechanism of sorts. This patch stops calling those APIs in general but continues doing so for imported/exported objects when we don't have modifiers. We eliminate iris_bo_alloc_tiled entirely. There is now only one! One small behavioral change snuck in: iris_memobj_create_from_handle now aligns the virtual address to 64K rather than 1B when modifiers aren't present. This should be harmless, and lets us delete a whole bunch of code. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11169>
2021-06-02 15:21:35 -07:00
if (!wants_aux)
iris_gem_set_tiling(bo, &res->surf);
result = iris_bo_flink(bo, &handle) == 0;
if (result)
*value = handle;
return result;
case PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS: {
iris: Only use SET/GET_TILING when exporting/importing BOs In the past, we tracked bo->tiling_mode and bo->stride, and used GEM_{GET,SET}_TILING on all buffers we allocated. This made more sense in the old days (long before iris even existed) when we used GTT maps to detile resources. However, that support is now gone, and we never used it in iris anyway. We don't need to do this in most cases anymore. We are trying to deprecate these kernel APIs. They have many issues. One is having a global tiling mode for a buffer when userspace may want to suballocate multiple resources with different tiling modes from the same object. Another is...what if processes want to interpret the data differently, and hot-swap the tiling mode out from under another process? Another is the fundamental race conditions. There are many reasons not to use these APIs. Unfortunately, there is still one case where it's used: when importing and exporting DMABUFs, we have to communicate the tiling somehow. The right way to do that is using modifiers, but those didn't always exist, and aren't always enabled (maybe aren't even commonly enabled). So we use GET/SET_TILING as a poor-man's IPC mechanism of sorts. This patch stops calling those APIs in general but continues doing so for imported/exported objects when we don't have modifiers. We eliminate iris_bo_alloc_tiled entirely. There is now only one! One small behavioral change snuck in: iris_memobj_create_from_handle now aligns the virtual address to 64K rather than 1B when modifiers aren't present. This should be harmless, and lets us delete a whole bunch of code. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11169>
2021-06-02 15:21:35 -07:00
if (!wants_aux)
iris_gem_set_tiling(bo, &res->surf);
/* Because we share the same drm file across multiple iris_screen, when
* we export a GEM handle we must make sure it is valid in the DRM file
* descriptor the caller is using (this is the FD given at screen
* creation).
*/
uint32_t handle;
if (iris_bo_export_gem_handle_for_device(bo, screen->winsys_fd, &handle))
return false;
*value = handle;
return true;
}
case PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD:
iris: Only use SET/GET_TILING when exporting/importing BOs In the past, we tracked bo->tiling_mode and bo->stride, and used GEM_{GET,SET}_TILING on all buffers we allocated. This made more sense in the old days (long before iris even existed) when we used GTT maps to detile resources. However, that support is now gone, and we never used it in iris anyway. We don't need to do this in most cases anymore. We are trying to deprecate these kernel APIs. They have many issues. One is having a global tiling mode for a buffer when userspace may want to suballocate multiple resources with different tiling modes from the same object. Another is...what if processes want to interpret the data differently, and hot-swap the tiling mode out from under another process? Another is the fundamental race conditions. There are many reasons not to use these APIs. Unfortunately, there is still one case where it's used: when importing and exporting DMABUFs, we have to communicate the tiling somehow. The right way to do that is using modifiers, but those didn't always exist, and aren't always enabled (maybe aren't even commonly enabled). So we use GET/SET_TILING as a poor-man's IPC mechanism of sorts. This patch stops calling those APIs in general but continues doing so for imported/exported objects when we don't have modifiers. We eliminate iris_bo_alloc_tiled entirely. There is now only one! One small behavioral change snuck in: iris_memobj_create_from_handle now aligns the virtual address to 64K rather than 1B when modifiers aren't present. This should be harmless, and lets us delete a whole bunch of code. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11169>
2021-06-02 15:21:35 -07:00
if (!wants_aux)
iris_gem_set_tiling(bo, &res->surf);
result = iris_bo_export_dmabuf(bo, (int *) &handle) == 0;
if (result)
*value = handle;
return result;
default:
return false;
}
}
static bool
iris_resource_get_handle(struct pipe_screen *pscreen,
iris: Move suballocated resources to a dedicated allocation on export We don't want to export suballocated resources to external consumers, for a variety of reasons. First of all, it would be exporting random other pieces of memory which we may not want those external consumers to have access to. Secondly, external clients wouldn't be aware of what buffers are packed together and busy-tracking implications there. Nor should they be. And those are just the obvious reasons. When we allocate a resource with the PIPE_BIND_SHARED flag, indicating that it's going to be used externally, we avoid suballocation. However, there are times when the client may suddenly decide to export a texture or buffer, without any prior warning. Since we had no idea this buffer would be exported, we suballocated it. Unfortunately, this means we need to transition it to a dedicated allocation on the fly, by allocating a new buffer and copying the contents over. Making things worse, this often happens in DRI hooks that don't have an associated context (which we need to say, run BLORP commands). We have to create an temporary context for this purpose, perform our blit, then destroy it. The radeonsi driver uses a permanent auxiliary context stored in the screen for this purpose, but we can't do that because it causes circular reference counting. radeonsi doesn't do the reference counting that we do, but also doesn't use u_transfer_helper, so they get lucky in avoiding stale resource->screen pointers. Other drivers don't create an auxiliary context, so they avoid this problem for now. For auxiliary data, rather than copying it over bit-for-bit, we simply copy over the underlying data using iris_copy_region (GPU memcpy), and take whatever the resulting aux state is from that operation. Assuming the copy operation compresses, the result will be compressed. v2: Stop using a screen->aux_context and just invent one on the fly to avoid circular reference counting issues. Acked-by: Paulo Zanoni <paulo.r.zanoni@intel.com> [v1] Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12623>
2021-09-16 11:38:03 -07:00
struct pipe_context *ctx,
struct pipe_resource *resource,
struct winsys_handle *whandle,
unsigned usage)
{
struct iris_screen *screen = (struct iris_screen *) pscreen;
struct iris_resource *res = (struct iris_resource *)resource;
bool mod_with_aux =
res->mod_info && res->mod_info->aux_usage != ISL_AUX_USAGE_NONE;
iris_resource_disable_aux_on_first_query(resource, usage);
iris: Move suballocated resources to a dedicated allocation on export We don't want to export suballocated resources to external consumers, for a variety of reasons. First of all, it would be exporting random other pieces of memory which we may not want those external consumers to have access to. Secondly, external clients wouldn't be aware of what buffers are packed together and busy-tracking implications there. Nor should they be. And those are just the obvious reasons. When we allocate a resource with the PIPE_BIND_SHARED flag, indicating that it's going to be used externally, we avoid suballocation. However, there are times when the client may suddenly decide to export a texture or buffer, without any prior warning. Since we had no idea this buffer would be exported, we suballocated it. Unfortunately, this means we need to transition it to a dedicated allocation on the fly, by allocating a new buffer and copying the contents over. Making things worse, this often happens in DRI hooks that don't have an associated context (which we need to say, run BLORP commands). We have to create an temporary context for this purpose, perform our blit, then destroy it. The radeonsi driver uses a permanent auxiliary context stored in the screen for this purpose, but we can't do that because it causes circular reference counting. radeonsi doesn't do the reference counting that we do, but also doesn't use u_transfer_helper, so they get lucky in avoiding stale resource->screen pointers. Other drivers don't create an auxiliary context, so they avoid this problem for now. For auxiliary data, rather than copying it over bit-for-bit, we simply copy over the underlying data using iris_copy_region (GPU memcpy), and take whatever the resulting aux state is from that operation. Assuming the copy operation compresses, the result will be compressed. v2: Stop using a screen->aux_context and just invent one on the fly to avoid circular reference counting issues. Acked-by: Paulo Zanoni <paulo.r.zanoni@intel.com> [v1] Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12623>
2021-09-16 11:38:03 -07:00
iris_resource_disable_suballoc_on_first_query(pscreen, ctx, res);
assert(iris_bo_is_real(res->bo));
struct iris_bo *bo;
if (res->mod_info &&
mod_plane_is_clear_color(res->mod_info->modifier, whandle->plane)) {
bo = res->aux.clear_color_bo;
whandle->offset = res->aux.clear_color_offset;
} else if (mod_with_aux && whandle->plane > 0) {
bo = res->aux.bo;
whandle->stride = res->aux.surf.row_pitch_B;
whandle->offset = res->aux.offset;
} else {
/* If this is a buffer, stride should be 0 - no need to special case */
whandle->stride = res->surf.row_pitch_B;
bo = res->bo;
}
whandle->format = res->external_format;
2018-12-09 23:12:33 -08:00
whandle->modifier =
res->mod_info ? res->mod_info->modifier
iris: Only use SET/GET_TILING when exporting/importing BOs In the past, we tracked bo->tiling_mode and bo->stride, and used GEM_{GET,SET}_TILING on all buffers we allocated. This made more sense in the old days (long before iris even existed) when we used GTT maps to detile resources. However, that support is now gone, and we never used it in iris anyway. We don't need to do this in most cases anymore. We are trying to deprecate these kernel APIs. They have many issues. One is having a global tiling mode for a buffer when userspace may want to suballocate multiple resources with different tiling modes from the same object. Another is...what if processes want to interpret the data differently, and hot-swap the tiling mode out from under another process? Another is the fundamental race conditions. There are many reasons not to use these APIs. Unfortunately, there is still one case where it's used: when importing and exporting DMABUFs, we have to communicate the tiling somehow. The right way to do that is using modifiers, but those didn't always exist, and aren't always enabled (maybe aren't even commonly enabled). So we use GET/SET_TILING as a poor-man's IPC mechanism of sorts. This patch stops calling those APIs in general but continues doing so for imported/exported objects when we don't have modifiers. We eliminate iris_bo_alloc_tiled entirely. There is now only one! One small behavioral change snuck in: iris_memobj_create_from_handle now aligns the virtual address to 64K rather than 1B when modifiers aren't present. This should be harmless, and lets us delete a whole bunch of code. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11169>
2021-06-02 15:21:35 -07:00
: tiling_to_modifier(isl_tiling_to_i915_tiling(res->surf.tiling));
2018-12-09 23:12:33 -08:00
#ifndef NDEBUG
enum isl_aux_usage allowed_usage =
usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH ? res->aux.usage :
res->mod_info ? res->mod_info->aux_usage : ISL_AUX_USAGE_NONE;
if (res->aux.usage != allowed_usage) {
enum isl_aux_state aux_state = iris_resource_get_aux_state(res, 0, 0);
assert(aux_state == ISL_AUX_STATE_RESOLVED ||
aux_state == ISL_AUX_STATE_PASS_THROUGH);
2018-12-09 23:12:33 -08:00
}
#endif
switch (whandle->type) {
case WINSYS_HANDLE_TYPE_SHARED:
iris: Only use SET/GET_TILING when exporting/importing BOs In the past, we tracked bo->tiling_mode and bo->stride, and used GEM_{GET,SET}_TILING on all buffers we allocated. This made more sense in the old days (long before iris even existed) when we used GTT maps to detile resources. However, that support is now gone, and we never used it in iris anyway. We don't need to do this in most cases anymore. We are trying to deprecate these kernel APIs. They have many issues. One is having a global tiling mode for a buffer when userspace may want to suballocate multiple resources with different tiling modes from the same object. Another is...what if processes want to interpret the data differently, and hot-swap the tiling mode out from under another process? Another is the fundamental race conditions. There are many reasons not to use these APIs. Unfortunately, there is still one case where it's used: when importing and exporting DMABUFs, we have to communicate the tiling somehow. The right way to do that is using modifiers, but those didn't always exist, and aren't always enabled (maybe aren't even commonly enabled). So we use GET/SET_TILING as a poor-man's IPC mechanism of sorts. This patch stops calling those APIs in general but continues doing so for imported/exported objects when we don't have modifiers. We eliminate iris_bo_alloc_tiled entirely. There is now only one! One small behavioral change snuck in: iris_memobj_create_from_handle now aligns the virtual address to 64K rather than 1B when modifiers aren't present. This should be harmless, and lets us delete a whole bunch of code. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11169>
2021-06-02 15:21:35 -07:00
iris_gem_set_tiling(bo, &res->surf);
return iris_bo_flink(bo, &whandle->handle) == 0;
case WINSYS_HANDLE_TYPE_KMS: {
iris: Only use SET/GET_TILING when exporting/importing BOs In the past, we tracked bo->tiling_mode and bo->stride, and used GEM_{GET,SET}_TILING on all buffers we allocated. This made more sense in the old days (long before iris even existed) when we used GTT maps to detile resources. However, that support is now gone, and we never used it in iris anyway. We don't need to do this in most cases anymore. We are trying to deprecate these kernel APIs. They have many issues. One is having a global tiling mode for a buffer when userspace may want to suballocate multiple resources with different tiling modes from the same object. Another is...what if processes want to interpret the data differently, and hot-swap the tiling mode out from under another process? Another is the fundamental race conditions. There are many reasons not to use these APIs. Unfortunately, there is still one case where it's used: when importing and exporting DMABUFs, we have to communicate the tiling somehow. The right way to do that is using modifiers, but those didn't always exist, and aren't always enabled (maybe aren't even commonly enabled). So we use GET/SET_TILING as a poor-man's IPC mechanism of sorts. This patch stops calling those APIs in general but continues doing so for imported/exported objects when we don't have modifiers. We eliminate iris_bo_alloc_tiled entirely. There is now only one! One small behavioral change snuck in: iris_memobj_create_from_handle now aligns the virtual address to 64K rather than 1B when modifiers aren't present. This should be harmless, and lets us delete a whole bunch of code. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11169>
2021-06-02 15:21:35 -07:00
iris_gem_set_tiling(bo, &res->surf);
/* Because we share the same drm file across multiple iris_screen, when
* we export a GEM handle we must make sure it is valid in the DRM file
* descriptor the caller is using (this is the FD given at screen
* creation).
*/
uint32_t handle;
if (iris_bo_export_gem_handle_for_device(bo, screen->winsys_fd, &handle))
return false;
whandle->handle = handle;
return true;
}
case WINSYS_HANDLE_TYPE_FD:
iris: Only use SET/GET_TILING when exporting/importing BOs In the past, we tracked bo->tiling_mode and bo->stride, and used GEM_{GET,SET}_TILING on all buffers we allocated. This made more sense in the old days (long before iris even existed) when we used GTT maps to detile resources. However, that support is now gone, and we never used it in iris anyway. We don't need to do this in most cases anymore. We are trying to deprecate these kernel APIs. They have many issues. One is having a global tiling mode for a buffer when userspace may want to suballocate multiple resources with different tiling modes from the same object. Another is...what if processes want to interpret the data differently, and hot-swap the tiling mode out from under another process? Another is the fundamental race conditions. There are many reasons not to use these APIs. Unfortunately, there is still one case where it's used: when importing and exporting DMABUFs, we have to communicate the tiling somehow. The right way to do that is using modifiers, but those didn't always exist, and aren't always enabled (maybe aren't even commonly enabled). So we use GET/SET_TILING as a poor-man's IPC mechanism of sorts. This patch stops calling those APIs in general but continues doing so for imported/exported objects when we don't have modifiers. We eliminate iris_bo_alloc_tiled entirely. There is now only one! One small behavioral change snuck in: iris_memobj_create_from_handle now aligns the virtual address to 64K rather than 1B when modifiers aren't present. This should be harmless, and lets us delete a whole bunch of code. Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11169>
2021-06-02 15:21:35 -07:00
iris_gem_set_tiling(bo, &res->surf);
return iris_bo_export_dmabuf(bo, (int *) &whandle->handle) == 0;
}
return false;
}
static bool
resource_is_busy(struct iris_context *ice,
struct iris_resource *res)
{
bool busy = iris_bo_busy(res->bo);
iris_foreach_batch(ice, batch)
busy |= iris_batch_references(batch, res->bo);
return busy;
}
void
iris_replace_buffer_storage(struct pipe_context *ctx,
struct pipe_resource *p_dst,
struct pipe_resource *p_src,
unsigned num_rebinds,
uint32_t rebind_mask,
uint32_t delete_buffer_id)
{
struct iris_screen *screen = (void *) ctx->screen;
struct iris_context *ice = (void *) ctx;
struct iris_resource *dst = (void *) p_dst;
struct iris_resource *src = (void *) p_src;
assert(memcmp(&dst->surf, &src->surf, sizeof(dst->surf)) == 0);
struct iris_bo *old_bo = dst->bo;
/* Swap out the backing storage */
iris_bo_reference(src->bo);
dst->bo = src->bo;
/* Rebind the buffer, replacing any state referring to the old BO's
* address, and marking state dirty so it's reemitted.
*/
screen->vtbl.rebind_buffer(ice, dst);
iris_bo_unreference(old_bo);
}
static void
iris_invalidate_resource(struct pipe_context *ctx,
struct pipe_resource *resource)
{
struct iris_screen *screen = (void *) ctx->screen;
struct iris_context *ice = (void *) ctx;
struct iris_resource *res = (void *) resource;
if (resource->target != PIPE_BUFFER)
return;
/* If it's already invalidated, don't bother doing anything. */
if (res->valid_buffer_range.start > res->valid_buffer_range.end)
return;
if (!resource_is_busy(ice, res)) {
/* The resource is idle, so just mark that it contains no data and
* keep using the same underlying buffer object.
*/
util_range_set_empty(&res->valid_buffer_range);
return;
}
/* Otherwise, try and replace the backing storage with a new BO. */
/* We can't reallocate memory we didn't allocate in the first place. */
if (res->bo->gem_handle && res->bo->real.userptr)
return;
struct iris_bo *old_bo = res->bo;
struct iris_bo *new_bo =
iris_bo_alloc(screen->bufmgr, res->bo->name, resource->width0, 1,
iris_memzone_for_address(old_bo->address), 0);
if (!new_bo)
return;
/* Swap out the backing storage */
res->bo = new_bo;
/* Rebind the buffer, replacing any state referring to the old BO's
* address, and marking state dirty so it's reemitted.
*/
screen->vtbl.rebind_buffer(ice, res);
util_range_set_empty(&res->valid_buffer_range);
iris_bo_unreference(old_bo);
}
static void
iris_flush_staging_region(struct pipe_transfer *xfer,
const struct pipe_box *flush_box)
{
if (!(xfer->usage & PIPE_MAP_WRITE))
return;
struct iris_transfer *map = (void *) xfer;
struct pipe_box src_box = *flush_box;
/* Account for extra alignment padding in staging buffer */
if (xfer->resource->target == PIPE_BUFFER)
src_box.x += xfer->box.x % IRIS_MAP_BUFFER_ALIGNMENT;
struct pipe_box dst_box = (struct pipe_box) {
.x = xfer->box.x + flush_box->x,
.y = xfer->box.y + flush_box->y,
.z = xfer->box.z + flush_box->z,
.width = flush_box->width,
.height = flush_box->height,
.depth = flush_box->depth,
};
iris_copy_region(map->blorp, map->batch, xfer->resource, xfer->level,
dst_box.x, dst_box.y, dst_box.z, map->staging, 0,
&src_box);
}
static void
iris_unmap_copy_region(struct iris_transfer *map)
{
iris_resource_destroy(map->staging->screen, map->staging);
map->ptr = NULL;
}
static void
iris_map_copy_region(struct iris_transfer *map)
{
struct pipe_screen *pscreen = &map->batch->screen->base;
struct pipe_transfer *xfer = &map->base.b;
struct pipe_box *box = &xfer->box;
struct iris_resource *res = (void *) xfer->resource;
unsigned extra = xfer->resource->target == PIPE_BUFFER ?
box->x % IRIS_MAP_BUFFER_ALIGNMENT : 0;
struct pipe_resource templ = (struct pipe_resource) {
.usage = PIPE_USAGE_STAGING,
.width0 = box->width + extra,
.height0 = box->height,
.depth0 = 1,
.nr_samples = xfer->resource->nr_samples,
.nr_storage_samples = xfer->resource->nr_storage_samples,
.array_size = box->depth,
.format = res->internal_format,
};
if (xfer->resource->target == PIPE_BUFFER)
templ.target = PIPE_BUFFER;
else if (templ.array_size > 1)
templ.target = PIPE_TEXTURE_2D_ARRAY;
else
templ.target = PIPE_TEXTURE_2D;
map->staging = iris_resource_create(pscreen, &templ);
assert(map->staging);
if (templ.target != PIPE_BUFFER) {
struct isl_surf *surf = &((struct iris_resource *) map->staging)->surf;
xfer->stride = isl_surf_get_row_pitch_B(surf);
xfer->layer_stride = isl_surf_get_array_pitch(surf);
}
if (!(xfer->usage & PIPE_MAP_DISCARD_RANGE)) {
iris_copy_region(map->blorp, map->batch, map->staging, 0, extra, 0, 0,
xfer->resource, xfer->level, box);
/* Ensure writes to the staging BO land before we map it below. */
iris_emit_pipe_control_flush(map->batch,
"transfer read: flush before mapping",
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_CS_STALL);
}
struct iris_bo *staging_bo = iris_resource_bo(map->staging);
if (iris_batch_references(map->batch, staging_bo))
iris_batch_flush(map->batch);
assert(((struct iris_resource *)map->staging)->offset == 0);
map->ptr =
iris_bo_map(map->dbg, staging_bo, xfer->usage & MAP_FLAGS) + extra;
map->unmap = iris_unmap_copy_region;
}
static void
get_image_offset_el(const struct isl_surf *surf, unsigned level, unsigned z,
unsigned *out_x0_el, unsigned *out_y0_el)
{
ASSERTED uint32_t z0_el, a0_el;
if (surf->dim == ISL_SURF_DIM_3D) {
isl_surf_get_image_offset_el(surf, level, 0, z,
out_x0_el, out_y0_el, &z0_el, &a0_el);
} else {
isl_surf_get_image_offset_el(surf, level, z, 0,
out_x0_el, out_y0_el, &z0_el, &a0_el);
}
assert(z0_el == 0 && a0_el == 0);
}
/**
* Get pointer offset into stencil buffer.
*
* The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
* must decode the tile's layout in software.
*
* See
* - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
* Format.
* - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
*
* Even though the returned offset is always positive, the return type is
* signed due to
* commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
* mesa: Fix return type of _mesa_get_format_bytes() (#37351)
*/
static intptr_t
s8_offset(uint32_t stride, uint32_t x, uint32_t y)
{
uint32_t tile_size = 4096;
uint32_t tile_width = 64;
uint32_t tile_height = 64;
uint32_t row_size = 64 * stride / 2; /* Two rows are interleaved. */
uint32_t tile_x = x / tile_width;
uint32_t tile_y = y / tile_height;
/* The byte's address relative to the tile's base addres. */
uint32_t byte_x = x % tile_width;
uint32_t byte_y = y % tile_height;
uintptr_t u = tile_y * row_size
+ tile_x * tile_size
+ 512 * (byte_x / 8)
+ 64 * (byte_y / 8)
+ 32 * ((byte_y / 4) % 2)
+ 16 * ((byte_x / 4) % 2)
+ 8 * ((byte_y / 2) % 2)
+ 4 * ((byte_x / 2) % 2)
+ 2 * (byte_y % 2)
+ 1 * (byte_x % 2);
return u;
}
static void
iris_unmap_s8(struct iris_transfer *map)
{
struct pipe_transfer *xfer = &map->base.b;
const struct pipe_box *box = &xfer->box;
struct iris_resource *res = (struct iris_resource *) xfer->resource;
struct isl_surf *surf = &res->surf;
if (xfer->usage & PIPE_MAP_WRITE) {
uint8_t *untiled_s8_map = map->ptr;
uint8_t *tiled_s8_map = res->offset +
iris_bo_map(map->dbg, res->bo, (xfer->usage | MAP_RAW) & MAP_FLAGS);
for (int s = 0; s < box->depth; s++) {
unsigned x0_el, y0_el;
get_image_offset_el(surf, xfer->level, box->z + s, &x0_el, &y0_el);
for (uint32_t y = 0; y < box->height; y++) {
for (uint32_t x = 0; x < box->width; x++) {
ptrdiff_t offset = s8_offset(surf->row_pitch_B,
x0_el + box->x + x,
y0_el + box->y + y);
tiled_s8_map[offset] =
untiled_s8_map[s * xfer->layer_stride + y * xfer->stride + x];
}
}
}
}
free(map->buffer);
}
static void
iris_map_s8(struct iris_transfer *map)
{
struct pipe_transfer *xfer = &map->base.b;
const struct pipe_box *box = &xfer->box;
struct iris_resource *res = (struct iris_resource *) xfer->resource;
struct isl_surf *surf = &res->surf;
xfer->stride = surf->row_pitch_B;
xfer->layer_stride = xfer->stride * box->height;
/* The tiling and detiling functions require that the linear buffer has
* a 16-byte alignment (that is, its `x0` is 16-byte aligned). Here we
* over-allocate the linear buffer to get the proper alignment.
*/
map->buffer = map->ptr = malloc(xfer->layer_stride * box->depth);
assert(map->buffer);
/* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
* INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
* invalidate is set, since we'll be writing the whole rectangle from our
* temporary buffer back out.
*/
if (!(xfer->usage & PIPE_MAP_DISCARD_RANGE)) {
uint8_t *untiled_s8_map = map->ptr;
uint8_t *tiled_s8_map = res->offset +
iris_bo_map(map->dbg, res->bo, (xfer->usage | MAP_RAW) & MAP_FLAGS);
for (int s = 0; s < box->depth; s++) {
unsigned x0_el, y0_el;
get_image_offset_el(surf, xfer->level, box->z + s, &x0_el, &y0_el);
for (uint32_t y = 0; y < box->height; y++) {
for (uint32_t x = 0; x < box->width; x++) {
ptrdiff_t offset = s8_offset(surf->row_pitch_B,
x0_el + box->x + x,
y0_el + box->y + y);
untiled_s8_map[s * xfer->layer_stride + y * xfer->stride + x] =
tiled_s8_map[offset];
}
}
}
}
map->unmap = iris_unmap_s8;
}
2018-05-28 17:14:43 -07:00
/* Compute extent parameters for use with tiled_memcpy functions.
* xs are in units of bytes and ys are in units of strides.
*/
static inline void
tile_extents(const struct isl_surf *surf,
2018-05-28 17:14:43 -07:00
const struct pipe_box *box,
unsigned level, int z,
unsigned *x1_B, unsigned *x2_B,
unsigned *y1_el, unsigned *y2_el)
2018-05-28 17:14:43 -07:00
{
const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
const unsigned cpp = fmtl->bpb / 8;
assert(box->x % fmtl->bw == 0);
assert(box->y % fmtl->bh == 0);
unsigned x0_el, y0_el;
get_image_offset_el(surf, level, box->z + z, &x0_el, &y0_el);
2018-05-28 17:14:43 -07:00
*x1_B = (box->x / fmtl->bw + x0_el) * cpp;
*y1_el = box->y / fmtl->bh + y0_el;
*x2_B = (DIV_ROUND_UP(box->x + box->width, fmtl->bw) + x0_el) * cpp;
*y2_el = DIV_ROUND_UP(box->y + box->height, fmtl->bh) + y0_el;
}
static void
iris_unmap_tiled_memcpy(struct iris_transfer *map)
{
struct pipe_transfer *xfer = &map->base.b;
const struct pipe_box *box = &xfer->box;
2018-05-28 17:14:43 -07:00
struct iris_resource *res = (struct iris_resource *) xfer->resource;
struct isl_surf *surf = &res->surf;
const bool has_swizzling = false;
2018-05-28 17:14:43 -07:00
if (xfer->usage & PIPE_MAP_WRITE) {
char *dst = res->offset +
iris_bo_map(map->dbg, res->bo, (xfer->usage | MAP_RAW) & MAP_FLAGS);
2018-05-28 17:14:43 -07:00
for (int s = 0; s < box->depth; s++) {
unsigned x1, x2, y1, y2;
tile_extents(surf, box, xfer->level, s, &x1, &x2, &y1, &y2);
2018-07-12 14:44:55 -07:00
void *ptr = map->ptr + s * xfer->layer_stride;
2018-07-12 14:44:55 -07:00
isl_memcpy_linear_to_tiled(x1, x2, y1, y2, dst, ptr,
surf->row_pitch_B, xfer->stride,
has_swizzling, surf->tiling, ISL_MEMCPY);
}
2018-05-28 17:14:43 -07:00
}
os_free_aligned(map->buffer);
map->buffer = map->ptr = NULL;
}
static void
iris_map_tiled_memcpy(struct iris_transfer *map)
{
struct pipe_transfer *xfer = &map->base.b;
const struct pipe_box *box = &xfer->box;
2018-05-28 17:14:43 -07:00
struct iris_resource *res = (struct iris_resource *) xfer->resource;
struct isl_surf *surf = &res->surf;
2018-07-12 14:44:55 -07:00
xfer->stride = ALIGN(surf->row_pitch_B, 16);
xfer->layer_stride = xfer->stride * box->height;
2018-07-12 14:44:55 -07:00
2018-06-24 00:27:58 -07:00
unsigned x1, x2, y1, y2;
tile_extents(surf, box, xfer->level, 0, &x1, &x2, &y1, &y2);
2018-05-28 17:14:43 -07:00
/* The tiling and detiling functions require that the linear buffer has
* a 16-byte alignment (that is, its `x0` is 16-byte aligned). Here we
* over-allocate the linear buffer to get the proper alignment.
*/
2018-07-03 23:44:40 -07:00
map->buffer =
os_malloc_aligned(xfer->layer_stride * box->depth, 16);
2018-05-28 17:14:43 -07:00
assert(map->buffer);
2018-07-12 14:44:55 -07:00
map->ptr = (char *)map->buffer + (x1 & 0xf);
2018-05-28 17:14:43 -07:00
const bool has_swizzling = false;
2018-05-28 17:14:43 -07:00
if (!(xfer->usage & PIPE_MAP_DISCARD_RANGE)) {
char *src = res->offset +
iris_bo_map(map->dbg, res->bo, (xfer->usage | MAP_RAW) & MAP_FLAGS);
2018-05-28 17:14:43 -07:00
for (int s = 0; s < box->depth; s++) {
unsigned x1, x2, y1, y2;
tile_extents(surf, box, xfer->level, s, &x1, &x2, &y1, &y2);
2018-07-12 14:44:55 -07:00
/* Use 's' rather than 'box->z' to rebase the first slice to 0. */
void *ptr = map->ptr + s * xfer->layer_stride;
isl_memcpy_tiled_to_linear(x1, x2, y1, y2, ptr, src, xfer->stride,
surf->row_pitch_B, has_swizzling,
surf->tiling, ISL_MEMCPY_STREAMING_LOAD);
2018-07-12 14:44:55 -07:00
}
2018-05-28 17:14:43 -07:00
}
map->unmap = iris_unmap_tiled_memcpy;
}
2018-06-24 00:27:58 -07:00
static void
iris_map_direct(struct iris_transfer *map)
{
struct pipe_transfer *xfer = &map->base.b;
2018-06-24 00:27:58 -07:00
struct pipe_box *box = &xfer->box;
struct iris_resource *res = (struct iris_resource *) xfer->resource;
void *ptr = res->offset +
iris_bo_map(map->dbg, res->bo, xfer->usage & MAP_FLAGS);
2018-06-24 00:27:58 -07:00
if (res->base.b.target == PIPE_BUFFER) {
xfer->stride = 0;
xfer->layer_stride = 0;
2018-07-12 14:44:55 -07:00
map->ptr = ptr + box->x;
} else {
struct isl_surf *surf = &res->surf;
const struct isl_format_layout *fmtl =
isl_format_get_layout(surf->format);
const unsigned cpp = fmtl->bpb / 8;
unsigned x0_el, y0_el;
assert(box->x % fmtl->bw == 0);
assert(box->y % fmtl->bh == 0);
get_image_offset_el(surf, xfer->level, box->z, &x0_el, &y0_el);
2018-06-24 00:27:58 -07:00
x0_el += box->x / fmtl->bw;
y0_el += box->y / fmtl->bh;
xfer->stride = isl_surf_get_row_pitch_B(surf);
xfer->layer_stride = isl_surf_get_array_pitch(surf);
map->ptr = ptr + y0_el * xfer->stride + x0_el * cpp;
}
2018-06-24 00:27:58 -07:00
}
static bool
can_promote_to_async(const struct iris_resource *res,
const struct pipe_box *box,
enum pipe_map_flags usage)
{
/* If we're writing to a section of the buffer that hasn't even been
* initialized with useful data, then we can safely promote this write
* to be unsynchronized. This helps the common pattern of appending data.
*/
return res->base.b.target == PIPE_BUFFER && (usage & PIPE_MAP_WRITE) &&
!(usage & TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED) &&
!util_ranges_intersect(&res->valid_buffer_range, box->x,
box->x + box->width);
}
static void *
iris_transfer_map(struct pipe_context *ctx,
struct pipe_resource *resource,
unsigned level,
enum pipe_map_flags usage,
const struct pipe_box *box,
struct pipe_transfer **ptransfer)
{
struct iris_context *ice = (struct iris_context *)ctx;
struct iris_resource *res = (struct iris_resource *)resource;
2018-05-28 17:14:43 -07:00
struct isl_surf *surf = &res->surf;
if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
/* Replace the backing storage with a fresh buffer for non-async maps */
if (!(usage & (PIPE_MAP_UNSYNCHRONIZED |
TC_TRANSFER_MAP_NO_INVALIDATE)))
iris_invalidate_resource(ctx, resource);
/* If we can discard the whole resource, we can discard the range. */
usage |= PIPE_MAP_DISCARD_RANGE;
}
if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
can_promote_to_async(res, box, usage)) {
usage |= PIPE_MAP_UNSYNCHRONIZED;
}
/* Avoid using GPU copies for persistent/coherent buffers, as the idea
* there is to access them simultaneously on the CPU & GPU. This also
* avoids trying to use GPU copies for our u_upload_mgr buffers which
* contain state we're constructing for a GPU draw call, which would
* kill us with infinite stack recursion.
*/
if (usage & (PIPE_MAP_PERSISTENT | PIPE_MAP_COHERENT))
usage |= PIPE_MAP_DIRECTLY;
/* We cannot provide a direct mapping of tiled resources, and we
* may not be able to mmap imported BOs since they may come from
* other devices that I915_GEM_MMAP cannot work with.
*/
if ((usage & PIPE_MAP_DIRECTLY) &&
(surf->tiling != ISL_TILING_LINEAR || iris_bo_is_imported(res->bo)))
return NULL;
bool map_would_stall = false;
if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
map_would_stall =
resource_is_busy(ice, res) ||
iris_has_invalid_primary(res, level, 1, box->z, box->depth);
if (map_would_stall && (usage & PIPE_MAP_DONTBLOCK) &&
(usage & PIPE_MAP_DIRECTLY))
return NULL;
}
struct iris_transfer *map;
if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)
map = slab_zalloc(&ice->transfer_pool_unsync);
else
map = slab_zalloc(&ice->transfer_pool);
2018-05-28 17:14:43 -07:00
if (!map)
return NULL;
struct pipe_transfer *xfer = &map->base.b;
2018-05-28 17:14:43 -07:00
map->dbg = &ice->dbg;
pipe_resource_reference(&xfer->resource, resource);
xfer->level = level;
xfer->usage = usage;
xfer->box = *box;
*ptransfer = xfer;
map->dest_had_defined_contents =
util_ranges_intersect(&res->valid_buffer_range, box->x,
box->x + box->width);
if (usage & PIPE_MAP_WRITE)
util_range_add(&res->base.b, &res->valid_buffer_range, box->x, box->x + box->width);
if (iris_bo_mmap_mode(res->bo) != IRIS_MMAP_NONE) {
/* GPU copies are not useful for buffer reads. Instead of stalling to
* read from the original buffer, we'd simply copy it to a temporary...
* then stall (a bit longer) to read from that buffer.
*
* Images are less clear-cut. Resolves can be destructive, removing
* some of the underlying compression, so we'd rather blit the data to
* a linear temporary and map that, to avoid the resolve.
*/
if (!(usage & PIPE_MAP_DISCARD_RANGE) &&
!iris_has_invalid_primary(res, level, 1, box->z, box->depth)) {
usage |= PIPE_MAP_DIRECTLY;
}
/* We can map directly if it wouldn't stall, there's no compression,
* and we aren't doing an uncached read.
*/
if (!map_would_stall &&
!isl_aux_usage_has_compression(res->aux.usage) &&
!((usage & PIPE_MAP_READ) &&
iris_bo_mmap_mode(res->bo) != IRIS_MMAP_WB)) {
usage |= PIPE_MAP_DIRECTLY;
}
}
/* TODO: Teach iris_map_tiled_memcpy about Tile4... */
if (res->surf.tiling == ISL_TILING_4)
usage &= ~PIPE_MAP_DIRECTLY;
if (!(usage & PIPE_MAP_DIRECTLY)) {
/* If we need a synchronous mapping and the resource is busy, or needs
* resolving, we copy to/from a linear temporary buffer using the GPU.
*/
map->batch = &ice->batches[IRIS_BATCH_RENDER];
map->blorp = &ice->blorp;
iris_map_copy_region(map);
2018-05-28 17:14:43 -07:00
} else {
/* Otherwise we're free to map on the CPU. */
if (resource->target != PIPE_BUFFER) {
iris_resource_access_raw(ice, res, level, box->z, box->depth,
usage & PIPE_MAP_WRITE);
}
if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
iris_foreach_batch(ice, batch) {
if (iris_batch_references(batch, res->bo))
iris_batch_flush(batch);
}
}
if (surf->tiling == ISL_TILING_W) {
/* TODO: Teach iris_map_tiled_memcpy about W-tiling... */
iris_map_s8(map);
} else if (surf->tiling != ISL_TILING_LINEAR) {
iris_map_tiled_memcpy(map);
} else {
iris_map_direct(map);
}
2018-05-28 17:14:43 -07:00
}
2018-05-28 17:14:43 -07:00
return map->ptr;
}
static void
iris_transfer_flush_region(struct pipe_context *ctx,
struct pipe_transfer *xfer,
const struct pipe_box *box)
{
struct iris_context *ice = (struct iris_context *)ctx;
struct iris_resource *res = (struct iris_resource *) xfer->resource;
struct iris_transfer *map = (void *) xfer;
if (map->staging)
iris_flush_staging_region(xfer, box);
if (res->base.b.target == PIPE_BUFFER) {
util_range_add(&res->base.b, &res->valid_buffer_range, box->x, box->x + box->width);
}
/* Make sure we flag constants dirty even if there's no need to emit
* any PIPE_CONTROLs to a batch.
*/
iris_dirty_for_history(ice, res);
}
static void
iris_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *xfer)
{
struct iris_context *ice = (struct iris_context *)ctx;
struct iris_transfer *map = (void *) xfer;
if (!(xfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
PIPE_MAP_COHERENT))) {
struct pipe_box flush_box = {
.x = 0, .y = 0, .z = 0,
.width = xfer->box.width,
.height = xfer->box.height,
.depth = xfer->box.depth,
};
iris_transfer_flush_region(ctx, xfer, &flush_box);
}
2018-05-28 17:14:43 -07:00
if (map->unmap)
map->unmap(map);
pipe_resource_reference(&xfer->resource, NULL);
/* transfer_unmap is always called from the driver thread, so we have to
* use transfer_pool, not transfer_pool_unsync. Freeing an object into a
* different pool is allowed, however.
*/
slab_free(&ice->transfer_pool, map);
}
2019-03-14 01:21:16 -07:00
/**
* The pipe->texture_subdata() driver hook.
*
* Mesa's state tracker takes this path whenever possible, even with
* PIPE_CAP_TEXTURE_TRANSFER_MODES set.
2019-03-14 01:21:16 -07:00
*/
static void
iris_texture_subdata(struct pipe_context *ctx,
struct pipe_resource *resource,
unsigned level,
unsigned usage,
const struct pipe_box *box,
const void *data,
unsigned stride,
unsigned layer_stride)
{
struct iris_context *ice = (struct iris_context *)ctx;
struct iris_resource *res = (struct iris_resource *)resource;
const struct isl_surf *surf = &res->surf;
assert(resource->target != PIPE_BUFFER);
/* Just use the transfer-based path for linear buffers - it will already
* do a direct mapping, or a simple linear staging buffer.
*
* Linear staging buffers appear to be better than tiled ones, too, so
* take that path if we need the GPU to perform color compression, or
* stall-avoidance blits.
*
* TODO: Teach isl_memcpy_linear_to_tiled about Tile4...
2019-03-14 01:21:16 -07:00
*/
if (surf->tiling == ISL_TILING_LINEAR ||
surf->tiling == ISL_TILING_4 ||
isl_aux_usage_has_compression(res->aux.usage) ||
resource_is_busy(ice, res) ||
iris_bo_mmap_mode(res->bo) == IRIS_MMAP_NONE) {
2019-03-14 01:21:16 -07:00
return u_default_texture_subdata(ctx, resource, level, usage, box,
data, stride, layer_stride);
}
/* No state trackers pass any flags other than PIPE_MAP_WRITE */
2019-03-14 01:21:16 -07:00
iris_resource_access_raw(ice, res, level, box->z, box->depth, true);
iris_foreach_batch(ice, batch) {
if (iris_batch_references(batch, res->bo))
iris_batch_flush(batch);
2019-03-14 01:21:16 -07:00
}
uint8_t *dst = iris_bo_map(&ice->dbg, res->bo, MAP_WRITE | MAP_RAW);
for (int s = 0; s < box->depth; s++) {
const uint8_t *src = data + s * layer_stride;
if (surf->tiling == ISL_TILING_W) {
unsigned x0_el, y0_el;
get_image_offset_el(surf, level, box->z + s, &x0_el, &y0_el);
for (unsigned y = 0; y < box->height; y++) {
for (unsigned x = 0; x < box->width; x++) {
ptrdiff_t offset = s8_offset(surf->row_pitch_B,
x0_el + box->x + x,
y0_el + box->y + y);
dst[offset] = src[y * stride + x];
}
}
} else {
unsigned x1, x2, y1, y2;
tile_extents(surf, box, level, s, &x1, &x2, &y1, &y2);
isl_memcpy_linear_to_tiled(x1, x2, y1, y2,
(void *)dst, (void *)src,
surf->row_pitch_B, stride,
false, surf->tiling, ISL_MEMCPY);
}
}
}
/**
* Mark state dirty that needs to be re-emitted when a resource is written.
*/
void
iris_dirty_for_history(struct iris_context *ice,
struct iris_resource *res)
{
const uint64_t stages = res->bind_stages;
uint64_t dirty = 0ull;
uint64_t stage_dirty = 0ull;
if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
if (stages & (1u << stage)) {
struct iris_shader_state *shs = &ice->state.shaders[stage];
shs->dirty_cbufs |= ~0u;
}
}
dirty |= IRIS_DIRTY_RENDER_MISC_BUFFER_FLUSHES |
IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES;
stage_dirty |= (stages << IRIS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS);
}
if (res->bind_history & (PIPE_BIND_SAMPLER_VIEW |
PIPE_BIND_SHADER_IMAGE)) {
dirty |= IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES |
IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES;
stage_dirty |= (stages << IRIS_SHIFT_FOR_STAGE_DIRTY_BINDINGS);
}
if (res->bind_history & PIPE_BIND_SHADER_BUFFER) {
dirty |= IRIS_DIRTY_RENDER_MISC_BUFFER_FLUSHES |
IRIS_DIRTY_COMPUTE_MISC_BUFFER_FLUSHES;
stage_dirty |= (stages << IRIS_SHIFT_FOR_STAGE_DIRTY_BINDINGS);
}
if (res->bind_history & PIPE_BIND_VERTEX_BUFFER)
dirty |= IRIS_DIRTY_VERTEX_BUFFER_FLUSHES;
if (ice->state.streamout_active && (res->bind_history & PIPE_BIND_STREAM_OUTPUT))
dirty |= IRIS_DIRTY_SO_BUFFERS;
ice->state.dirty |= dirty;
ice->state.stage_dirty |= stage_dirty;
}
bool
iris_resource_set_clear_color(struct iris_context *ice,
struct iris_resource *res,
union isl_color_value color)
{
if (res->aux.clear_color_unknown ||
memcmp(&res->aux.clear_color, &color, sizeof(color)) != 0) {
res->aux.clear_color = color;
res->aux.clear_color_unknown = false;
return true;
}
return false;
}
static enum pipe_format
iris_resource_get_internal_format(struct pipe_resource *p_res)
{
struct iris_resource *res = (void *) p_res;
return res->internal_format;
}
static const struct u_transfer_vtbl transfer_vtbl = {
.resource_create = iris_resource_create,
.resource_destroy = iris_resource_destroy,
.transfer_map = iris_transfer_map,
.transfer_unmap = iris_transfer_unmap,
.transfer_flush_region = iris_transfer_flush_region,
.get_internal_format = iris_resource_get_internal_format,
.set_stencil = iris_resource_set_separate_stencil,
.get_stencil = iris_resource_get_separate_stencil,
};
void
iris_init_screen_resource_functions(struct pipe_screen *pscreen)
{
pscreen->query_dmabuf_modifiers = iris_query_dmabuf_modifiers;
pscreen->is_dmabuf_modifier_supported = iris_is_dmabuf_modifier_supported;
pscreen->get_dmabuf_modifier_planes = iris_get_dmabuf_modifier_planes;
pscreen->resource_create_with_modifiers =
iris_resource_create_with_modifiers;
pscreen->resource_create = u_transfer_helper_resource_create;
pscreen->resource_from_user_memory = iris_resource_from_user_memory;
pscreen->resource_from_handle = iris_resource_from_handle;
pscreen->resource_from_memobj = iris_resource_from_memobj_wrapper;
pscreen->resource_get_handle = iris_resource_get_handle;
pscreen->resource_get_param = iris_resource_get_param;
pscreen->resource_destroy = u_transfer_helper_resource_destroy;
pscreen->memobj_create_from_handle = iris_memobj_create_from_handle;
pscreen->memobj_destroy = iris_memobj_destroy;
pscreen->transfer_helper =
u_transfer_helper_create(&transfer_vtbl, true, true, false, true, false);
}
void
iris_init_resource_functions(struct pipe_context *ctx)
{
ctx->flush_resource = iris_flush_resource;
ctx->invalidate_resource = iris_invalidate_resource;
ctx->buffer_map = u_transfer_helper_transfer_map;
ctx->texture_map = u_transfer_helper_transfer_map;
ctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
ctx->buffer_unmap = u_transfer_helper_transfer_unmap;
ctx->texture_unmap = u_transfer_helper_transfer_unmap;
2018-06-07 01:25:35 -07:00
ctx->buffer_subdata = u_default_buffer_subdata;
2019-03-14 01:21:16 -07:00
ctx->texture_subdata = iris_texture_subdata;
}