mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 04:48:08 +02:00
nil: Add tiled memcpy helpers
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30044>
This commit is contained in:
parent
1c131de30e
commit
b99f28b7d6
2 changed files with 595 additions and 0 deletions
594
src/nouveau/nil/copy.rs
Normal file
594
src/nouveau/nil/copy.rs
Normal file
|
|
@ -0,0 +1,594 @@
|
|||
// Copyright © 2024 Valve Corp. and Collabora, Ltd.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
use crate::extent::{units, Extent4D, Offset4D};
|
||||
use crate::tiling::Tiling;
|
||||
|
||||
use std::ffi::c_void;
|
||||
use std::ops::Range;
|
||||
|
||||
// This file is dedicated to the internal tiling layout, mainly in the context
|
||||
// of CPU-based tiled memcpy implementations (and helpers) for VK_EXT_host_image_copy
|
||||
//
|
||||
// Work here is based on isl_tiled_memcpy, fd6_tiled_memcpy, old work by Rebecca Mckeever,
|
||||
// and https://fgiesen.wordpress.com/2011/01/17/texture-tiling-and-swizzling/
|
||||
//
|
||||
// On NVIDIA, the tiling system is a two-tier one, and images are first tiled in
|
||||
// a grid of rows of tiles (called "Blocks") with one or more columns:
|
||||
//
|
||||
// +----------+----------+----------+----------+
|
||||
// | Block 0 | Block 1 | Block 2 | Block 3 |
|
||||
// +----------+----------+----------+----------+
|
||||
// | Block 4 | Block 5 | Block 6 | Block 7 |
|
||||
// +----------+----------+----------+----------+
|
||||
// | Block 8 | Block 9 | Block 10 | Block 11 |
|
||||
// +----------+----------+----------+----------+
|
||||
//
|
||||
// The blocks themselves are ordered linearly as can be seen above, which is
|
||||
// where the "Block Linear" naming comes from for NVIDIA's tiling scheme.
|
||||
//
|
||||
// For 3D images, each block continues in the Z direction such that tiles
|
||||
// contain multiple Z slices. If the image depth is longer than the
|
||||
// block depth, there will be more than one layer of blocks, where a layer is
|
||||
// made up of 1 or more Z slices. For example, if the above tile pattern was
|
||||
// the first layer of a multilayer arrangement, the second layer would be:
|
||||
//
|
||||
// +----------+----------+----------+----------+
|
||||
// | Block 12 | Block 13 | Block 14 | Block 15 |
|
||||
// +----------+----------+----------+----------+
|
||||
// | Block 16 | Block 17 | Block 18 | Block 19 |
|
||||
// +----------+----------+----------+----------+
|
||||
// | Block 20 | Block 21 | Block 22 | Block 23 |
|
||||
// +----------+----------+----------+----------+
|
||||
//
|
||||
// The number of rows, columns, and layers of tiles can thus be deduced to be:
|
||||
// rows >= ceiling(image_height / block_height)
|
||||
// columns >= ceiling(image_width / block_width)
|
||||
// layers >= ceiling(image_depth / block_depth)
|
||||
//
|
||||
// Where block_width is a constant 64B (unless for sparse) and block_height
|
||||
// can be either 8 or 16 GOBs tall (more on GOBs below). For us, block_depth
|
||||
// is one for now.
|
||||
//
|
||||
// The >= is in case the blocks around the edges are partial.
|
||||
//
|
||||
// Now comes the second tier. Each block is composed of GOBs (Groups of Bytes)
|
||||
// arranged in ascending order in a single column:
|
||||
//
|
||||
// +---------------------------+
|
||||
// | GOB 0 |
|
||||
// +---------------------------+
|
||||
// | GOB 1 |
|
||||
// +---------------------------+
|
||||
// | GOB 2 |
|
||||
// +---------------------------+
|
||||
// | GOB 3 |
|
||||
// +---------------------------+
|
||||
//
|
||||
// The number of GOBs in a full block is
|
||||
// block_height * block_depth
|
||||
//
|
||||
// An Ampere GOB is 512 bytes, arranged in a 64x8 layout and split into Sectors.
|
||||
// Each Sector is 32 Bytes, and the Sectors in a GOB are arranged in a 16x2
|
||||
// layout (i.e., two 16B lines on top of each other). It's then arranged into
|
||||
// two columns that are 2 sectors by 4, leading to a 4x4 grid of sectors:
|
||||
//
|
||||
// +----------+----------+----------+----------+
|
||||
// | Sector 0 | Sector 1 | Sector 0 | Sector 1 |
|
||||
// +----------+----------+----------+----------+
|
||||
// | Sector 2 | Sector 3 | Sector 2 | Sector 3 |
|
||||
// +----------+----------+----------+----------+
|
||||
// | Sector 4 | Sector 5 | Sector 4 | Sector 5 |
|
||||
// +----------+----------+----------+----------+
|
||||
// | Sector 6 | Sector 7 | Sector 6 | Sector 7 |
|
||||
// +----------+----------+----------+----------+
|
||||
//
|
||||
// From the given pixel address equations in the Orin manual, we arrived at
|
||||
// the following bit interleave pattern for the pixel address:
|
||||
//
|
||||
// b8 b7 b6 b5 b4 b3 b2 b1 b0
|
||||
// --------------------------
|
||||
// x5 y2 y1 x4 y0 x3 x2 x1 x0
|
||||
//
|
||||
// Which would look something like this:
|
||||
// fn get_pixel_offset(
|
||||
// x: usize,
|
||||
// y: usize,
|
||||
// ) -> usize {
|
||||
// (x & 15) |
|
||||
// (y & 1) << 4 |
|
||||
// (x & 16) << 1 |
|
||||
// (y & 6) << 5 |
|
||||
// (x & 32) << 3
|
||||
// }
|
||||
//
|
||||
//
|
||||
|
||||
// The way our implementation will work is by splitting an image into tiles, then
|
||||
// each tile will be broken into its GOBs, and finally each GOB into sectors,
|
||||
// where each sector will be copied into its position.
|
||||
//
|
||||
// For code sharing and cleanliness, we write everything to be very generic,
|
||||
// so as to be shared between Linear <-> Tiled and Tiled <-> Linear paths, and
|
||||
// (ab)use Rust's traits to specialize the last level (copy_gob/copy_whole_gob)
|
||||
// for a particular direction.
|
||||
//
|
||||
// The copy_x and copy_whole_x distinction is made because if we can guarantee
|
||||
// that tiles/gobs are whole and aligned, we can skip all bounds checking and
|
||||
// copy things in fast and tight loops
|
||||
|
||||
/// Copies a GOB
|
||||
///
|
||||
/// This trait should be implemented twice for each GOB type, once for
|
||||
/// tiled-to-linear and once for linear-to-tiled. This allows to implement
|
||||
/// the rest of tiled copies in a generic way.
|
||||
trait CopyGOB {
|
||||
const GOB_EXTENT_B: Extent4D<units::Bytes>;
|
||||
const X_DIVISOR: u32;
|
||||
|
||||
unsafe fn copy_gob(
|
||||
tiled: usize,
|
||||
linear: LinearPointer,
|
||||
start: Offset4D<units::Bytes>,
|
||||
end: Offset4D<units::Bytes>,
|
||||
);
|
||||
|
||||
// No bounding box for this one
|
||||
unsafe fn copy_whole_gob(tiled: usize, linear: LinearPointer) {
|
||||
Self::copy_gob(
|
||||
tiled,
|
||||
linear,
|
||||
Offset4D::new(0, 0, 0, 0),
|
||||
Offset4D::new(0, 0, 0, 0) + Self::GOB_EXTENT_B,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies up to 16 bytes between a tiled address and a linear address.
///
/// The implementor decides the direction of the copy.
trait Copy16B {
    /// X divisor of the linear side that this copy expects.
    const X_DIVISOR: u32;

    /// Copies `bytes` bytes between `tiled` and `linear`.
    ///
    /// # Safety
    ///
    /// Both pointers must be valid for `bytes` bytes of the access the
    /// implementation performs.
    unsafe fn copy(tiled: *mut u8, linear: *mut u8, bytes: usize);

    /// Copies one full 16-byte chunk by forwarding to `copy` with a length
    /// of 16.
    ///
    /// # Safety
    ///
    /// Same requirements as `copy`, for 16 bytes.
    unsafe fn copy_16b(tiled: *mut [u8; 16], linear: *mut [u8; 16]) {
        let tiled_bytes = tiled.cast::<u8>();
        let linear_bytes = linear.cast::<u8>();
        Self::copy(tiled_bytes, linear_bytes, 16);
    }
}
|
||||
|
||||
struct CopyGOBTuring2D<C: Copy16B> {
|
||||
phantom: std::marker::PhantomData<C>,
|
||||
}
|
||||
|
||||
impl<C: Copy16B> CopyGOBTuring2D<C> {
|
||||
fn for_each_16b(mut f: impl FnMut(u32, u32, u32)) {
|
||||
for i in 0..2 {
|
||||
f(i * 0x100 + 0x00, i * 32 + 0, 0);
|
||||
f(i * 0x100 + 0x10, i * 32 + 0, 1);
|
||||
f(i * 0x100 + 0x20, i * 32 + 0, 2);
|
||||
f(i * 0x100 + 0x30, i * 32 + 0, 3);
|
||||
|
||||
f(i * 0x100 + 0x40, i * 32 + 16, 0);
|
||||
f(i * 0x100 + 0x50, i * 32 + 16, 1);
|
||||
f(i * 0x100 + 0x60, i * 32 + 16, 2);
|
||||
f(i * 0x100 + 0x70, i * 32 + 16, 3);
|
||||
|
||||
f(i * 0x100 + 0x80, i * 32 + 0, 4);
|
||||
f(i * 0x100 + 0x90, i * 32 + 0, 5);
|
||||
f(i * 0x100 + 0xa0, i * 32 + 0, 6);
|
||||
f(i * 0x100 + 0xb0, i * 32 + 0, 7);
|
||||
|
||||
f(i * 0x100 + 0xc0, i * 32 + 16, 4);
|
||||
f(i * 0x100 + 0xd0, i * 32 + 16, 5);
|
||||
f(i * 0x100 + 0xe0, i * 32 + 16, 6);
|
||||
f(i * 0x100 + 0xf0, i * 32 + 16, 7);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Copy16B> CopyGOB for CopyGOBTuring2D<C> {
|
||||
const GOB_EXTENT_B: Extent4D<units::Bytes> = Extent4D::new(64, 8, 1, 1);
|
||||
const X_DIVISOR: u32 = C::X_DIVISOR;
|
||||
|
||||
unsafe fn copy_gob(
|
||||
tiled: usize,
|
||||
linear: LinearPointer,
|
||||
start: Offset4D<units::Bytes>,
|
||||
end: Offset4D<units::Bytes>,
|
||||
) {
|
||||
Self::for_each_16b(|offset, x, y| {
|
||||
if y >= start.y && y < end.y {
|
||||
let tiled = tiled + (offset as usize);
|
||||
let linear = linear.at(Offset4D::new(x, y, 0, 0));
|
||||
if x >= start.x && x + 16 <= end.x {
|
||||
C::copy_16b(tiled as *mut _, linear as *mut _);
|
||||
} else if x + 16 >= start.x && x < end.x {
|
||||
let start = (std::cmp::max(x, start.x) - x) as usize;
|
||||
let end = std::cmp::min(end.x - x, 16) as usize;
|
||||
C::copy(
|
||||
(tiled + start) as *mut _,
|
||||
(linear + start) as *mut _,
|
||||
end - start,
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
unsafe fn copy_whole_gob(tiled: usize, linear: LinearPointer) {
|
||||
Self::for_each_16b(|offset, x, y| {
|
||||
let tiled = tiled + (offset as usize);
|
||||
let linear = linear.at(Offset4D::new(x, y, 0, 0));
|
||||
C::copy_16b(tiled as *mut _, linear as *mut _);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Expands `start..end` outwards to multiples of `align`: the start is
/// rounded down and the end is rounded up.  `align` must be a power of two.
fn aligned_range(start: u32, end: u32, align: u32) -> Range<u32> {
    debug_assert!(align.is_power_of_two());
    let low_bits = align - 1;
    let floor = start & !low_bits;
    let ceil = (end + low_bits) & !low_bits;
    Range {
        start: floor,
        end: ceil,
    }
}
|
||||
|
||||
/// Returns the part of `whole` that falls inside the chunk starting at
/// `chunk_start` and spanning `chunk_len`, expressed relative to
/// `chunk_start`.
///
/// The chunk must start before `whole.end`.
fn chunk_range(
    whole: Range<u32>,
    chunk_start: u32,
    chunk_len: u32,
) -> Range<u32> {
    debug_assert!(chunk_start < whole.end);
    // Zero when the chunk starts at or after the start of `whole`.
    let rel_start = whole.start.saturating_sub(chunk_start);
    let rel_end = (whole.end - chunk_start).min(chunk_len);
    rel_start..rel_end
}
|
||||
|
||||
fn for_each_extent4d<U>(
|
||||
start: Offset4D<U>,
|
||||
end: Offset4D<U>,
|
||||
chunk: Extent4D<U>,
|
||||
mut f: impl FnMut(Offset4D<U>, Offset4D<U>, Offset4D<U>),
|
||||
) {
|
||||
debug_assert!(chunk.width.is_power_of_two());
|
||||
debug_assert!(chunk.height.is_power_of_two());
|
||||
debug_assert!(chunk.depth.is_power_of_two());
|
||||
debug_assert!(chunk.array_len == 1);
|
||||
|
||||
debug_assert!(start.a == 0);
|
||||
debug_assert!(end.a == 1);
|
||||
|
||||
let x_range = aligned_range(start.x, end.x, chunk.width);
|
||||
let y_range = aligned_range(start.y, end.y, chunk.height);
|
||||
let z_range = aligned_range(start.z, end.z, chunk.depth);
|
||||
|
||||
for z in z_range.step_by(chunk.depth as usize) {
|
||||
let chunk_z = chunk_range(start.z..end.z, z, chunk.depth);
|
||||
for y in y_range.clone().step_by(chunk.height as usize) {
|
||||
let chunk_y = chunk_range(start.y..end.y, y, chunk.height);
|
||||
for x in x_range.clone().step_by(chunk.width as usize) {
|
||||
let chunk_x = chunk_range(start.x..end.x, x, chunk.width);
|
||||
let chunk_start = Offset4D::new(x, y, z, start.a);
|
||||
let start = Offset4D::new(
|
||||
chunk_x.start,
|
||||
chunk_y.start,
|
||||
chunk_z.start,
|
||||
start.a,
|
||||
);
|
||||
let end =
|
||||
Offset4D::new(chunk_x.end, chunk_y.end, chunk_z.end, end.a);
|
||||
f(chunk_start, start, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn for_each_extent4d_aligned<U>(
|
||||
start: Offset4D<U>,
|
||||
end: Offset4D<U>,
|
||||
chunk: Extent4D<U>,
|
||||
mut f: impl FnMut(Offset4D<U>),
|
||||
) {
|
||||
debug_assert!(start.x % chunk.width == 0);
|
||||
debug_assert!(start.y % chunk.height == 0);
|
||||
debug_assert!(start.z % chunk.depth == 0);
|
||||
debug_assert!(start.a == 0);
|
||||
|
||||
debug_assert!(end.x % chunk.width == 0);
|
||||
debug_assert!(end.y % chunk.height == 0);
|
||||
debug_assert!(end.z % chunk.depth == 0);
|
||||
debug_assert!(end.a == 1);
|
||||
|
||||
debug_assert!(chunk.width.is_power_of_two());
|
||||
debug_assert!(chunk.height.is_power_of_two());
|
||||
debug_assert!(chunk.depth.is_power_of_two());
|
||||
debug_assert!(chunk.array_len == 1);
|
||||
|
||||
for z in (start.z..end.z).step_by(chunk.depth as usize) {
|
||||
for y in (start.y..end.y).step_by(chunk.height as usize) {
|
||||
for x in (start.x..end.x).step_by(chunk.width as usize) {
|
||||
f(Offset4D::new(x, y, z, start.a));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct BlockPointer {
|
||||
pointer: usize,
|
||||
x_mul: usize,
|
||||
y_mul: usize,
|
||||
z_mul: usize,
|
||||
#[cfg(debug_assertions)]
|
||||
bl_extent: Extent4D<units::Bytes>,
|
||||
}
|
||||
|
||||
impl BlockPointer {
|
||||
fn new(
|
||||
pointer: usize,
|
||||
bl_extent: Extent4D<units::Bytes>,
|
||||
extent: Extent4D<units::Bytes>,
|
||||
) -> BlockPointer {
|
||||
debug_assert!(bl_extent.array_len == 1);
|
||||
|
||||
debug_assert!(extent.width % bl_extent.width == 0);
|
||||
debug_assert!(extent.height % bl_extent.height == 0);
|
||||
debug_assert!(extent.depth % bl_extent.depth == 0);
|
||||
debug_assert!(extent.array_len == 1);
|
||||
|
||||
BlockPointer {
|
||||
pointer,
|
||||
// We assume that offsets passed to at() are aligned to bl_extent so
|
||||
//
|
||||
// x_bl * bl_size_B
|
||||
// = (x / bl_extent.width) * bl_size_B
|
||||
// = x * (bl_size_B / bl_extent.width)
|
||||
// = x * bl_extent.height * bl_extent.depth
|
||||
x_mul: (bl_extent.height as usize) * (bl_extent.depth as usize),
|
||||
|
||||
// y_bl * width_bl * bl_size_B
|
||||
// (y / bl_extent.height) * width_bl * bl_size_B
|
||||
// = y * (bl_size_B / bl_extent.height) * width_bl
|
||||
// = y * bl_extent.width * bl_extent.depth * width_bl
|
||||
// = y * (width_bl * bl_extent.width) * bl_extent.depth
|
||||
// = x * extent.width * bl_extent.depth
|
||||
y_mul: (extent.width as usize) * (bl_extent.depth as usize),
|
||||
|
||||
// z_bl * width_bl * height_bl * bl_size_B
|
||||
// = (z / bl_extent.depth) * width_bl * height_bl * bl_size_B
|
||||
// = z * (bl_size_B / bl_extent.depth) * width_bl * height_bl
|
||||
// = z * (bl_extent.width * bl_extent.height) * width_bl * height_bl
|
||||
// = z * width_bl * bl_extent.width * height_bl * bl_extent.height
|
||||
// = z * extent.width * extent.height
|
||||
z_mul: (extent.width as usize) * (extent.height as usize),
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
bl_extent,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn at(&self, offset: Offset4D<units::Bytes>) -> usize {
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
debug_assert!(offset.x % self.bl_extent.width == 0);
|
||||
debug_assert!(offset.y % self.bl_extent.height == 0);
|
||||
debug_assert!(offset.z % self.bl_extent.depth == 0);
|
||||
debug_assert!(offset.a == 0);
|
||||
}
|
||||
|
||||
self.pointer
|
||||
+ (offset.z as usize) * self.z_mul
|
||||
+ (offset.y as usize) * self.y_mul
|
||||
+ (offset.x as usize) * self.x_mul
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
struct LinearPointer {
|
||||
pointer: usize,
|
||||
x_shift: u32,
|
||||
row_stride_B: usize,
|
||||
plane_stride_B: usize,
|
||||
}
|
||||
|
||||
impl LinearPointer {
|
||||
fn new(
|
||||
pointer: usize,
|
||||
x_divisor: u32,
|
||||
row_stride_B: usize,
|
||||
plane_stride_B: usize,
|
||||
) -> LinearPointer {
|
||||
debug_assert!(x_divisor.is_power_of_two());
|
||||
LinearPointer {
|
||||
pointer,
|
||||
x_shift: x_divisor.ilog2(),
|
||||
row_stride_B,
|
||||
plane_stride_B,
|
||||
}
|
||||
}
|
||||
|
||||
fn x_divisor(&self) -> u32 {
|
||||
1 << self.x_shift
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reverse(self, offset: Offset4D<units::Bytes>) -> LinearPointer {
|
||||
debug_assert!(offset.x % (1 << self.x_shift) == 0);
|
||||
debug_assert!(offset.a == 0);
|
||||
LinearPointer {
|
||||
pointer: self
|
||||
.pointer
|
||||
.wrapping_sub((offset.z as usize) * self.plane_stride_B)
|
||||
.wrapping_sub((offset.y as usize) * self.row_stride_B)
|
||||
.wrapping_sub((offset.x >> self.x_shift) as usize),
|
||||
x_shift: self.x_shift,
|
||||
row_stride_B: self.row_stride_B,
|
||||
plane_stride_B: self.plane_stride_B,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn at(self, offset: Offset4D<units::Bytes>) -> usize {
|
||||
debug_assert!(offset.x % (1 << self.x_shift) == 0);
|
||||
debug_assert!(offset.a == 0);
|
||||
self.pointer
|
||||
.wrapping_add((offset.z as usize) * self.plane_stride_B)
|
||||
.wrapping_add((offset.y as usize) * self.row_stride_B)
|
||||
.wrapping_add((offset.x >> self.x_shift) as usize)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn offset(self, offset: Offset4D<units::Bytes>) -> LinearPointer {
|
||||
LinearPointer {
|
||||
pointer: self.at(offset),
|
||||
x_shift: self.x_shift,
|
||||
row_stride_B: self.row_stride_B,
|
||||
plane_stride_B: self.plane_stride_B,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn copy_tile<CG: CopyGOB>(
|
||||
tiling: Tiling,
|
||||
tile_ptr: usize,
|
||||
linear: LinearPointer,
|
||||
start: Offset4D<units::Bytes>,
|
||||
end: Offset4D<units::Bytes>,
|
||||
) {
|
||||
debug_assert!(linear.x_divisor() == CG::X_DIVISOR);
|
||||
debug_assert!(tiling.gob_type.extent_B() == CG::GOB_EXTENT_B);
|
||||
|
||||
let tile_extent_B = tiling.extent_B();
|
||||
let tile_ptr = BlockPointer::new(tile_ptr, CG::GOB_EXTENT_B, tile_extent_B);
|
||||
|
||||
if start.is_aligned_to(CG::GOB_EXTENT_B)
|
||||
&& end.is_aligned_to(CG::GOB_EXTENT_B)
|
||||
{
|
||||
for_each_extent4d_aligned(start, end, CG::GOB_EXTENT_B, |gob| {
|
||||
CG::copy_whole_gob(tile_ptr.at(gob), linear.offset(gob));
|
||||
});
|
||||
} else {
|
||||
for_each_extent4d(start, end, CG::GOB_EXTENT_B, |gob, start, end| {
|
||||
let tiled = tile_ptr.at(gob);
|
||||
let linear = linear.offset(gob);
|
||||
if start == Offset4D::new(0, 0, 0, 0)
|
||||
&& end == Offset4D::new(0, 0, 0, 0) + CG::GOB_EXTENT_B
|
||||
{
|
||||
CG::copy_whole_gob(tiled, linear);
|
||||
} else {
|
||||
CG::copy_gob(tiled, linear, start, end);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn copy_tiled<CG: CopyGOB>(
|
||||
tiling: Tiling,
|
||||
level_extent_B: Extent4D<units::Bytes>,
|
||||
level_tiled_ptr: usize,
|
||||
linear: LinearPointer,
|
||||
start: Offset4D<units::Bytes>,
|
||||
end: Offset4D<units::Bytes>,
|
||||
) {
|
||||
let tile_extent_B = tiling.extent_B();
|
||||
let level_extent_B = level_extent_B.align(&tile_extent_B);
|
||||
|
||||
// Back up the linear pointer so it also points at the start of the level.
|
||||
// This way, every step of the iteration can assume that both pointers
|
||||
// point to the start chunk of the level, tile, or GOB.
|
||||
let linear = linear.reverse(start);
|
||||
|
||||
let level_tiled_ptr =
|
||||
BlockPointer::new(level_tiled_ptr, tile_extent_B, level_extent_B);
|
||||
|
||||
for_each_extent4d(start, end, tile_extent_B, |tile, start, end| {
|
||||
let tile_ptr = level_tiled_ptr.at(tile);
|
||||
let linear = linear.offset(tile);
|
||||
copy_tile::<CG>(tiling, tile_ptr, linear, start, end);
|
||||
});
|
||||
}
|
||||
|
||||
struct RawCopyToTiled {}
|
||||
|
||||
impl Copy16B for RawCopyToTiled {
|
||||
const X_DIVISOR: u32 = 1;
|
||||
|
||||
unsafe fn copy(tiled: *mut u8, linear: *mut u8, bytes: usize) {
|
||||
// This is backwards from memcpy
|
||||
std::ptr::copy_nonoverlapping(linear, tiled, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
struct RawCopyToLinear {}
|
||||
|
||||
impl Copy16B for RawCopyToLinear {
|
||||
const X_DIVISOR: u32 = 1;
|
||||
|
||||
unsafe fn copy(tiled: *mut u8, linear: *mut u8, bytes: usize) {
|
||||
// This is backwards from memcpy
|
||||
std::ptr::copy_nonoverlapping(tiled, linear, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn nil_copy_linear_to_tiled(
|
||||
tiled_dst: *mut c_void,
|
||||
level_extent_B: Extent4D<units::Bytes>,
|
||||
linear_src: *const c_void,
|
||||
linear_row_stride_B: usize,
|
||||
linear_plane_stride_B: usize,
|
||||
offset_B: Offset4D<units::Bytes>,
|
||||
extent_B: Extent4D<units::Bytes>,
|
||||
tiling: &Tiling,
|
||||
) {
|
||||
let end_B = offset_B + extent_B;
|
||||
|
||||
let linear_src = linear_src as usize;
|
||||
let tiled_dst = tiled_dst as usize;
|
||||
let linear_pointer = LinearPointer::new(
|
||||
linear_src,
|
||||
1,
|
||||
linear_row_stride_B,
|
||||
linear_plane_stride_B,
|
||||
);
|
||||
|
||||
copy_tiled::<CopyGOBTuring2D<RawCopyToTiled>>(
|
||||
*tiling,
|
||||
level_extent_B,
|
||||
tiled_dst,
|
||||
linear_pointer,
|
||||
offset_B,
|
||||
end_B,
|
||||
);
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn nil_copy_tiled_to_linear(
|
||||
linear_dst: *mut c_void,
|
||||
linear_row_stride_B: usize,
|
||||
linear_plane_stride_B: usize,
|
||||
tiled_src: *const c_void,
|
||||
level_extent_B: Extent4D<units::Bytes>,
|
||||
offset_B: Offset4D<units::Bytes>,
|
||||
extent_B: Extent4D<units::Bytes>,
|
||||
tiling: &Tiling,
|
||||
) {
|
||||
let mut end_B = offset_B + extent_B;
|
||||
end_B.a = 1;
|
||||
let linear_dst = linear_dst as usize;
|
||||
let tiled_src = tiled_src as usize;
|
||||
let linear_pointer = LinearPointer::new(
|
||||
linear_dst,
|
||||
1,
|
||||
linear_row_stride_B,
|
||||
linear_plane_stride_B,
|
||||
);
|
||||
|
||||
copy_tiled::<CopyGOBTuring2D<RawCopyToLinear>>(
|
||||
*tiling,
|
||||
level_extent_B,
|
||||
tiled_src,
|
||||
linear_pointer,
|
||||
offset_B,
|
||||
end_B,
|
||||
);
|
||||
}
|
||||
|
|
@ -4,6 +4,7 @@
|
|||
extern crate nil_rs_bindings;
|
||||
extern crate nvidia_headers;
|
||||
|
||||
mod copy;
|
||||
mod extent;
|
||||
mod format;
|
||||
mod image;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue