nvk: Use VM_BIND for contiguous heaps instead of copying
This gets rid of our (fairly sketchy) heap resizing via stall-and-copy and
replaces it with VM_BIND. We couldn't do this on the old nouveau API, but now
that we can assume VM_BIND, it makes everything simpler.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27205>
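In miniature, the change looks like this. The following is a condensed sketch assembled from the hunks below, with locking and error handling elided; all function and field names are taken from the diff itself:

    /* Old: a contiguous heap was a single BO.  Growing it meant allocating
     * a larger BO, DMA-copying the old contents over with the NV90B5 copy
     * class, and stalling until the copy landed -- and the heap's base
     * address changed every time. */

    /* New: reserve the heap's maximum VA range once at init... */
    heap->base_addr = nouveau_ws_alloc_vma(dev->ws_dev, 0, NVK_HEAP_MAX_SIZE,
                                           0, false /* bda */,
                                           false /* sparse */);

    /* ...then, on each grow, bind the new BO at the next free offset.  The
     * base address never moves and nothing is ever copied. */
    heap->bos[heap->bo_count].addr = heap->base_addr + heap->total_size;
    nouveau_ws_bo_bind_vma(dev->ws_dev, heap->bos[heap->bo_count].bo,
                           heap->bos[heap->bo_count].addr,
                           new_bo_size, 0, 0);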
parent f0fad6ed17
commit e162c2e78e

7 changed files with 96 additions and 183 deletions
@@ -55,6 +55,15 @@ nvk_push_dispatch_state_init(struct nvk_device *dev, struct nv_push *p)
    if (pdev->info.cls_compute == MAXWELL_COMPUTE_A)
       P_IMMD(p, NVB0C0, SET_SELECT_MAXWELL_TEXTURE_HEADERS, V_TRUE);
 
+   if (pdev->info.cls_eng3d < VOLTA_COMPUTE_A) {
+      uint64_t shader_base_addr =
+         nvk_heap_contiguous_base_address(&dev->shader_heap);
+
+      P_MTHD(p, NVA0C0, SET_PROGRAM_REGION_A);
+      P_NVA0C0_SET_PROGRAM_REGION_A(p, shader_base_addr >> 32);
+      P_NVA0C0_SET_PROGRAM_REGION_B(p, shader_base_addr);
+   }
+
    return VK_SUCCESS;
 }
 
@@ -367,6 +367,15 @@ nvk_push_draw_state_init(struct nvk_device *dev, struct nv_push *p)
 
    P_IMMD(p, NV9097, SET_CT_MRT_ENABLE, V_TRUE);
 
+   if (pdev->info.cls_eng3d < VOLTA_A) {
+      uint64_t shader_base_addr =
+         nvk_heap_contiguous_base_address(&dev->shader_heap);
+
+      P_MTHD(p, NV9097, SET_PROGRAM_REGION_A);
+      P_NV9097_SET_PROGRAM_REGION_A(p, shader_base_addr >> 32);
+      P_NV9097_SET_PROGRAM_REGION_B(p, shader_base_addr);
+   }
+
    for (uint32_t i = 0; i < 6; i++) {
       P_IMMD(p, NV9097, SET_PIPELINE_SHADER(i), {
          .enable = ENABLE_FALSE,
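Both hunks above exist because, prior to Volta, the 3D and compute classes take shader start addresses as offsets relative to the program region base rather than as full 64-bit virtual addresses. That is why the contiguous heap hands out BO-relative offsets (see nvk_heap_alloc_locked below) and why the base must be programmed once via SET_PROGRAM_REGION. A full VA would be recovered as in this hypothetical helper; it is illustrative only, not a function in the tree:

    static inline uint64_t
    shader_full_va(struct nvk_device *dev, uint64_t region_offset)
    {
       /* Safe to read without a lock: with VM_BIND the region base is
        * fixed for the lifetime of the heap. */
       return nvk_heap_contiguous_base_address(&dev->shader_heap) +
              region_offset;
    }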
@@ -26,7 +26,17 @@ nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
       heap->bo_flags |= NOUVEAU_WS_BO_MAP;
    heap->map_flags = map_flags;
    heap->overalloc = overalloc;
-   heap->contiguous = contiguous;
+
+   if (contiguous) {
+      heap->base_addr = nouveau_ws_alloc_vma(dev->ws_dev, 0,
+                                             NVK_HEAP_MAX_SIZE,
+                                             0, false /* bda */,
+                                             false /* sparse */);
+      if (heap->base_addr == 0) {
+         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                          "Failed to allocate VMA for heap");
+      }
+   }
 
    simple_mtx_init(&heap->mutex, mtx_plain);
    util_vma_heap_init(&heap->heap, 0, 0);
@@ -41,12 +51,21 @@ void
 nvk_heap_finish(struct nvk_device *dev, struct nvk_heap *heap)
 {
    for (uint32_t bo_idx = 0; bo_idx < heap->bo_count; bo_idx++) {
+      if (heap->base_addr != 0) {
+         nouveau_ws_bo_unbind_vma(dev->ws_dev, heap->bos[bo_idx].addr,
+                                  heap->bos[bo_idx].bo->size);
+      }
       nouveau_ws_bo_unmap(heap->bos[bo_idx].bo, heap->bos[bo_idx].map);
       nouveau_ws_bo_destroy(heap->bos[bo_idx].bo);
    }
 
    util_vma_heap_finish(&heap->heap);
    simple_mtx_destroy(&heap->mutex);
+
+   if (heap->base_addr != 0) {
+      nouveau_ws_free_vma(dev->ws_dev, heap->base_addr, NVK_HEAP_MAX_SIZE,
+                          false /* bda */, false /* sparse */);
+   }
 }
 
 static uint64_t
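For reference, the four winsys calls introduced across the heap hunks pair up into a single VA lifecycle. This is a summary comment assembled from this diff, not an addition to it:

    /*
     * init:    base_addr = nouveau_ws_alloc_vma(ws, 0, NVK_HEAP_MAX_SIZE,
     *                                           0, false, false);
     * grow:    nouveau_ws_bo_bind_vma(ws, new_bo, base_addr + total_size,
     *                                 new_bo_size, 0, 0);
     * finish:  nouveau_ws_bo_unbind_vma(ws, bos[i].addr, bos[i].bo->size);
     *          nouveau_ws_free_vma(ws, base_addr, NVK_HEAP_MAX_SIZE,
     *                              false, false);
     *
     * Only address space is reserved up front; physical memory is
     * committed one BO at a time as the heap grows.
     */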
@@ -74,115 +93,41 @@ vma_bo_offset(uint64_t offset)
 static VkResult
 nvk_heap_grow_locked(struct nvk_device *dev, struct nvk_heap *heap)
 {
-   VkResult result;
-
-   if (heap->contiguous) {
-      if (heap->total_size >= NVK_HEAP_MAX_SIZE) {
-         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "Heap has already hit its maximum size");
-      }
-
-      const uint64_t new_bo_size =
-         MAX2(heap->total_size * 2, NVK_HEAP_MIN_SIZE);
-
-      void *new_bo_map;
-      struct nouveau_ws_bo *new_bo =
-         nouveau_ws_bo_new_mapped(dev->ws_dev,
-                                  new_bo_size + heap->overalloc, 0,
-                                  heap->bo_flags, heap->map_flags,
-                                  &new_bo_map);
-      if (new_bo == NULL) {
-         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "Failed to allocate a heap BO: %m");
-      }
-
-      if (heap->bo_count > 0) {
-         assert(heap->bo_count == 1);
-         struct nouveau_ws_bo *old_bo = heap->bos[0].bo;
-
-         assert(util_is_power_of_two_nonzero64(heap->total_size));
-         assert(heap->total_size >= NVK_HEAP_MIN_SIZE);
-         assert(heap->total_size <= old_bo->size);
-         assert(heap->total_size < new_bo_size);
-
-         unsigned line_bytes = MIN2(heap->total_size, 1 << 17);
-         assert(heap->total_size % line_bytes == 0);
-         unsigned line_count = heap->total_size / line_bytes;
-
-         uint32_t push_dw[12];
-         struct nv_push push;
-         nv_push_init(&push, push_dw, ARRAY_SIZE(push_dw));
-         struct nv_push *p = &push;
-
-         P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
-         P_NV90B5_OFFSET_IN_UPPER(p, old_bo->offset >> 32);
-         P_NV90B5_OFFSET_IN_LOWER(p, old_bo->offset & 0xffffffff);
-         P_NV90B5_OFFSET_OUT_UPPER(p, new_bo->offset >> 32);
-         P_NV90B5_OFFSET_OUT_LOWER(p, new_bo->offset & 0xffffffff);
-         P_NV90B5_PITCH_IN(p, line_bytes);
-         P_NV90B5_PITCH_OUT(p, line_bytes);
-         P_NV90B5_LINE_LENGTH_IN(p, line_bytes);
-         P_NV90B5_LINE_COUNT(p, line_count);
-
-         P_IMMD(p, NV90B5, LAUNCH_DMA, {
-            .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
-            .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
-            .flush_enable = FLUSH_ENABLE_TRUE,
-            .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
-            .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
-         });
-
-         struct nouveau_ws_bo *push_bos[] = { new_bo, old_bo, };
-         result = nvk_queue_submit_simple(&dev->queue,
-                                          nv_push_dw_count(&push), push_dw,
-                                          ARRAY_SIZE(push_bos), push_bos);
-         if (result != VK_SUCCESS) {
-            nouveau_ws_bo_unmap(new_bo, new_bo_map);
-            nouveau_ws_bo_destroy(new_bo);
-            return result;
-         }
-
-         nouveau_ws_bo_unmap(heap->bos[0].bo, heap->bos[0].map);
-         nouveau_ws_bo_destroy(heap->bos[0].bo);
-      }
-
-      uint64_t vma = encode_vma(0, heap->total_size);
-      util_vma_heap_free(&heap->heap, vma, new_bo_size - heap->total_size);
-
-      heap->total_size = new_bo_size;
-      heap->bo_count = 1;
-      heap->bos[0].bo = new_bo;
-      heap->bos[0].map = new_bo_map;
-
-      return VK_SUCCESS;
-   } else {
-      if (heap->bo_count >= NVK_HEAP_MAX_BO_COUNT) {
-         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "Heap has already hit its maximum size");
-      }
-
-      /* First two BOs are MIN_SIZE, double after that */
-      const uint64_t new_bo_size =
-         NVK_HEAP_MIN_SIZE << (MAX2(heap->bo_count, 1) - 1);
-
-      heap->bos[heap->bo_count].bo =
-         nouveau_ws_bo_new_mapped(dev->ws_dev,
-                                  new_bo_size + heap->overalloc, 0,
-                                  heap->bo_flags, heap->map_flags,
-                                  &heap->bos[heap->bo_count].map);
-      if (heap->bos[heap->bo_count].bo == NULL) {
-         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "Failed to allocate a heap BO: %m");
-      }
-
-      uint64_t vma = encode_vma(heap->bo_count, 0);
-      util_vma_heap_free(&heap->heap, vma, new_bo_size);
-
-      heap->total_size += new_bo_size;
-      heap->bo_count++;
-
-      return VK_SUCCESS;
-   }
+   if (heap->bo_count >= NVK_HEAP_MAX_BO_COUNT) {
+      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                       "Heap has already hit its maximum size");
+   }
+
+   /* First two BOs are MIN_SIZE, double after that */
+   const uint64_t new_bo_size =
+      NVK_HEAP_MIN_SIZE << (MAX2(heap->bo_count, 1) - 1);
+
+   heap->bos[heap->bo_count].bo =
+      nouveau_ws_bo_new_mapped(dev->ws_dev,
+                               new_bo_size + heap->overalloc, 0,
+                               heap->bo_flags, heap->map_flags,
+                               &heap->bos[heap->bo_count].map);
+   if (heap->bos[heap->bo_count].bo == NULL) {
+      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                       "Failed to allocate a heap BO: %m");
+   }
+
+   if (heap->base_addr != 0) {
+      heap->bos[heap->bo_count].addr = heap->base_addr + heap->total_size;
+      nouveau_ws_bo_bind_vma(dev->ws_dev, heap->bos[heap->bo_count].bo,
+                             heap->bos[heap->bo_count].addr,
+                             new_bo_size, 0, 0);
+   } else {
+      heap->bos[heap->bo_count].addr = heap->bos[heap->bo_count].bo->offset;
+   }
+
+   uint64_t vma = encode_vma(heap->bo_count, 0);
+   util_vma_heap_free(&heap->heap, vma, new_bo_size);
+
+   heap->total_size += new_bo_size;
+   heap->bo_count++;
+
+   return VK_SUCCESS;
 }
 
 static VkResult
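Note that the contiguous and non-contiguous paths now share one bookkeeping scheme: free space is handed to the util_vma_heap keyed by a synthetic value from encode_vma(bo_idx, bo_offset), with vma_bo_offset() recovering the offset. The encoding itself is not part of this diff; the following is only an assumed sketch of its shape, not the code in nvk_heap.c:

    /* Hypothetical encoding, for illustration only: BO index in the high
     * bits, byte offset within the BO in the low bits. */
    static uint64_t
    encode_vma_sketch(uint32_t bo_idx, uint64_t bo_offset)
    {
       return ((uint64_t)bo_idx << 40) | bo_offset;
    }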
@@ -201,7 +146,7 @@ nvk_heap_alloc_locked(struct nvk_device *dev, struct nvk_heap *heap,
       assert(bo_offset + size + heap->overalloc <=
              heap->bos[bo_idx].bo->size);
 
-      if (heap->contiguous) {
+      if (heap->base_addr != 0) {
          assert(bo_idx == 0);
          *addr_out = bo_offset;
       } else {
@@ -224,31 +169,21 @@ nvk_heap_free_locked(struct nvk_device *dev, struct nvk_heap *heap,
 {
    assert(addr + size > addr);
 
-   if (heap->contiguous) {
-      assert(heap->bo_count == 1);
-      uint64_t bo_offset = addr;
-
-      assert(bo_offset + size <= heap->bos[0].bo->size);
-      uint64_t vma = encode_vma(0, bo_offset);
-
-      util_vma_heap_free(&heap->heap, vma, size);
-   } else {
-      for (uint32_t bo_idx = 0; bo_idx < heap->bo_count; bo_idx++) {
-         if (addr < heap->bos[bo_idx].bo->offset)
-            continue;
-
-         uint64_t bo_offset = addr - heap->bos[bo_idx].bo->offset;
-         if (bo_offset >= heap->bos[bo_idx].bo->size)
-            continue;
-
-         assert(bo_offset + size <= heap->bos[bo_idx].bo->size);
-         uint64_t vma = encode_vma(bo_idx, bo_offset);
-
-         util_vma_heap_free(&heap->heap, vma, size);
-         return;
-      }
-      assert(!"Failed to find heap BO");
-   }
+   for (uint32_t bo_idx = 0; bo_idx < heap->bo_count; bo_idx++) {
+      if (addr < heap->bos[bo_idx].bo->offset)
+         continue;
+
+      uint64_t bo_offset = addr - heap->bos[bo_idx].bo->offset;
+      if (bo_offset >= heap->bos[bo_idx].bo->size)
+         continue;
+
+      assert(bo_offset + size <= heap->bos[bo_idx].bo->size);
+      uint64_t vma = encode_vma(bo_idx, bo_offset);
+
+      util_vma_heap_free(&heap->heap, vma, size);
+      return;
+   }
+   assert(!"Failed to find heap BO");
 }
 
 VkResult
@@ -256,12 +191,6 @@ nvk_heap_alloc(struct nvk_device *dev, struct nvk_heap *heap,
                uint64_t size, uint32_t alignment,
                uint64_t *addr_out, void **map_out)
 {
-   /* We can't return maps from contiguous heaps because the the map may go
-    * away at any time when the lock isn't taken and we don't want to trust
-    * the caller with racy maps.
-    */
-   assert(!heap->contiguous);
-
    simple_mtx_lock(&heap->mutex);
    VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
                                            addr_out, map_out);
@@ -23,17 +23,20 @@ struct nvk_device;
 struct nvk_heap_bo {
    struct nouveau_ws_bo *bo;
    void *map;
+   uint64_t addr;
 };
 
 struct nvk_heap {
    enum nouveau_ws_bo_flags bo_flags;
    enum nouveau_ws_bo_map_flags map_flags;
    uint32_t overalloc;
-   bool contiguous;
 
    simple_mtx_t mutex;
    struct util_vma_heap heap;
 
+   /* Base address for contiguous heaps, 0 otherwise */
+   uint64_t base_addr;
+
    uint64_t total_size;
 
    uint32_t bo_count;
@@ -58,19 +61,11 @@ VkResult nvk_heap_upload(struct nvk_device *dev, struct nvk_heap *heap,
 void nvk_heap_free(struct nvk_device *dev, struct nvk_heap *heap,
                    uint64_t addr, uint64_t size);
 
-static inline struct nouveau_ws_bo *
-nvk_heap_get_contiguous_bo_ref(struct nvk_heap *heap)
+static inline uint64_t
+nvk_heap_contiguous_base_address(struct nvk_heap *heap)
 {
-   assert(heap->contiguous);
-   assert(heap->bo_count <= 1);
-
-   simple_mtx_lock(&heap->mutex);
-   struct nouveau_ws_bo *bo = heap->bos[0].bo;
-   if (bo)
-      nouveau_ws_bo_ref(bo);
-   simple_mtx_unlock(&heap->mutex);
-
-   return bo;
+   assert(heap->base_addr != 0);
+   return heap->base_addr;
 }
 
 #endif /* define NVK_HEAP_H */
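The accessor's shrinkage here is the payoff: the old helper had to take the heap mutex and hand back a BO reference because a grow could swap the BO out from under the caller, while base_addr is written once in nvk_heap_init and never changes, so a bare read is safe. Condensed from the nvk_queue.c hunks below (the first half is code this commit deletes), the caller-side difference is:

    /* Old: grab a reference under the heap lock, track BO turnover, and
     * re-emit SET_PROGRAM_REGION whenever the heap reallocated. */
    bo = nvk_heap_get_contiguous_bo_ref(&dev->shader_heap);

    /* New: read the immutable base once at queue-state init. */
    uint64_t shader_base_addr =
       nvk_heap_contiguous_base_address(&dev->shader_heap);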
@@ -35,8 +35,6 @@ nvk_queue_state_finish(struct nvk_device *dev,
       nouveau_ws_bo_destroy(qs->images.bo);
    if (qs->samplers.bo)
       nouveau_ws_bo_destroy(qs->samplers.bo);
-   if (qs->shaders.bo)
-      nouveau_ws_bo_destroy(qs->shaders.bo);
    if (qs->slm.bo)
       nouveau_ws_bo_destroy(qs->slm.bo);
    if (qs->push.bo) {
@@ -90,19 +88,6 @@ nvk_queue_state_update(struct nvk_device *dev,
       nouveau_ws_bo_destroy(bo);
    }
 
-   if (dev->shader_heap.contiguous) {
-      bo = nvk_heap_get_contiguous_bo_ref(&dev->shader_heap);
-      if (qs->shaders.bo != bo) {
-         if (qs->shaders.bo)
-            nouveau_ws_bo_destroy(qs->shaders.bo);
-         qs->shaders.bo = bo;
-         dirty = true;
-      } else {
-         if (bo)
-            nouveau_ws_bo_destroy(bo);
-      }
-   }
-
    bo = nvk_slm_area_get_bo_ref(&dev->slm, &bytes_per_warp, &bytes_per_tpc);
    if (qs->slm.bo != bo || qs->slm.bytes_per_warp != bytes_per_warp ||
        qs->slm.bytes_per_tpc != bytes_per_tpc) {
@@ -182,20 +167,6 @@ nvk_queue_state_update(struct nvk_device *dev,
       });
    }
 
-   if (qs->shaders.bo) {
-      /* Compute */
-      assert(dev->pdev->info.cls_compute < VOLTA_COMPUTE_A);
-      P_MTHD(p, NVA0C0, SET_PROGRAM_REGION_A);
-      P_NVA0C0_SET_PROGRAM_REGION_A(p, qs->shaders.bo->offset >> 32);
-      P_NVA0C0_SET_PROGRAM_REGION_B(p, qs->shaders.bo->offset);
-
-      /* 3D */
-      assert(dev->pdev->info.cls_eng3d < VOLTA_A);
-      P_MTHD(p, NV9097, SET_PROGRAM_REGION_A);
-      P_NV9097_SET_PROGRAM_REGION_A(p, qs->shaders.bo->offset >> 32);
-      P_NV9097_SET_PROGRAM_REGION_B(p, qs->shaders.bo->offset);
-   }
-
    if (qs->slm.bo) {
       const uint64_t slm_addr = qs->slm.bo->offset;
       const uint64_t slm_size = qs->slm.bo->size;
@@ -26,10 +26,6 @@ struct nvk_queue_state {
       uint32_t alloc_count;
    } samplers;
 
-   struct {
-      struct nouveau_ws_bo *bo;
-   } shaders;
-
    struct {
       struct nouveau_ws_bo *bo;
       uint32_t bytes_per_warp;
@@ -63,6 +63,10 @@ nouveau_ws_alloc_vma(struct nouveau_ws_device *dev,
 {
    assert(dev->has_vm_bind);
 
+   /* if the caller doesn't care, use the GPU page size */
+   if (align == 0)
+      align = 0x1000;
+
    uint64_t offset;
    simple_mtx_lock(&dev->vma_mutex);
    if (bda_capture_replay) {
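This winsys change supports the new nvk_heap_init call above: an align of 0 now means "no preference" and falls back to 0x1000, the 4 KiB GPU page size, so callers need not know the page size themselves. For example, mirroring the call in nvk_heap_init:

    uint64_t base = nouveau_ws_alloc_vma(dev->ws_dev, 0, NVK_HEAP_MAX_SIZE,
                                         0 /* align: defaults to 4 KiB */,
                                         false /* bda */, false /* sparse */);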