mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 15:38:09 +02:00
Merge ../mesa into vulkan
This commit is contained in:
commit
21d5e52da8
90 changed files with 1475 additions and 1112 deletions
|
|
@ -128,7 +128,7 @@ GL 4.1, GLSL 4.10 --- all DONE: nvc0, r600, radeonsi
|
|||
GL_ARB_separate_shader_objects DONE (all drivers)
|
||||
GL_ARB_shader_precision DONE (all drivers that support GLSL 4.10)
|
||||
GL_ARB_vertex_attrib_64bit DONE (llvmpipe, softpipe)
|
||||
GL_ARB_viewport_array DONE (i965, nv50, llvmpipe)
|
||||
GL_ARB_viewport_array DONE (i965, nv50, llvmpipe, softpipe)
|
||||
|
||||
|
||||
GL 4.2, GLSL 4.20:
|
||||
|
|
|
|||
|
|
@ -93,6 +93,8 @@ C_SOURCES := \
|
|||
pipebuffer/pb_bufmgr_ondemand.c \
|
||||
pipebuffer/pb_bufmgr_pool.c \
|
||||
pipebuffer/pb_bufmgr_slab.c \
|
||||
pipebuffer/pb_cache.c \
|
||||
pipebuffer/pb_cache.h \
|
||||
pipebuffer/pb_validate.c \
|
||||
pipebuffer/pb_validate.h \
|
||||
postprocess/filters.h \
|
||||
|
|
|
|||
|
|
@ -192,11 +192,11 @@ static void interp(const struct clip_stage *clip,
|
|||
t_nopersp = t;
|
||||
/* find either in.x != out.x or in.y != out.y */
|
||||
for (k = 0; k < 2; k++) {
|
||||
if (in->clip[k] != out->clip[k]) {
|
||||
if (in->pre_clip_pos[k] != out->pre_clip_pos[k]) {
|
||||
/* do divide by W, then compute linear interpolation factor */
|
||||
float in_coord = in->clip[k] / in->clip[3];
|
||||
float out_coord = out->clip[k] / out->clip[3];
|
||||
float dst_coord = dst->clip[k] / dst->clip[3];
|
||||
float in_coord = in->pre_clip_pos[k] / in->pre_clip_pos[3];
|
||||
float out_coord = out->pre_clip_pos[k] / out->pre_clip_pos[3];
|
||||
float dst_coord = dst->pre_clip_pos[k] / dst->pre_clip_pos[3];
|
||||
t_nopersp = (dst_coord - out_coord) / (in_coord - out_coord);
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -71,12 +71,10 @@ draw_pt_fetch_prepare(struct pt_fetch *fetch,
|
|||
|
||||
fetch->vertex_size = vertex_size;
|
||||
|
||||
/* Leave the clipmask/edgeflags/pad/vertex_id untouched
|
||||
/* Leave the clipmask/edgeflags/pad/vertex_id,
|
||||
* clip[] and whatever else in the header untouched.
|
||||
*/
|
||||
dst_offset += 1 * sizeof(float);
|
||||
/* Just leave the clip[] and pre_clip_pos[] array untouched.
|
||||
*/
|
||||
dst_offset += 8 * sizeof(float);
|
||||
dst_offset = offsetof(struct vertex_header, data);
|
||||
|
||||
if (instance_id_index != ~0) {
|
||||
num_extra_inputs++;
|
||||
|
|
|
|||
|
|
@ -43,15 +43,7 @@
|
|||
|
||||
#include "pb_buffer.h"
|
||||
#include "pb_bufmgr.h"
|
||||
|
||||
|
||||
/**
|
||||
* Convenience macro (type safe).
|
||||
*/
|
||||
#define SUPER(__derived) (&(__derived)->base)
|
||||
|
||||
|
||||
struct pb_cache_manager;
|
||||
#include "pb_cache.h"
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -60,31 +52,17 @@ struct pb_cache_manager;
|
|||
struct pb_cache_buffer
|
||||
{
|
||||
struct pb_buffer base;
|
||||
|
||||
struct pb_buffer *buffer;
|
||||
struct pb_cache_manager *mgr;
|
||||
|
||||
/** Caching time interval */
|
||||
int64_t start, end;
|
||||
|
||||
struct list_head head;
|
||||
struct pb_cache_entry cache_entry;
|
||||
};
|
||||
|
||||
|
||||
struct pb_cache_manager
|
||||
{
|
||||
struct pb_manager base;
|
||||
|
||||
struct pb_manager *provider;
|
||||
unsigned usecs;
|
||||
|
||||
pipe_mutex mutex;
|
||||
|
||||
struct list_head delayed;
|
||||
pb_size numDelayed;
|
||||
float size_factor;
|
||||
unsigned bypass_usage;
|
||||
uint64_t cache_size, max_cache_size;
|
||||
struct pb_cache cache;
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -104,76 +82,29 @@ pb_cache_manager(struct pb_manager *mgr)
|
|||
}
|
||||
|
||||
|
||||
static void
|
||||
_pb_cache_manager_remove_buffer_locked(struct pb_cache_buffer *buf)
|
||||
{
|
||||
struct pb_cache_manager *mgr = buf->mgr;
|
||||
|
||||
if (buf->head.next) {
|
||||
LIST_DEL(&buf->head);
|
||||
assert(mgr->numDelayed);
|
||||
--mgr->numDelayed;
|
||||
mgr->cache_size -= buf->base.size;
|
||||
}
|
||||
buf->mgr = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
pb_cache_manager_remove_buffer(struct pb_buffer *pb_buf)
|
||||
{
|
||||
struct pb_cache_buffer *buf = (struct pb_cache_buffer*)pb_buf;
|
||||
struct pb_cache_manager *mgr = buf->mgr;
|
||||
struct pb_cache_buffer *buf = pb_cache_buffer(pb_buf);
|
||||
|
||||
if (!mgr)
|
||||
return;
|
||||
|
||||
pipe_mutex_lock(mgr->mutex);
|
||||
_pb_cache_manager_remove_buffer_locked(buf);
|
||||
pipe_mutex_unlock(mgr->mutex);
|
||||
/* the buffer won't be added if mgr is NULL */
|
||||
buf->mgr = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Actually destroy the buffer.
|
||||
*/
|
||||
static inline void
|
||||
_pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
|
||||
static void
|
||||
_pb_cache_buffer_destroy(struct pb_buffer *pb_buf)
|
||||
{
|
||||
if (buf->mgr)
|
||||
_pb_cache_manager_remove_buffer_locked(buf);
|
||||
struct pb_cache_buffer *buf = pb_cache_buffer(pb_buf);
|
||||
|
||||
assert(!pipe_is_referenced(&buf->base.reference));
|
||||
pb_reference(&buf->buffer, NULL);
|
||||
FREE(buf);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Free as many cache buffers from the list head as possible.
|
||||
*/
|
||||
static void
|
||||
_pb_cache_buffer_list_check_free(struct pb_cache_manager *mgr)
|
||||
{
|
||||
struct list_head *curr, *next;
|
||||
struct pb_cache_buffer *buf;
|
||||
int64_t now;
|
||||
|
||||
now = os_time_get();
|
||||
|
||||
curr = mgr->delayed.next;
|
||||
next = curr->next;
|
||||
while(curr != &mgr->delayed) {
|
||||
buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
|
||||
|
||||
if(!os_time_timeout(buf->start, buf->end, now))
|
||||
break;
|
||||
|
||||
_pb_cache_buffer_destroy(buf);
|
||||
|
||||
curr = next;
|
||||
next = curr->next;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
pb_cache_buffer_destroy(struct pb_buffer *_buf)
|
||||
{
|
||||
|
|
@ -186,25 +117,7 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf)
|
|||
return;
|
||||
}
|
||||
|
||||
pipe_mutex_lock(mgr->mutex);
|
||||
assert(!pipe_is_referenced(&buf->base.reference));
|
||||
|
||||
_pb_cache_buffer_list_check_free(mgr);
|
||||
|
||||
/* Directly release any buffer that exceeds the limit. */
|
||||
if (mgr->cache_size + buf->base.size > mgr->max_cache_size) {
|
||||
pb_reference(&buf->buffer, NULL);
|
||||
FREE(buf);
|
||||
pipe_mutex_unlock(mgr->mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
buf->start = os_time_get();
|
||||
buf->end = buf->start + mgr->usecs;
|
||||
LIST_ADDTAIL(&buf->head, &mgr->delayed);
|
||||
++mgr->numDelayed;
|
||||
mgr->cache_size += buf->base.size;
|
||||
pipe_mutex_unlock(mgr->mutex);
|
||||
pb_cache_add_buffer(&buf->cache_entry);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -265,40 +178,24 @@ pb_cache_buffer_vtbl = {
|
|||
};
|
||||
|
||||
|
||||
static inline int
|
||||
pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
|
||||
pb_size size,
|
||||
const struct pb_desc *desc)
|
||||
static bool
|
||||
pb_cache_can_reclaim_buffer(struct pb_buffer *_buf)
|
||||
{
|
||||
if (desc->usage & buf->mgr->bypass_usage)
|
||||
return 0;
|
||||
|
||||
if(buf->base.size < size)
|
||||
return 0;
|
||||
|
||||
/* be lenient with size */
|
||||
if(buf->base.size > (unsigned) (buf->mgr->size_factor * size))
|
||||
return 0;
|
||||
|
||||
if(!pb_check_alignment(desc->alignment, buf->base.alignment))
|
||||
return 0;
|
||||
|
||||
if(!pb_check_usage(desc->usage, buf->base.usage))
|
||||
return 0;
|
||||
struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
|
||||
|
||||
if (buf->mgr->provider->is_buffer_busy) {
|
||||
if (buf->mgr->provider->is_buffer_busy(buf->mgr->provider, buf->buffer))
|
||||
return -1;
|
||||
return false;
|
||||
} else {
|
||||
void *ptr = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL);
|
||||
|
||||
if (!ptr)
|
||||
return -1;
|
||||
return false;
|
||||
|
||||
pb_unmap(buf->buffer);
|
||||
}
|
||||
|
||||
return 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -309,63 +206,15 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
|
|||
{
|
||||
struct pb_cache_manager *mgr = pb_cache_manager(_mgr);
|
||||
struct pb_cache_buffer *buf;
|
||||
struct pb_cache_buffer *curr_buf;
|
||||
struct list_head *curr, *next;
|
||||
int64_t now;
|
||||
int ret = 0;
|
||||
|
||||
pipe_mutex_lock(mgr->mutex);
|
||||
|
||||
buf = NULL;
|
||||
curr = mgr->delayed.next;
|
||||
next = curr->next;
|
||||
|
||||
/* search in the expired buffers, freeing them in the process */
|
||||
now = os_time_get();
|
||||
while(curr != &mgr->delayed) {
|
||||
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
|
||||
if(!buf && (ret = pb_cache_is_buffer_compat(curr_buf, size, desc) > 0))
|
||||
buf = curr_buf;
|
||||
else if(os_time_timeout(curr_buf->start, curr_buf->end, now))
|
||||
_pb_cache_buffer_destroy(curr_buf);
|
||||
else
|
||||
/* This buffer (and all hereafter) are still hot in cache */
|
||||
break;
|
||||
if (ret == -1)
|
||||
break;
|
||||
curr = next;
|
||||
next = curr->next;
|
||||
}
|
||||
|
||||
/* keep searching in the hot buffers */
|
||||
if(!buf && ret != -1) {
|
||||
while(curr != &mgr->delayed) {
|
||||
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
|
||||
ret = pb_cache_is_buffer_compat(curr_buf, size, desc);
|
||||
if (ret > 0) {
|
||||
buf = curr_buf;
|
||||
break;
|
||||
}
|
||||
if (ret == -1)
|
||||
break;
|
||||
/* no need to check the timeout here */
|
||||
curr = next;
|
||||
next = curr->next;
|
||||
}
|
||||
}
|
||||
|
||||
if (buf) {
|
||||
mgr->cache_size -= buf->base.size;
|
||||
LIST_DEL(&buf->head);
|
||||
--mgr->numDelayed;
|
||||
pipe_mutex_unlock(mgr->mutex);
|
||||
/* Increase refcount */
|
||||
pipe_reference_init(&buf->base.reference, 1);
|
||||
/* get a buffer from the cache */
|
||||
buf = (struct pb_cache_buffer *)
|
||||
pb_cache_reclaim_buffer(&mgr->cache, size, desc->alignment,
|
||||
desc->usage);
|
||||
if (buf)
|
||||
return &buf->base;
|
||||
}
|
||||
|
||||
pipe_mutex_unlock(mgr->mutex);
|
||||
|
||||
/* create a new one */
|
||||
buf = CALLOC_STRUCT(pb_cache_buffer);
|
||||
if (!buf)
|
||||
return NULL;
|
||||
|
|
@ -374,7 +223,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
|
|||
|
||||
/* Empty the cache and try again. */
|
||||
if (!buf->buffer) {
|
||||
mgr->base.flush(&mgr->base);
|
||||
pb_cache_release_all_buffers(&mgr->cache);
|
||||
buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc);
|
||||
}
|
||||
|
||||
|
|
@ -385,7 +234,6 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
|
|||
|
||||
assert(pipe_is_referenced(&buf->buffer->reference));
|
||||
assert(pb_check_alignment(desc->alignment, buf->buffer->alignment));
|
||||
assert(pb_check_usage(desc->usage & ~mgr->bypass_usage, buf->buffer->usage));
|
||||
assert(buf->buffer->size >= size);
|
||||
|
||||
pipe_reference_init(&buf->base.reference, 1);
|
||||
|
|
@ -395,6 +243,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
|
|||
|
||||
buf->base.vtbl = &pb_cache_buffer_vtbl;
|
||||
buf->mgr = mgr;
|
||||
pb_cache_init_entry(&mgr->cache, &buf->cache_entry, &buf->base);
|
||||
|
||||
return &buf->base;
|
||||
}
|
||||
|
|
@ -404,19 +253,8 @@ static void
|
|||
pb_cache_manager_flush(struct pb_manager *_mgr)
|
||||
{
|
||||
struct pb_cache_manager *mgr = pb_cache_manager(_mgr);
|
||||
struct list_head *curr, *next;
|
||||
struct pb_cache_buffer *buf;
|
||||
|
||||
pipe_mutex_lock(mgr->mutex);
|
||||
curr = mgr->delayed.next;
|
||||
next = curr->next;
|
||||
while(curr != &mgr->delayed) {
|
||||
buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
|
||||
_pb_cache_buffer_destroy(buf);
|
||||
curr = next;
|
||||
next = curr->next;
|
||||
}
|
||||
pipe_mutex_unlock(mgr->mutex);
|
||||
pb_cache_release_all_buffers(&mgr->cache);
|
||||
|
||||
assert(mgr->provider->flush);
|
||||
if(mgr->provider->flush)
|
||||
|
|
@ -425,9 +263,11 @@ pb_cache_manager_flush(struct pb_manager *_mgr)
|
|||
|
||||
|
||||
static void
|
||||
pb_cache_manager_destroy(struct pb_manager *mgr)
|
||||
pb_cache_manager_destroy(struct pb_manager *_mgr)
|
||||
{
|
||||
pb_cache_manager_flush(mgr);
|
||||
struct pb_cache_manager *mgr = pb_cache_manager(_mgr);
|
||||
|
||||
pb_cache_deinit(&mgr->cache);
|
||||
FREE(mgr);
|
||||
}
|
||||
|
||||
|
|
@ -465,13 +305,9 @@ pb_cache_manager_create(struct pb_manager *provider,
|
|||
mgr->base.create_buffer = pb_cache_manager_create_buffer;
|
||||
mgr->base.flush = pb_cache_manager_flush;
|
||||
mgr->provider = provider;
|
||||
mgr->usecs = usecs;
|
||||
mgr->size_factor = size_factor;
|
||||
mgr->bypass_usage = bypass_usage;
|
||||
LIST_INITHEAD(&mgr->delayed);
|
||||
mgr->numDelayed = 0;
|
||||
mgr->max_cache_size = maximum_cache_size;
|
||||
pipe_mutex_init(mgr->mutex);
|
||||
|
||||
pb_cache_init(&mgr->cache, usecs, size_factor, bypass_usage,
|
||||
maximum_cache_size,
|
||||
_pb_cache_buffer_destroy,
|
||||
pb_cache_can_reclaim_buffer);
|
||||
return &mgr->base;
|
||||
}
|
||||
|
|
|
|||
286
src/gallium/auxiliary/pipebuffer/pb_cache.c
Normal file
286
src/gallium/auxiliary/pipebuffer/pb_cache.c
Normal file
|
|
@ -0,0 +1,286 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2007-2008 VMware, Inc.
|
||||
* Copyright 2015 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#include "pb_cache.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_time.h"
|
||||
|
||||
|
||||
/**
|
||||
* Actually destroy the buffer.
|
||||
*/
|
||||
static void
|
||||
destroy_buffer_locked(struct pb_cache_entry *entry)
|
||||
{
|
||||
struct pb_cache *mgr = entry->mgr;
|
||||
|
||||
assert(!pipe_is_referenced(&entry->buffer->reference));
|
||||
if (entry->head.next) {
|
||||
LIST_DEL(&entry->head);
|
||||
assert(mgr->num_buffers);
|
||||
--mgr->num_buffers;
|
||||
mgr->cache_size -= entry->buffer->size;
|
||||
}
|
||||
entry->mgr->destroy_buffer(entry->buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Free as many cache buffers from the list head as possible.
|
||||
*/
|
||||
static void
|
||||
release_expired_buffers_locked(struct pb_cache *mgr)
|
||||
{
|
||||
struct list_head *curr, *next;
|
||||
struct pb_cache_entry *entry;
|
||||
int64_t now;
|
||||
|
||||
now = os_time_get();
|
||||
|
||||
curr = mgr->cache.next;
|
||||
next = curr->next;
|
||||
while (curr != &mgr->cache) {
|
||||
entry = LIST_ENTRY(struct pb_cache_entry, curr, head);
|
||||
|
||||
if (!os_time_timeout(entry->start, entry->end, now))
|
||||
break;
|
||||
|
||||
destroy_buffer_locked(entry);
|
||||
|
||||
curr = next;
|
||||
next = curr->next;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a buffer to the cache. This is typically done when the buffer is
|
||||
* being released.
|
||||
*/
|
||||
void
|
||||
pb_cache_add_buffer(struct pb_cache_entry *entry)
|
||||
{
|
||||
struct pb_cache *mgr = entry->mgr;
|
||||
|
||||
pipe_mutex_lock(mgr->mutex);
|
||||
assert(!pipe_is_referenced(&entry->buffer->reference));
|
||||
|
||||
release_expired_buffers_locked(mgr);
|
||||
|
||||
/* Directly release any buffer that exceeds the limit. */
|
||||
if (mgr->cache_size + entry->buffer->size > mgr->max_cache_size) {
|
||||
entry->mgr->destroy_buffer(entry->buffer);
|
||||
pipe_mutex_unlock(mgr->mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
entry->start = os_time_get();
|
||||
entry->end = entry->start + mgr->usecs;
|
||||
LIST_ADDTAIL(&entry->head, &mgr->cache);
|
||||
++mgr->num_buffers;
|
||||
mgr->cache_size += entry->buffer->size;
|
||||
pipe_mutex_unlock(mgr->mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* \return 1 if compatible and can be reclaimed
|
||||
* 0 if incompatible
|
||||
* -1 if compatible and can't be reclaimed
|
||||
*/
|
||||
static int
|
||||
pb_cache_is_buffer_compat(struct pb_cache_entry *entry,
|
||||
pb_size size, unsigned alignment, unsigned usage)
|
||||
{
|
||||
struct pb_buffer *buf = entry->buffer;
|
||||
|
||||
if (usage & entry->mgr->bypass_usage)
|
||||
return 0;
|
||||
|
||||
if (buf->size < size)
|
||||
return 0;
|
||||
|
||||
/* be lenient with size */
|
||||
if (buf->size > (unsigned) (entry->mgr->size_factor * size))
|
||||
return 0;
|
||||
|
||||
if (!pb_check_alignment(alignment, buf->alignment))
|
||||
return 0;
|
||||
|
||||
if (!pb_check_usage(usage, buf->usage))
|
||||
return 0;
|
||||
|
||||
return entry->mgr->can_reclaim(buf) ? 1 : -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find a compatible buffer in the cache, return it, and remove it
|
||||
* from the cache.
|
||||
*/
|
||||
struct pb_buffer *
|
||||
pb_cache_reclaim_buffer(struct pb_cache *mgr, pb_size size,
|
||||
unsigned alignment, unsigned usage)
|
||||
{
|
||||
struct pb_cache_entry *entry;
|
||||
struct pb_cache_entry *cur_entry;
|
||||
struct list_head *cur, *next;
|
||||
int64_t now;
|
||||
int ret = 0;
|
||||
|
||||
pipe_mutex_lock(mgr->mutex);
|
||||
|
||||
entry = NULL;
|
||||
cur = mgr->cache.next;
|
||||
next = cur->next;
|
||||
|
||||
/* search in the expired buffers, freeing them in the process */
|
||||
now = os_time_get();
|
||||
while (cur != &mgr->cache) {
|
||||
cur_entry = LIST_ENTRY(struct pb_cache_entry, cur, head);
|
||||
|
||||
if (!entry && (ret = pb_cache_is_buffer_compat(cur_entry, size,
|
||||
alignment, usage) > 0))
|
||||
entry = cur_entry;
|
||||
else if (os_time_timeout(cur_entry->start, cur_entry->end, now))
|
||||
destroy_buffer_locked(cur_entry);
|
||||
else
|
||||
/* This buffer (and all hereafter) are still hot in cache */
|
||||
break;
|
||||
|
||||
/* the buffer is busy (and probably all remaining ones too) */
|
||||
if (ret == -1)
|
||||
break;
|
||||
|
||||
cur = next;
|
||||
next = cur->next;
|
||||
}
|
||||
|
||||
/* keep searching in the hot buffers */
|
||||
if (!entry && ret != -1) {
|
||||
while (cur != &mgr->cache) {
|
||||
cur_entry = LIST_ENTRY(struct pb_cache_entry, cur, head);
|
||||
ret = pb_cache_is_buffer_compat(cur_entry, size, alignment, usage);
|
||||
|
||||
if (ret > 0) {
|
||||
entry = cur_entry;
|
||||
break;
|
||||
}
|
||||
if (ret == -1)
|
||||
break;
|
||||
/* no need to check the timeout here */
|
||||
cur = next;
|
||||
next = cur->next;
|
||||
}
|
||||
}
|
||||
|
||||
/* found a compatible buffer, return it */
|
||||
if (entry) {
|
||||
struct pb_buffer *buf = entry->buffer;
|
||||
|
||||
mgr->cache_size -= buf->size;
|
||||
LIST_DEL(&entry->head);
|
||||
--mgr->num_buffers;
|
||||
pipe_mutex_unlock(mgr->mutex);
|
||||
/* Increase refcount */
|
||||
pipe_reference_init(&buf->reference, 1);
|
||||
return buf;
|
||||
}
|
||||
|
||||
pipe_mutex_unlock(mgr->mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Empty the cache. Useful when there is not enough memory.
|
||||
*/
|
||||
void
|
||||
pb_cache_release_all_buffers(struct pb_cache *mgr)
|
||||
{
|
||||
struct list_head *curr, *next;
|
||||
struct pb_cache_entry *buf;
|
||||
|
||||
pipe_mutex_lock(mgr->mutex);
|
||||
curr = mgr->cache.next;
|
||||
next = curr->next;
|
||||
while (curr != &mgr->cache) {
|
||||
buf = LIST_ENTRY(struct pb_cache_entry, curr, head);
|
||||
destroy_buffer_locked(buf);
|
||||
curr = next;
|
||||
next = curr->next;
|
||||
}
|
||||
pipe_mutex_unlock(mgr->mutex);
|
||||
}
|
||||
|
||||
void
|
||||
pb_cache_init_entry(struct pb_cache *mgr, struct pb_cache_entry *entry,
|
||||
struct pb_buffer *buf)
|
||||
{
|
||||
memset(entry, 0, sizeof(*entry));
|
||||
entry->buffer = buf;
|
||||
entry->mgr = mgr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize a caching buffer manager.
|
||||
*
|
||||
* @param mgr The cache buffer manager
|
||||
* @param usecs Unused buffers may be released from the cache after this
|
||||
* time
|
||||
* @param size_factor Declare buffers that are size_factor times bigger than
|
||||
* the requested size as cache hits.
|
||||
* @param bypass_usage Bitmask. If (requested usage & bypass_usage) != 0,
|
||||
* buffer allocation requests are rejected.
|
||||
* @param maximum_cache_size Maximum size of all unused buffers the cache can
|
||||
* hold.
|
||||
* @param destroy_buffer Function that destroys a buffer for good.
|
||||
* @param can_reclaim Whether a buffer can be reclaimed (e.g. is not busy)
|
||||
*/
|
||||
void
|
||||
pb_cache_init(struct pb_cache *mgr, uint usecs, float size_factor,
|
||||
unsigned bypass_usage, uint64_t maximum_cache_size,
|
||||
void (*destroy_buffer)(struct pb_buffer *buf),
|
||||
bool (*can_reclaim)(struct pb_buffer *buf))
|
||||
{
|
||||
LIST_INITHEAD(&mgr->cache);
|
||||
pipe_mutex_init(mgr->mutex);
|
||||
mgr->cache_size = 0;
|
||||
mgr->max_cache_size = maximum_cache_size;
|
||||
mgr->usecs = usecs;
|
||||
mgr->num_buffers = 0;
|
||||
mgr->bypass_usage = bypass_usage;
|
||||
mgr->size_factor = size_factor;
|
||||
mgr->destroy_buffer = destroy_buffer;
|
||||
mgr->can_reclaim = can_reclaim;
|
||||
}
|
||||
|
||||
/**
|
||||
* Deinitialize the manager completely.
|
||||
*/
|
||||
void
|
||||
pb_cache_deinit(struct pb_cache *mgr)
|
||||
{
|
||||
pb_cache_release_all_buffers(mgr);
|
||||
pipe_mutex_destroy(mgr->mutex);
|
||||
}
|
||||
74
src/gallium/auxiliary/pipebuffer/pb_cache.h
Normal file
74
src/gallium/auxiliary/pipebuffer/pb_cache.h
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2007-2008 VMware, Inc.
|
||||
* Copyright 2015 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef PB_CACHE_H
|
||||
#define PB_CACHE_H
|
||||
|
||||
#include "pb_buffer.h"
|
||||
#include "util/list.h"
|
||||
#include "os/os_thread.h"
|
||||
|
||||
/**
|
||||
* Statically inserted into the driver-specific buffer structure.
|
||||
*/
|
||||
struct pb_cache_entry
|
||||
{
|
||||
struct list_head head;
|
||||
struct pb_buffer *buffer; /**< Pointer to the structure this is part of. */
|
||||
struct pb_cache *mgr;
|
||||
int64_t start, end; /**< Caching time interval */
|
||||
};
|
||||
|
||||
struct pb_cache
|
||||
{
|
||||
struct list_head cache;
|
||||
pipe_mutex mutex;
|
||||
uint64_t cache_size;
|
||||
uint64_t max_cache_size;
|
||||
unsigned usecs;
|
||||
unsigned num_buffers;
|
||||
unsigned bypass_usage;
|
||||
float size_factor;
|
||||
|
||||
void (*destroy_buffer)(struct pb_buffer *buf);
|
||||
bool (*can_reclaim)(struct pb_buffer *buf);
|
||||
};
|
||||
|
||||
void pb_cache_add_buffer(struct pb_cache_entry *entry);
|
||||
struct pb_buffer *pb_cache_reclaim_buffer(struct pb_cache *mgr, pb_size size,
|
||||
unsigned alignment, unsigned usage);
|
||||
void pb_cache_release_all_buffers(struct pb_cache *mgr);
|
||||
void pb_cache_init_entry(struct pb_cache *mgr, struct pb_cache_entry *entry,
|
||||
struct pb_buffer *buf);
|
||||
void pb_cache_init(struct pb_cache *mgr, uint usecs, float size_factor,
|
||||
unsigned bypass_usage, uint64_t maximum_cache_size,
|
||||
void (*destroy_buffer)(struct pb_buffer *buf),
|
||||
bool (*can_reclaim)(struct pb_buffer *buf));
|
||||
void pb_cache_deinit(struct pb_cache *mgr);
|
||||
|
||||
#endif
|
||||
|
|
@ -365,6 +365,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
|
|||
info->output_semantic_index[reg] = (ubyte) semIndex;
|
||||
info->num_outputs++;
|
||||
|
||||
if (semName == TGSI_SEMANTIC_COLOR)
|
||||
info->colors_written |= 1 << semIndex;
|
||||
|
||||
if (procType == TGSI_PROCESSOR_VERTEX ||
|
||||
procType == TGSI_PROCESSOR_GEOMETRY ||
|
||||
procType == TGSI_PROCESSOR_TESS_CTRL ||
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ struct tgsi_shader_info
|
|||
|
||||
uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */
|
||||
|
||||
ubyte colors_written;
|
||||
boolean reads_position; /**< does fragment shader read position? */
|
||||
boolean reads_z; /**< does fragment shader read depth? */
|
||||
boolean writes_z; /**< does fragment shader write Z value? */
|
||||
|
|
|
|||
|
|
@ -295,7 +295,6 @@ struct r300_query {
|
|||
|
||||
/* The buffer where query results are stored. */
|
||||
struct pb_buffer *buf;
|
||||
struct radeon_winsys_cs_handle *cs_buf;
|
||||
};
|
||||
|
||||
struct r300_surface {
|
||||
|
|
@ -303,7 +302,6 @@ struct r300_surface {
|
|||
|
||||
/* Winsys buffer backing the texture. */
|
||||
struct pb_buffer *buf;
|
||||
struct radeon_winsys_cs_handle *cs_buf;
|
||||
|
||||
enum radeon_bo_domain domain;
|
||||
|
||||
|
|
@ -395,7 +393,6 @@ struct r300_resource
|
|||
|
||||
/* Winsys buffer backing this resource. */
|
||||
struct pb_buffer *buf;
|
||||
struct radeon_winsys_cs_handle *cs_buf;
|
||||
enum radeon_bo_domain domain;
|
||||
|
||||
/* Constant buffers and SWTCL vertex and index buffers are in user
|
||||
|
|
@ -460,7 +457,6 @@ struct r300_context {
|
|||
struct draw_context* draw;
|
||||
/* Vertex buffer for SW TCL. */
|
||||
struct pb_buffer *vbo;
|
||||
struct radeon_winsys_cs_handle *vbo_cs;
|
||||
/* Offset and size into the SW TCL VBO. */
|
||||
size_t draw_vbo_offset;
|
||||
|
||||
|
|
|
|||
|
|
@ -108,9 +108,9 @@
|
|||
|
||||
#define OUT_CS_RELOC(r) do { \
|
||||
assert((r)); \
|
||||
assert((r)->cs_buf); \
|
||||
assert((r)->buf); \
|
||||
OUT_CS(0xc0001000); /* PKT3_NOP */ \
|
||||
OUT_CS(cs_winsys->cs_lookup_buffer(cs_copy, (r)->cs_buf) * 4); \
|
||||
OUT_CS(cs_winsys->cs_lookup_buffer(cs_copy, (r)->buf) * 4); \
|
||||
} while (0)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1047,9 +1047,9 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed)
|
|||
OUT_CS(r300->draw_vbo_offset);
|
||||
OUT_CS(0);
|
||||
|
||||
assert(r300->vbo_cs);
|
||||
assert(r300->vbo);
|
||||
OUT_CS(0xc0001000); /* PKT3_NOP */
|
||||
OUT_CS(r300->rws->cs_lookup_buffer(r300->cs, r300->vbo_cs) * 4);
|
||||
OUT_CS(r300->rws->cs_lookup_buffer(r300->cs, r300->vbo) * 4);
|
||||
END_CS;
|
||||
}
|
||||
|
||||
|
|
@ -1320,7 +1320,7 @@ validate:
|
|||
continue;
|
||||
tex = r300_resource(fb->cbufs[i]->texture);
|
||||
assert(tex && tex->buf && "cbuf is marked, but NULL!");
|
||||
r300->rws->cs_add_buffer(r300->cs, tex->cs_buf,
|
||||
r300->rws->cs_add_buffer(r300->cs, tex->buf,
|
||||
RADEON_USAGE_READWRITE,
|
||||
r300_surface(fb->cbufs[i])->domain,
|
||||
tex->b.b.nr_samples > 1 ?
|
||||
|
|
@ -1331,7 +1331,7 @@ validate:
|
|||
if (fb->zsbuf) {
|
||||
tex = r300_resource(fb->zsbuf->texture);
|
||||
assert(tex && tex->buf && "zsbuf is marked, but NULL!");
|
||||
r300->rws->cs_add_buffer(r300->cs, tex->cs_buf,
|
||||
r300->rws->cs_add_buffer(r300->cs, tex->buf,
|
||||
RADEON_USAGE_READWRITE,
|
||||
r300_surface(fb->zsbuf)->domain,
|
||||
tex->b.b.nr_samples > 1 ?
|
||||
|
|
@ -1342,7 +1342,7 @@ validate:
|
|||
/* The AA resolve buffer. */
|
||||
if (r300->aa_state.dirty) {
|
||||
if (aa->dest) {
|
||||
r300->rws->cs_add_buffer(r300->cs, aa->dest->cs_buf,
|
||||
r300->rws->cs_add_buffer(r300->cs, aa->dest->buf,
|
||||
RADEON_USAGE_WRITE,
|
||||
aa->dest->domain,
|
||||
RADEON_PRIO_COLOR_BUFFER);
|
||||
|
|
@ -1356,18 +1356,18 @@ validate:
|
|||
}
|
||||
|
||||
tex = r300_resource(texstate->sampler_views[i]->base.texture);
|
||||
r300->rws->cs_add_buffer(r300->cs, tex->cs_buf, RADEON_USAGE_READ,
|
||||
r300->rws->cs_add_buffer(r300->cs, tex->buf, RADEON_USAGE_READ,
|
||||
tex->domain, RADEON_PRIO_SAMPLER_TEXTURE);
|
||||
}
|
||||
}
|
||||
/* ...occlusion query buffer... */
|
||||
if (r300->query_current)
|
||||
r300->rws->cs_add_buffer(r300->cs, r300->query_current->cs_buf,
|
||||
r300->rws->cs_add_buffer(r300->cs, r300->query_current->buf,
|
||||
RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT,
|
||||
RADEON_PRIO_QUERY);
|
||||
/* ...vertex buffer for SWTCL path... */
|
||||
if (r300->vbo_cs)
|
||||
r300->rws->cs_add_buffer(r300->cs, r300->vbo_cs,
|
||||
if (r300->vbo)
|
||||
r300->rws->cs_add_buffer(r300->cs, r300->vbo,
|
||||
RADEON_USAGE_READ, RADEON_DOMAIN_GTT,
|
||||
RADEON_PRIO_VERTEX_BUFFER);
|
||||
/* ...vertex buffers for HWTCL path... */
|
||||
|
|
@ -1382,7 +1382,7 @@ validate:
|
|||
if (!buf)
|
||||
continue;
|
||||
|
||||
r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->cs_buf,
|
||||
r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->buf,
|
||||
RADEON_USAGE_READ,
|
||||
r300_resource(buf)->domain,
|
||||
RADEON_PRIO_SAMPLER_BUFFER);
|
||||
|
|
@ -1390,7 +1390,7 @@ validate:
|
|||
}
|
||||
/* ...and index buffer for HWTCL path. */
|
||||
if (index_buffer)
|
||||
r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->cs_buf,
|
||||
r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->buf,
|
||||
RADEON_USAGE_READ,
|
||||
r300_resource(index_buffer)->domain,
|
||||
RADEON_PRIO_INDEX_BUFFER);
|
||||
|
|
|
|||
|
|
@ -64,8 +64,6 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
|
|||
FREE(q);
|
||||
return NULL;
|
||||
}
|
||||
q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf);
|
||||
|
||||
return (struct pipe_query*)q;
|
||||
}
|
||||
|
||||
|
|
@ -155,7 +153,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
|
|||
return vresult->b;
|
||||
}
|
||||
|
||||
map = r300->rws->buffer_map(q->cs_buf, r300->cs,
|
||||
map = r300->rws->buffer_map(q->buf, r300->cs,
|
||||
PIPE_TRANSFER_READ |
|
||||
(!wait ? PIPE_TRANSFER_DONTBLOCK : 0));
|
||||
if (!map)
|
||||
|
|
|
|||
|
|
@ -373,7 +373,7 @@ static void r300_draw_arrays_immediate(struct r300_context *r300,
|
|||
/* Map the buffer. */
|
||||
if (!map[vbi]) {
|
||||
map[vbi] = (uint32_t*)r300->rws->buffer_map(
|
||||
r300_resource(vbuf->buffer)->cs_buf,
|
||||
r300_resource(vbuf->buffer)->buf,
|
||||
r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED);
|
||||
map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * info->start;
|
||||
}
|
||||
|
|
@ -606,7 +606,7 @@ static void r300_draw_elements(struct r300_context *r300,
|
|||
/* Fallback for misaligned ushort indices. */
|
||||
if (indexSize == 2 && (start & 1) && indexBuffer) {
|
||||
/* If we got here, then orgIndexBuffer == indexBuffer. */
|
||||
uint16_t *ptr = r300->rws->buffer_map(r300_resource(orgIndexBuffer)->cs_buf,
|
||||
uint16_t *ptr = r300->rws->buffer_map(r300_resource(orgIndexBuffer)->buf,
|
||||
r300->cs,
|
||||
PIPE_TRANSFER_READ |
|
||||
PIPE_TRANSFER_UNSYNCHRONIZED);
|
||||
|
|
@ -899,7 +899,7 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render,
|
|||
|
||||
if (!r300->vbo || size + r300->draw_vbo_offset > r300->vbo->size) {
|
||||
pb_reference(&r300->vbo, NULL);
|
||||
r300->vbo_cs = NULL;
|
||||
r300->vbo = NULL;
|
||||
r300render->vbo_ptr = NULL;
|
||||
|
||||
r300->vbo = rws->buffer_create(rws,
|
||||
|
|
@ -909,9 +909,8 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render,
|
|||
if (!r300->vbo) {
|
||||
return FALSE;
|
||||
}
|
||||
r300->vbo_cs = rws->buffer_get_cs_handle(r300->vbo);
|
||||
r300->draw_vbo_offset = 0;
|
||||
r300render->vbo_ptr = rws->buffer_map(r300->vbo_cs, r300->cs,
|
||||
r300render->vbo_ptr = rws->buffer_map(r300->vbo, r300->cs,
|
||||
PIPE_TRANSFER_WRITE);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ r300_buffer_transfer_map( struct pipe_context *context,
|
|||
assert(usage & PIPE_TRANSFER_WRITE);
|
||||
|
||||
/* Check if mapping this buffer would cause waiting for the GPU. */
|
||||
if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, RADEON_USAGE_READWRITE) ||
|
||||
if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->buf, RADEON_USAGE_READWRITE) ||
|
||||
!r300->rws->buffer_wait(rbuf->buf, 0, RADEON_USAGE_READWRITE)) {
|
||||
unsigned i;
|
||||
struct pb_buffer *new_buf;
|
||||
|
|
@ -108,7 +108,6 @@ r300_buffer_transfer_map( struct pipe_context *context,
|
|||
/* Discard the old buffer. */
|
||||
pb_reference(&rbuf->buf, NULL);
|
||||
rbuf->buf = new_buf;
|
||||
rbuf->cs_buf = r300->rws->buffer_get_cs_handle(rbuf->buf);
|
||||
|
||||
/* We changed the buffer, now we need to bind it where the old one was bound. */
|
||||
for (i = 0; i < r300->nr_vertex_buffers; i++) {
|
||||
|
|
@ -127,7 +126,7 @@ r300_buffer_transfer_map( struct pipe_context *context,
|
|||
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
|
||||
}
|
||||
|
||||
map = rws->buffer_map(rbuf->cs_buf, r300->cs, usage);
|
||||
map = rws->buffer_map(rbuf->buf, r300->cs, usage);
|
||||
|
||||
if (!map) {
|
||||
util_slab_free(&r300->pool_transfers, transfer);
|
||||
|
|
@ -190,9 +189,5 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
|
|||
FREE(rbuf);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rbuf->cs_buf =
|
||||
r300screen->rws->buffer_get_cs_handle(rbuf->buf);
|
||||
|
||||
return &rbuf->b.b;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1059,8 +1059,6 @@ r300_texture_create_object(struct r300_screen *rscreen,
|
|||
util_format_is_depth_or_stencil(base->format) ? "depth" : "color");
|
||||
}
|
||||
|
||||
tex->cs_buf = rws->buffer_get_cs_handle(tex->buf);
|
||||
|
||||
rws->buffer_set_tiling(tex->buf, NULL,
|
||||
tex->tex.microtile, tex->tex.macrotile[0],
|
||||
0, 0, 0, 0, 0, 0, 0,
|
||||
|
|
@ -1169,7 +1167,7 @@ struct pipe_surface* r300_create_surface_custom(struct pipe_context * ctx,
|
|||
surface->base.u.tex.last_layer = surf_tmpl->u.tex.last_layer;
|
||||
|
||||
surface->buf = tex->buf;
|
||||
surface->cs_buf = tex->cs_buf;
|
||||
surface->buf = tex->buf;
|
||||
|
||||
/* Prefer VRAM if there are multiple domains to choose from. */
|
||||
surface->domain = tex->domain;
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ r300_texture_transfer_map(struct pipe_context *ctx,
|
|||
char *map;
|
||||
|
||||
referenced_cs =
|
||||
r300->rws->cs_is_buffer_referenced(r300->cs, tex->cs_buf, RADEON_USAGE_READWRITE);
|
||||
r300->rws->cs_is_buffer_referenced(r300->cs, tex->buf, RADEON_USAGE_READWRITE);
|
||||
if (referenced_cs) {
|
||||
referenced_hw = TRUE;
|
||||
} else {
|
||||
|
|
@ -218,7 +218,7 @@ r300_texture_transfer_map(struct pipe_context *ctx,
|
|||
if (trans->linear_texture) {
|
||||
/* The detiled texture is of the same size as the region being mapped
|
||||
* (no offset needed). */
|
||||
map = r300->rws->buffer_map(trans->linear_texture->cs_buf,
|
||||
map = r300->rws->buffer_map(trans->linear_texture->buf,
|
||||
r300->cs, usage);
|
||||
if (!map) {
|
||||
pipe_resource_reference(
|
||||
|
|
@ -230,7 +230,7 @@ r300_texture_transfer_map(struct pipe_context *ctx,
|
|||
return map;
|
||||
} else {
|
||||
/* Tiling is disabled. */
|
||||
map = r300->rws->buffer_map(tex->cs_buf, r300->cs, usage);
|
||||
map = r300->rws->buffer_map(tex->buf, r300->cs, usage);
|
||||
if (!map) {
|
||||
FREE(trans);
|
||||
return NULL;
|
||||
|
|
|
|||
|
|
@ -233,7 +233,7 @@ void *evergreen_create_compute_state(
|
|||
shader->bc.ndw * 4);
|
||||
p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE);
|
||||
memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
|
||||
ctx->b.ws->buffer_unmap(shader->code_bo->cs_buf);
|
||||
ctx->b.ws->buffer_unmap(shader->code_bo->buf);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
@ -613,7 +613,7 @@ static void evergreen_launch_grid(
|
|||
kernel->bc.ndw * 4);
|
||||
p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE);
|
||||
memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
|
||||
ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf);
|
||||
ctx->b.ws->buffer_unmap(kernel->code_bo->buf);
|
||||
}
|
||||
shader->active_kernel = kernel;
|
||||
ctx->cs_shader_state.kernel_index = pc;
|
||||
|
|
|
|||
|
|
@ -1582,12 +1582,17 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples,
|
|||
S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
|
||||
radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
|
||||
S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
|
||||
radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
|
||||
radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
|
||||
EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
|
||||
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
|
||||
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
|
||||
} else {
|
||||
radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
|
||||
radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
|
||||
radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
|
||||
radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
|
||||
radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
|
||||
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
|
||||
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1828,10 +1833,7 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
|
|||
unsigned db_count_control = 0;
|
||||
unsigned db_render_override =
|
||||
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
|
||||
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
|
||||
/* There is a hang with HTILE if stencil is used and
|
||||
* fast stencil is enabled. */
|
||||
S_02800C_FAST_STENCIL_DISABLE(1);
|
||||
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
|
||||
|
||||
if (a->occlusion_query_enabled) {
|
||||
db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1);
|
||||
|
|
@ -1840,26 +1842,14 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
|
|||
}
|
||||
db_render_override |= S_02800C_NOOP_CULL_DISABLE(1);
|
||||
}
|
||||
/* FIXME we should be able to use hyperz even if we are not writing to
|
||||
* zbuffer but somehow this trigger GPU lockup. See :
|
||||
*
|
||||
* https://bugs.freedesktop.org/show_bug.cgi?id=60848
|
||||
*
|
||||
* Disable hyperz for now if not writing to zbuffer.
|
||||
|
||||
/* This is to fix a lockup when hyperz and alpha test are enabled at
|
||||
* the same time; somehow the GPU gets confused about which order to pick for
|
||||
* z test
|
||||
*/
|
||||
if (rctx->db_state.rsurf && rctx->db_state.rsurf->db_htile_surface && rctx->zwritemask) {
|
||||
/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
|
||||
db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF);
|
||||
/* This is to fix a lockup when hyperz and alpha test are enabled at
|
||||
* the same time; somehow the GPU gets confused about which order to pick for
|
||||
* z test
|
||||
*/
|
||||
if (rctx->alphatest_state.sx_alpha_test_control) {
|
||||
db_render_override |= S_02800C_FORCE_SHADER_Z_ORDER(1);
|
||||
}
|
||||
} else {
|
||||
db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE);
|
||||
}
|
||||
if (rctx->alphatest_state.sx_alpha_test_control)
|
||||
db_render_override |= S_02800C_FORCE_SHADER_Z_ORDER(1);
|
||||
|
||||
if (a->flush_depthstencil_through_cb) {
|
||||
assert(a->copy_depth || a->copy_stencil);
|
||||
|
||||
|
|
|
|||
|
|
@ -2633,7 +2633,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
|
|||
} else {
|
||||
memcpy(bytecode, bc.bytecode, fs_size);
|
||||
}
|
||||
rctx->b.ws->buffer_unmap(shader->buffer->cs_buf);
|
||||
rctx->b.ws->buffer_unmap(shader->buffer->buf);
|
||||
|
||||
r600_bytecode_clear(&bc);
|
||||
return shader;
|
||||
|
|
|
|||
|
|
@ -533,7 +533,7 @@ static void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst
|
|||
/**
|
||||
* Global buffers are not really resources, they are actually offsets
|
||||
* into a single global resource (r600_screen::global_pool). This means
|
||||
* they don't have their own cs_buf handle, so they cannot be passed
|
||||
* they don't have their own buf handle, so they cannot be passed
|
||||
* to r600_copy_buffer() and must be handled separately.
|
||||
*/
|
||||
static void r600_copy_global_buffer(struct pipe_context *ctx,
|
||||
|
|
|
|||
|
|
@ -184,7 +184,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen,
|
|||
rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
|
||||
r600_context_gfx_flush, rctx,
|
||||
rscreen->b.trace_bo ?
|
||||
rscreen->b.trace_bo->cs_buf : NULL);
|
||||
rscreen->b.trace_bo->buf : NULL);
|
||||
rctx->b.gfx.flush = r600_context_gfx_flush;
|
||||
|
||||
rctx->allocator_fetch_shader = u_suballocator_create(&rctx->b.b, 64 * 1024, 256,
|
||||
|
|
@ -663,7 +663,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
|
|||
templ.usage = PIPE_USAGE_DEFAULT;
|
||||
|
||||
struct r600_resource *res = r600_resource(rscreen->screen.resource_create(&rscreen->screen, &templ));
|
||||
unsigned char *map = ws->buffer_map(res->cs_buf, NULL, PIPE_TRANSFER_WRITE);
|
||||
unsigned char *map = ws->buffer_map(res->buf, NULL, PIPE_TRANSFER_WRITE);
|
||||
|
||||
memset(map, 0, 256);
|
||||
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ static int store_shader(struct pipe_context *ctx,
|
|||
} else {
|
||||
memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
|
||||
}
|
||||
rctx->b.ws->buffer_unmap(shader->bo->cs_buf);
|
||||
rctx->b.ws->buffer_unmap(shader->bo->buf);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
@ -1745,6 +1745,8 @@ static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
|
|||
temp_reg, i,
|
||||
temp_reg, 0,
|
||||
V_SQ_ALU_SRC_LITERAL, 4 * i);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
for (i = 0; i < 4; i++) {
|
||||
/* emit an LDS_READ_RET */
|
||||
|
|
@ -3144,7 +3146,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
|||
ctx.nliterals = 0;
|
||||
ctx.literals = NULL;
|
||||
|
||||
shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
|
||||
shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
|
||||
ctx.info.colors_written == 1;
|
||||
shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
|
||||
shader->ps_conservative_z = (uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT];
|
||||
|
||||
|
|
|
|||
|
|
@ -121,11 +121,9 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
|
|||
if (!resources[i])
|
||||
continue;
|
||||
|
||||
/* recreate the CS handle */
|
||||
resources[i]->resource.cs_buf = ctx->b.ws->buffer_get_cs_handle(
|
||||
resources[i]->resource.buf);
|
||||
/* reset the address */
|
||||
resources[i]->resource.gpu_address = ctx->b.ws->buffer_get_virtual_address(
|
||||
resources[i]->resource.cs_buf);
|
||||
resources[i]->resource.buf);
|
||||
}
|
||||
|
||||
template.height *= array_size;
|
||||
|
|
@ -155,7 +153,7 @@ static uint32_t eg_num_banks(uint32_t nbanks)
|
|||
}
|
||||
|
||||
/* set the decoding target buffer offsets */
|
||||
static struct radeon_winsys_cs_handle* r600_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_buffer *buf)
|
||||
static struct pb_buffer* r600_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_buffer *buf)
|
||||
{
|
||||
struct r600_screen *rscreen = (struct r600_screen*)buf->base.context->screen;
|
||||
struct r600_texture *luma = (struct r600_texture *)buf->resources[0];
|
||||
|
|
@ -166,18 +164,18 @@ static struct radeon_winsys_cs_handle* r600_uvd_set_dtb(struct ruvd_msg *msg, st
|
|||
|
||||
ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface);
|
||||
|
||||
return luma->resource.cs_buf;
|
||||
return luma->resource.buf;
|
||||
}
|
||||
|
||||
/* get the radeon resources for VCE */
|
||||
static void r600_vce_get_buffer(struct pipe_resource *resource,
|
||||
struct radeon_winsys_cs_handle **handle,
|
||||
struct pb_buffer **handle,
|
||||
struct radeon_surf **surface)
|
||||
{
|
||||
struct r600_texture *res = (struct r600_texture *)resource;
|
||||
|
||||
if (handle)
|
||||
*handle = res->resource.cs_buf;
|
||||
*handle = res->resource.buf;
|
||||
|
||||
if (surface)
|
||||
*surface = &res->surface;
|
||||
|
|
|
|||
|
|
@ -229,13 +229,17 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
|
|||
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
|
||||
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
|
||||
radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
|
||||
EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
|
||||
EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
|
||||
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
|
||||
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
|
||||
} else if (overrast_samples > 1) {
|
||||
radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
|
||||
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
|
||||
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
|
||||
S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
|
||||
radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
|
||||
radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
|
||||
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
|
||||
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
|
||||
}
|
||||
} else {
|
||||
radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
|
||||
|
|
@ -245,6 +249,8 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
|
|||
radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
|
||||
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
|
||||
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
|
||||
radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
|
||||
radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
|
||||
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
|
||||
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@
|
|||
#include <stdio.h>
|
||||
|
||||
boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
|
||||
struct radeon_winsys_cs_handle *buf,
|
||||
struct pb_buffer *buf,
|
||||
enum radeon_bo_usage usage)
|
||||
{
|
||||
if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) {
|
||||
|
|
@ -52,7 +52,7 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
|
|||
bool busy = false;
|
||||
|
||||
if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
|
||||
return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
|
||||
return ctx->ws->buffer_map(resource->buf, NULL, usage);
|
||||
}
|
||||
|
||||
if (!(usage & PIPE_TRANSFER_WRITE)) {
|
||||
|
|
@ -62,7 +62,7 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
|
|||
|
||||
if (ctx->gfx.cs->cdw != ctx->initial_gfx_cs_size &&
|
||||
ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
|
||||
resource->cs_buf, rusage)) {
|
||||
resource->buf, rusage)) {
|
||||
if (usage & PIPE_TRANSFER_DONTBLOCK) {
|
||||
ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
|
||||
return NULL;
|
||||
|
|
@ -74,7 +74,7 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
|
|||
if (ctx->dma.cs &&
|
||||
ctx->dma.cs->cdw &&
|
||||
ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
|
||||
resource->cs_buf, rusage)) {
|
||||
resource->buf, rusage)) {
|
||||
if (usage & PIPE_TRANSFER_DONTBLOCK) {
|
||||
ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
|
||||
return NULL;
|
||||
|
|
@ -97,7 +97,7 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
|
|||
}
|
||||
|
||||
/* Setting the CS to NULL will prevent doing checks we have done already. */
|
||||
return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
|
||||
return ctx->ws->buffer_map(resource->buf, NULL, usage);
|
||||
}
|
||||
|
||||
bool r600_init_resource(struct r600_common_screen *rscreen,
|
||||
|
|
@ -179,11 +179,10 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
|
|||
* the same buffer where one of the contexts invalidates it while
|
||||
* the others are using it. */
|
||||
old_buf = res->buf;
|
||||
res->cs_buf = rscreen->ws->buffer_get_cs_handle(new_buf); /* should be atomic */
|
||||
res->buf = new_buf; /* should be atomic */
|
||||
|
||||
if (rscreen->info.r600_virtual_address)
|
||||
res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->cs_buf);
|
||||
res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf);
|
||||
else
|
||||
res->gpu_address = 0;
|
||||
|
||||
|
|
@ -278,7 +277,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
|
|||
assert(usage & PIPE_TRANSFER_WRITE);
|
||||
|
||||
/* Check if mapping this buffer would cause waiting for the GPU. */
|
||||
if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
|
||||
if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
|
||||
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
|
||||
rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
|
||||
}
|
||||
|
|
@ -292,7 +291,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
|
|||
assert(usage & PIPE_TRANSFER_WRITE);
|
||||
|
||||
/* Check if mapping this buffer would cause waiting for the GPU. */
|
||||
if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
|
||||
if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
|
||||
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
|
||||
/* Do a wait-free write-only transfer using a temporary buffer. */
|
||||
unsigned offset;
|
||||
|
|
@ -483,11 +482,9 @@ r600_buffer_from_user_memory(struct pipe_screen *screen,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
rbuffer->cs_buf = ws->buffer_get_cs_handle(rbuffer->buf);
|
||||
|
||||
if (rscreen->info.r600_virtual_address)
|
||||
rbuffer->gpu_address =
|
||||
ws->buffer_get_virtual_address(rbuffer->cs_buf);
|
||||
ws->buffer_get_virtual_address(rbuffer->buf);
|
||||
else
|
||||
rbuffer->gpu_address = 0;
|
||||
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct
|
|||
enum radeon_bo_priority priority)
|
||||
{
|
||||
assert(usage);
|
||||
return rctx->ws->cs_add_buffer(ring->cs, rbo->cs_buf, usage,
|
||||
return rctx->ws->cs_add_buffer(ring->cs, rbo->buf, usage,
|
||||
rbo->domains, priority) * 4;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -375,6 +375,7 @@ static const struct debug_named_value common_debug_options[] = {
|
|||
{ "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
|
||||
{ "nodcc", DBG_NO_DCC, "Disable DCC." },
|
||||
{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
|
||||
{ "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
|
||||
|
||||
DEBUG_NAMED_VALUE_END /* must be last */
|
||||
};
|
||||
|
|
@ -947,7 +948,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
|||
PIPE_USAGE_STAGING,
|
||||
4096);
|
||||
if (rscreen->trace_bo) {
|
||||
rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
|
||||
rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->buf, NULL,
|
||||
PIPE_TRANSFER_UNSYNCHRONIZED);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@
|
|||
#define DBG_CHECK_VM (1llu << 42)
|
||||
#define DBG_NO_DCC (1llu << 43)
|
||||
#define DBG_NO_DCC_CLEAR (1llu << 44)
|
||||
#define DBG_NO_RB_PLUS (1llu << 45)
|
||||
|
||||
#define R600_MAP_BUFFER_ALIGNMENT 64
|
||||
|
||||
|
|
@ -133,7 +134,6 @@ struct r600_resource {
|
|||
|
||||
/* Winsys objects. */
|
||||
struct pb_buffer *buf;
|
||||
struct radeon_winsys_cs_handle *cs_buf;
|
||||
uint64_t gpu_address;
|
||||
|
||||
/* Resource state. */
|
||||
|
|
@ -221,6 +221,8 @@ struct r600_texture {
|
|||
struct r600_resource *htile_buffer;
|
||||
bool depth_cleared; /* if it was cleared at least once */
|
||||
float depth_clear_value;
|
||||
bool stencil_cleared; /* if it was cleared at least once */
|
||||
uint8_t stencil_clear_value;
|
||||
|
||||
bool non_disp_tiling; /* R600-Cayman only */
|
||||
};
|
||||
|
|
@ -250,6 +252,8 @@ struct r600_surface {
|
|||
unsigned cb_color_fmask_slice; /* EG and later */
|
||||
unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
|
||||
unsigned cb_color_mask; /* R600 only */
|
||||
unsigned sx_ps_downconvert; /* Stoney only */
|
||||
unsigned sx_blend_opt_epsilon; /* Stoney only */
|
||||
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
|
||||
struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
|
||||
|
||||
|
|
@ -473,7 +477,7 @@ struct r600_common_context {
|
|||
|
||||
/* r600_buffer.c */
|
||||
boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
|
||||
struct radeon_winsys_cs_handle *buf,
|
||||
struct pb_buffer *buf,
|
||||
enum radeon_bo_usage usage);
|
||||
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
|
||||
struct r600_resource *resource,
|
||||
|
|
|
|||
|
|
@ -253,7 +253,7 @@ static void r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
|
|||
struct r600_resource *buffer)
|
||||
{
|
||||
/* Callers ensure that the buffer is currently unused by the GPU. */
|
||||
uint32_t *results = ctx->ws->buffer_map(buffer->cs_buf, NULL,
|
||||
uint32_t *results = ctx->ws->buffer_map(buffer->buf, NULL,
|
||||
PIPE_TRANSFER_WRITE |
|
||||
PIPE_TRANSFER_UNSYNCHRONIZED);
|
||||
|
||||
|
|
@ -667,7 +667,7 @@ static void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
|
|||
|
||||
if (query->flags & R600_QUERY_HW_FLAG_PREDICATE) {
|
||||
/* Obtain a new buffer if the current one can't be mapped without a stall. */
|
||||
if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
|
||||
if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
|
||||
!rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
|
||||
pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
|
||||
query->buffer.buf = r600_new_query_buffer(rctx, query);
|
||||
|
|
|
|||
|
|
@ -497,10 +497,6 @@ static void vi_texture_alloc_dcc_separate(struct r600_common_screen *rscreen,
|
|||
if (rscreen->debug_flags & DBG_NO_DCC)
|
||||
return;
|
||||
|
||||
/* TODO: DCC is broken on Stoney */
|
||||
if (rscreen->family == CHIP_STONEY)
|
||||
return;
|
||||
|
||||
rtex->dcc_buffer = (struct r600_resource *)
|
||||
r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
|
||||
PIPE_USAGE_DEFAULT, rtex->surface.dcc_size, rtex->surface.dcc_alignment);
|
||||
|
|
@ -758,9 +754,8 @@ r600_texture_create_object(struct pipe_screen *screen,
|
|||
}
|
||||
} else {
|
||||
resource->buf = buf;
|
||||
resource->cs_buf = rscreen->ws->buffer_get_cs_handle(buf);
|
||||
resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->cs_buf);
|
||||
resource->domains = rscreen->ws->buffer_get_initial_domain(resource->cs_buf);
|
||||
resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf);
|
||||
resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf);
|
||||
}
|
||||
|
||||
if (rtex->cmask.size) {
|
||||
|
|
@ -1028,7 +1023,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
|
|||
/* Untiled buffers in VRAM, which is slow for CPU reads */
|
||||
use_staging_texture = TRUE;
|
||||
} else if (!(usage & PIPE_TRANSFER_READ) &&
|
||||
(r600_rings_is_buffer_referenced(rctx, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
|
||||
(r600_rings_is_buffer_referenced(rctx, rtex->resource.buf, RADEON_USAGE_READWRITE) ||
|
||||
!rctx->ws->buffer_wait(rtex->resource.buf, 0, RADEON_USAGE_READWRITE))) {
|
||||
/* Use a staging texture for uploads if the underlying BO is busy. */
|
||||
use_staging_texture = TRUE;
|
||||
|
|
@ -1393,6 +1388,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
|
|||
return;
|
||||
|
||||
for (i = 0; i < fb->nr_cbufs; i++) {
|
||||
struct r600_surface *surf;
|
||||
struct r600_texture *tex;
|
||||
unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
|
||||
|
||||
|
|
@ -1403,6 +1399,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
|
|||
if (!(*buffers & clear_bit))
|
||||
continue;
|
||||
|
||||
surf = (struct r600_surface *)fb->cbufs[i];
|
||||
tex = (struct r600_texture *)fb->cbufs[i]->texture;
|
||||
|
||||
/* 128-bit formats are unsupported */
|
||||
|
|
@ -1449,6 +1446,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
|
|||
if (clear_words_needed)
|
||||
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
|
||||
} else {
|
||||
/* RB+ doesn't work with CMASK fast clear. */
|
||||
if (surf->sx_ps_downconvert)
|
||||
continue;
|
||||
|
||||
/* ensure CMASK is enabled */
|
||||
r600_texture_alloc_cmask_separate(rctx->screen, tex);
|
||||
if (tex->cmask.size == 0) {
|
||||
|
|
|
|||
|
|
@ -179,6 +179,8 @@
|
|||
|
||||
#define EG_R_028A4C_PA_SC_MODE_CNTL_1 0x028A4C
|
||||
#define EG_S_028A4C_PS_ITER_SAMPLE(x) (((x) & 0x1) << 16)
|
||||
#define EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((x) & 0x1) << 25)
|
||||
#define EG_S_028A4C_FORCE_EOV_REZ_ENABLE(x) (((x) & 0x1) << 26)
|
||||
|
||||
#define CM_R_028804_DB_EQAA 0x00028804
|
||||
#define S_028804_MAX_ANCHOR_SAMPLES(x) (((x) & 0x7) << 0)
|
||||
|
|
|
|||
|
|
@ -105,16 +105,16 @@ static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val)
|
|||
|
||||
/* send a command to the VCPU through the GPCOM registers */
|
||||
static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
|
||||
struct radeon_winsys_cs_handle* cs_buf, uint32_t off,
|
||||
struct pb_buffer* buf, uint32_t off,
|
||||
enum radeon_bo_usage usage, enum radeon_bo_domain domain)
|
||||
{
|
||||
int reloc_idx;
|
||||
|
||||
reloc_idx = dec->ws->cs_add_buffer(dec->cs, cs_buf, usage, domain,
|
||||
reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage, domain,
|
||||
RADEON_PRIO_UVD);
|
||||
if (!dec->use_legacy) {
|
||||
uint64_t addr;
|
||||
addr = dec->ws->buffer_get_virtual_address(cs_buf);
|
||||
addr = dec->ws->buffer_get_virtual_address(buf);
|
||||
addr = addr + off;
|
||||
set_reg(dec, RUVD_GPCOM_VCPU_DATA0, addr);
|
||||
set_reg(dec, RUVD_GPCOM_VCPU_DATA1, addr >> 32);
|
||||
|
|
@ -142,7 +142,7 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
|
|||
buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
|
||||
|
||||
/* and map it for CPU access */
|
||||
ptr = dec->ws->buffer_map(buf->res->cs_buf, dec->cs, PIPE_TRANSFER_WRITE);
|
||||
ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
|
||||
|
||||
/* calc buffer offsets */
|
||||
dec->msg = (struct ruvd_msg *)ptr;
|
||||
|
|
@ -164,13 +164,13 @@ static void send_msg_buf(struct ruvd_decoder *dec)
|
|||
buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
|
||||
|
||||
/* unmap the buffer */
|
||||
dec->ws->buffer_unmap(buf->res->cs_buf);
|
||||
dec->ws->buffer_unmap(buf->res->buf);
|
||||
dec->msg = NULL;
|
||||
dec->fb = NULL;
|
||||
dec->it = NULL;
|
||||
|
||||
/* and send it to the hardware */
|
||||
send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->cs_buf, 0,
|
||||
send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->buf, 0,
|
||||
RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
|
||||
}
|
||||
|
||||
|
|
@ -852,7 +852,7 @@ static void ruvd_begin_frame(struct pipe_video_codec *decoder,
|
|||
|
||||
dec->bs_size = 0;
|
||||
dec->bs_ptr = dec->ws->buffer_map(
|
||||
dec->bs_buffers[dec->cur_buffer].res->cs_buf,
|
||||
dec->bs_buffers[dec->cur_buffer].res->buf,
|
||||
dec->cs, PIPE_TRANSFER_WRITE);
|
||||
}
|
||||
|
||||
|
|
@ -892,13 +892,13 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
|
|||
unsigned new_size = dec->bs_size + sizes[i];
|
||||
|
||||
if (new_size > buf->res->buf->size) {
|
||||
dec->ws->buffer_unmap(buf->res->cs_buf);
|
||||
dec->ws->buffer_unmap(buf->res->buf);
|
||||
if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
|
||||
RVID_ERR("Can't resize bitstream buffer!");
|
||||
return;
|
||||
}
|
||||
|
||||
dec->bs_ptr = dec->ws->buffer_map(buf->res->cs_buf, dec->cs,
|
||||
dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
|
||||
PIPE_TRANSFER_WRITE);
|
||||
if (!dec->bs_ptr)
|
||||
return;
|
||||
|
|
@ -920,7 +920,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
|
|||
struct pipe_picture_desc *picture)
|
||||
{
|
||||
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
|
||||
struct radeon_winsys_cs_handle *dt;
|
||||
struct pb_buffer *dt;
|
||||
struct rvid_buffer *msg_fb_it_buf, *bs_buf;
|
||||
unsigned bs_size;
|
||||
|
||||
|
|
@ -934,7 +934,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
|
|||
|
||||
bs_size = align(dec->bs_size, 128);
|
||||
memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
|
||||
dec->ws->buffer_unmap(bs_buf->res->cs_buf);
|
||||
dec->ws->buffer_unmap(bs_buf->res->buf);
|
||||
|
||||
map_msg_fb_it_buf(dec);
|
||||
dec->msg->size = sizeof(*dec->msg);
|
||||
|
|
@ -995,20 +995,20 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
|
|||
|
||||
send_msg_buf(dec);
|
||||
|
||||
send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->cs_buf, 0,
|
||||
send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0,
|
||||
RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
|
||||
if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
|
||||
send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->cs_buf, 0,
|
||||
send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0,
|
||||
RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
|
||||
}
|
||||
send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->cs_buf,
|
||||
send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf,
|
||||
0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
|
||||
send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
|
||||
RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
|
||||
send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->cs_buf,
|
||||
send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf,
|
||||
FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
|
||||
if (have_it(dec))
|
||||
send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->cs_buf,
|
||||
send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf,
|
||||
FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
|
||||
set_reg(dec, RUVD_ENGINE_CNTL, 1);
|
||||
|
||||
|
|
|
|||
|
|
@ -421,7 +421,7 @@ struct ruvd_msg {
|
|||
};
|
||||
|
||||
/* driver dependent callback */
|
||||
typedef struct radeon_winsys_cs_handle* (*ruvd_set_dtb)
|
||||
typedef struct pb_buffer* (*ruvd_set_dtb)
|
||||
(struct ruvd_msg* msg, struct vl_video_buffer *vb);
|
||||
|
||||
/* create a UVD decoder */
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ static void flush(struct rvce_encoder *enc)
|
|||
#if 0
|
||||
static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
|
||||
{
|
||||
uint32_t *ptr = enc->ws->buffer_map(fb->res->cs_buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
|
||||
uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
|
||||
unsigned i = 0;
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]);
|
||||
|
|
@ -83,7 +83,7 @@ static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
|
|||
fprintf(stderr, "seiPrivatePackageOffset:\t%08x\n", ptr[i++]);
|
||||
fprintf(stderr, "seiPrivatePackageSize:\t\t%08x\n", ptr[i++]);
|
||||
fprintf(stderr, "\n");
|
||||
enc->ws->buffer_unmap(fb->res->cs_buf);
|
||||
enc->ws->buffer_unmap(fb->res->buf);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
@ -346,7 +346,7 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder,
|
|||
struct rvid_buffer *fb = feedback;
|
||||
|
||||
if (size) {
|
||||
uint32_t *ptr = enc->ws->buffer_map(fb->res->cs_buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
|
||||
uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
|
||||
|
||||
if (ptr[1]) {
|
||||
*size = ptr[4] - ptr[9];
|
||||
|
|
@ -354,7 +354,7 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder,
|
|||
*size = 0;
|
||||
}
|
||||
|
||||
enc->ws->buffer_unmap(fb->res->cs_buf);
|
||||
enc->ws->buffer_unmap(fb->res->buf);
|
||||
}
|
||||
//dump_feedback(enc, fb);
|
||||
rvid_destroy_buffer(fb);
|
||||
|
|
@ -522,7 +522,7 @@ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
|
|||
/**
|
||||
* Add the buffer as relocation to the current command submission
|
||||
*/
|
||||
void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *buf,
|
||||
void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
|
||||
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
|
||||
signed offset)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ struct r600_common_screen;
|
|||
|
||||
/* driver dependent callback */
|
||||
typedef void (*rvce_get_buffer)(struct pipe_resource *resource,
|
||||
struct radeon_winsys_cs_handle **handle,
|
||||
struct pb_buffer **handle,
|
||||
struct radeon_surf **surface);
|
||||
|
||||
/* Coded picture buffer slot */
|
||||
|
|
@ -92,11 +92,11 @@ struct rvce_encoder {
|
|||
|
||||
rvce_get_buffer get_buffer;
|
||||
|
||||
struct radeon_winsys_cs_handle* handle;
|
||||
struct pb_buffer* handle;
|
||||
struct radeon_surf* luma;
|
||||
struct radeon_surf* chroma;
|
||||
|
||||
struct radeon_winsys_cs_handle* bs_handle;
|
||||
struct pb_buffer* bs_handle;
|
||||
unsigned bs_size;
|
||||
|
||||
struct rvce_cpb_slot *cpb_array;
|
||||
|
|
@ -130,7 +130,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
|
|||
|
||||
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
|
||||
|
||||
void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *buf,
|
||||
void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
|
||||
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
|
||||
signed offset);
|
||||
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ static void task_info(struct rvce_encoder *enc, uint32_t op,
|
|||
static void feedback(struct rvce_encoder *enc)
|
||||
{
|
||||
RVCE_BEGIN(0x05000005); // feedback buffer
|
||||
RVCE_WRITE(enc->fb->res->cs_buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo
|
||||
RVCE_WRITE(enc->fb->res->buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo
|
||||
RVCE_CS(0x00000001); // feedbackRingSize
|
||||
RVCE_END();
|
||||
}
|
||||
|
|
@ -303,7 +303,7 @@ static void encode(struct rvce_encoder *enc)
|
|||
enc->task_info(enc, 0x00000003, 0, 0, 0);
|
||||
|
||||
RVCE_BEGIN(0x05000001); // context buffer
|
||||
RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0x0); // encodeContextAddressHi/Lo
|
||||
RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0x0); // encodeContextAddressHi/Lo
|
||||
RVCE_END();
|
||||
|
||||
RVCE_BEGIN(0x05000004); // video bitstream buffer
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ static void encode(struct rvce_encoder *enc)
|
|||
enc->task_info(enc, 0x00000003, dep, 0, bs_idx);
|
||||
|
||||
RVCE_BEGIN(0x05000001); // context buffer
|
||||
RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo
|
||||
RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo
|
||||
RVCE_END();
|
||||
|
||||
bs_offset = -(signed)(bs_idx * enc->bs_size);
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ static void encode(struct rvce_encoder *enc)
|
|||
enc->task_info(enc, 0x00000003, dep, 0, bs_idx);
|
||||
|
||||
RVCE_BEGIN(0x05000001); // context buffer
|
||||
RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo
|
||||
RVCE_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo
|
||||
RVCE_END();
|
||||
|
||||
bs_offset = -(signed)(bs_idx * enc->bs_size);
|
||||
|
|
|
|||
|
|
@ -89,11 +89,11 @@ bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
|
|||
if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
|
||||
goto error;
|
||||
|
||||
src = ws->buffer_map(old_buf.res->cs_buf, cs, PIPE_TRANSFER_READ);
|
||||
src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
|
||||
if (!src)
|
||||
goto error;
|
||||
|
||||
dst = ws->buffer_map(new_buf->res->cs_buf, cs, PIPE_TRANSFER_WRITE);
|
||||
dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE);
|
||||
if (!dst)
|
||||
goto error;
|
||||
|
||||
|
|
@ -103,14 +103,14 @@ bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
|
|||
dst += bytes;
|
||||
memset(dst, 0, new_size);
|
||||
}
|
||||
ws->buffer_unmap(new_buf->res->cs_buf);
|
||||
ws->buffer_unmap(old_buf.res->cs_buf);
|
||||
ws->buffer_unmap(new_buf->res->buf);
|
||||
ws->buffer_unmap(old_buf.res->buf);
|
||||
rvid_destroy_buffer(&old_buf);
|
||||
return true;
|
||||
|
||||
error:
|
||||
if (src)
|
||||
ws->buffer_unmap(old_buf.res->cs_buf);
|
||||
ws->buffer_unmap(old_buf.res->buf);
|
||||
rvid_destroy_buffer(new_buf);
|
||||
*new_buf = old_buf;
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -235,7 +235,6 @@ enum radeon_bo_priority {
|
|||
};
|
||||
|
||||
struct winsys_handle;
|
||||
struct radeon_winsys_cs_handle;
|
||||
struct radeon_winsys_ctx;
|
||||
|
||||
struct radeon_winsys_cs {
|
||||
|
|
@ -434,9 +433,6 @@ struct radeon_winsys {
|
|||
enum radeon_bo_domain domain,
|
||||
enum radeon_bo_flag flags);
|
||||
|
||||
struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)(
|
||||
struct pb_buffer *buf);
|
||||
|
||||
/**
|
||||
* Map the entire data store of a buffer object into the client's address
|
||||
* space.
|
||||
|
|
@ -446,7 +442,7 @@ struct radeon_winsys {
|
|||
* \param usage A bitmask of the PIPE_TRANSFER_* flags.
|
||||
* \return The pointer at the beginning of the buffer.
|
||||
*/
|
||||
void *(*buffer_map)(struct radeon_winsys_cs_handle *buf,
|
||||
void *(*buffer_map)(struct pb_buffer *buf,
|
||||
struct radeon_winsys_cs *cs,
|
||||
enum pipe_transfer_usage usage);
|
||||
|
||||
|
|
@ -455,7 +451,7 @@ struct radeon_winsys {
|
|||
*
|
||||
* \param buf A winsys buffer object to unmap.
|
||||
*/
|
||||
void (*buffer_unmap)(struct radeon_winsys_cs_handle *buf);
|
||||
void (*buffer_unmap)(struct pb_buffer *buf);
|
||||
|
||||
/**
|
||||
* Wait for the buffer and return true if the buffer is not used
|
||||
|
|
@ -552,12 +548,12 @@ struct radeon_winsys {
|
|||
* \param buf A winsys buffer object
|
||||
* \return virtual address
|
||||
*/
|
||||
uint64_t (*buffer_get_virtual_address)(struct radeon_winsys_cs_handle *buf);
|
||||
uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf);
|
||||
|
||||
/**
|
||||
* Query the initial placement of the buffer from the kernel driver.
|
||||
*/
|
||||
enum radeon_bo_domain (*buffer_get_initial_domain)(struct radeon_winsys_cs_handle *buf);
|
||||
enum radeon_bo_domain (*buffer_get_initial_domain)(struct pb_buffer *buf);
|
||||
|
||||
/**************************************************************************
|
||||
* Command submission.
|
||||
|
|
@ -596,7 +592,7 @@ struct radeon_winsys {
|
|||
void (*flush)(void *ctx, unsigned flags,
|
||||
struct pipe_fence_handle **fence),
|
||||
void *flush_ctx,
|
||||
struct radeon_winsys_cs_handle *trace_buf);
|
||||
struct pb_buffer *trace_buf);
|
||||
|
||||
/**
|
||||
* Destroy a command stream.
|
||||
|
|
@ -617,7 +613,7 @@ struct radeon_winsys {
|
|||
* \return Buffer index.
|
||||
*/
|
||||
unsigned (*cs_add_buffer)(struct radeon_winsys_cs *cs,
|
||||
struct radeon_winsys_cs_handle *buf,
|
||||
struct pb_buffer *buf,
|
||||
enum radeon_bo_usage usage,
|
||||
enum radeon_bo_domain domain,
|
||||
enum radeon_bo_priority priority);
|
||||
|
|
@ -630,7 +626,7 @@ struct radeon_winsys {
|
|||
* \return The buffer index, or -1 if the buffer has not been added.
|
||||
*/
|
||||
int (*cs_lookup_buffer)(struct radeon_winsys_cs *cs,
|
||||
struct radeon_winsys_cs_handle *buf);
|
||||
struct pb_buffer *buf);
|
||||
|
||||
/**
|
||||
* Return TRUE if there is enough memory in VRAM and GTT for the buffers
|
||||
|
|
@ -683,7 +679,7 @@ struct radeon_winsys {
|
|||
* \param buf A winsys buffer.
|
||||
*/
|
||||
boolean (*cs_is_buffer_referenced)(struct radeon_winsys_cs *cs,
|
||||
struct radeon_winsys_cs_handle *buf,
|
||||
struct pb_buffer *buf,
|
||||
enum radeon_bo_usage usage);
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -377,22 +377,39 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
|
|||
}
|
||||
}
|
||||
|
||||
if (buffers & PIPE_CLEAR_DEPTH &&
|
||||
zstex && zstex->htile_buffer &&
|
||||
if (zstex && zstex->htile_buffer &&
|
||||
zsbuf->u.tex.level == 0 &&
|
||||
zsbuf->u.tex.first_layer == 0 &&
|
||||
zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
|
||||
/* Need to disable EXPCLEAR temporarily if clearing
|
||||
* to a new value. */
|
||||
if (zstex->depth_cleared && zstex->depth_clear_value != depth) {
|
||||
sctx->db_depth_disable_expclear = true;
|
||||
if (buffers & PIPE_CLEAR_DEPTH) {
|
||||
/* Need to disable EXPCLEAR temporarily if clearing
|
||||
* to a new value. */
|
||||
if (zstex->depth_cleared && zstex->depth_clear_value != depth) {
|
||||
sctx->db_depth_disable_expclear = true;
|
||||
}
|
||||
|
||||
zstex->depth_clear_value = depth;
|
||||
sctx->framebuffer.dirty_zsbuf = true;
|
||||
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
|
||||
sctx->db_depth_clear = true;
|
||||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||
}
|
||||
|
||||
zstex->depth_clear_value = depth;
|
||||
sctx->framebuffer.dirty_zsbuf = true;
|
||||
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
|
||||
sctx->db_depth_clear = true;
|
||||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||
if (buffers & PIPE_CLEAR_STENCIL) {
|
||||
stencil &= 0xff;
|
||||
|
||||
/* Need to disable EXPCLEAR temporarily if clearing
|
||||
* to a new value. */
|
||||
if (zstex->stencil_cleared && zstex->stencil_clear_value != stencil) {
|
||||
sctx->db_stencil_disable_expclear = true;
|
||||
}
|
||||
|
||||
zstex->stencil_clear_value = stencil;
|
||||
sctx->framebuffer.dirty_zsbuf = true;
|
||||
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_STENCIL_CLEAR */
|
||||
sctx->db_stencil_clear = true;
|
||||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||
}
|
||||
}
|
||||
|
||||
si_blitter_begin(ctx, SI_CLEAR);
|
||||
|
|
@ -407,6 +424,13 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
|
|||
zstex->depth_cleared = true;
|
||||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||
}
|
||||
|
||||
if (sctx->db_stencil_clear) {
|
||||
sctx->db_stencil_clear = false;
|
||||
sctx->db_stencil_disable_expclear = false;
|
||||
zstex->stencil_cleared = true;
|
||||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||
}
|
||||
}
|
||||
|
||||
static void si_clear_render_target(struct pipe_context *ctx,
|
||||
|
|
|
|||
|
|
@ -267,7 +267,7 @@ static void si_launch_grid(
|
|||
/* The extra num_work_size_bytes are for work group / work item size information */
|
||||
kernel_args_size = program->input_size + num_work_size_bytes + 8 /* For scratch va */;
|
||||
|
||||
kernel_args = sctx->b.ws->buffer_map(input_buffer->cs_buf,
|
||||
kernel_args = sctx->b.ws->buffer_map(input_buffer->buf,
|
||||
sctx->b.gfx.cs, PIPE_TRANSFER_WRITE);
|
||||
for (i = 0; i < 3; i++) {
|
||||
kernel_args[i] = grid_layout[i];
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
|
|||
|
||||
/* Fallback for unaligned clears. */
|
||||
if (offset % 4 != 0 || size % 4 != 0) {
|
||||
uint8_t *map = sctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
|
||||
uint8_t *map = sctx->b.ws->buffer_map(r600_resource(dst)->buf,
|
||||
sctx->b.gfx.cs,
|
||||
PIPE_TRANSFER_WRITE);
|
||||
map += offset;
|
||||
|
|
@ -273,22 +273,26 @@ void si_copy_buffer(struct si_context *sctx,
|
|||
dst_offset += r600_resource(dst)->gpu_address;
|
||||
src_offset += r600_resource(src)->gpu_address;
|
||||
|
||||
/* If the size is not aligned, we must add a dummy copy at the end
|
||||
* just to align the internal counter. Otherwise, the DMA engine
|
||||
* would slow down by an order of magnitude for following copies.
|
||||
*/
|
||||
if (size % CP_DMA_ALIGNMENT)
|
||||
realign_size = CP_DMA_ALIGNMENT - (size % CP_DMA_ALIGNMENT);
|
||||
/* The workarounds aren't needed on Fiji and beyond. */
|
||||
if (sctx->b.family <= CHIP_CARRIZO ||
|
||||
sctx->b.family == CHIP_STONEY) {
|
||||
/* If the size is not aligned, we must add a dummy copy at the end
|
||||
* just to align the internal counter. Otherwise, the DMA engine
|
||||
* would slow down by an order of magnitude for following copies.
|
||||
*/
|
||||
if (size % CP_DMA_ALIGNMENT)
|
||||
realign_size = CP_DMA_ALIGNMENT - (size % CP_DMA_ALIGNMENT);
|
||||
|
||||
/* If the copy begins unaligned, we must start copying from the next
|
||||
* aligned block and the skipped part should be copied after everything
|
||||
* else has been copied. Only the src alignment matters, not dst.
|
||||
*/
|
||||
if (src_offset % CP_DMA_ALIGNMENT) {
|
||||
skipped_size = CP_DMA_ALIGNMENT - (src_offset % CP_DMA_ALIGNMENT);
|
||||
/* The main part will be skipped if the size is too small. */
|
||||
skipped_size = MIN2(skipped_size, size);
|
||||
size -= skipped_size;
|
||||
/* If the copy begins unaligned, we must start copying from the next
|
||||
* aligned block and the skipped part should be copied after everything
|
||||
* else has been copied. Only the src alignment matters, not dst.
|
||||
*/
|
||||
if (src_offset % CP_DMA_ALIGNMENT) {
|
||||
skipped_size = CP_DMA_ALIGNMENT - (src_offset % CP_DMA_ALIGNMENT);
|
||||
/* The main part will be skipped if the size is too small. */
|
||||
skipped_size = MIN2(skipped_size, size);
|
||||
size -= skipped_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* Flush the caches. */
|
||||
|
|
|
|||
|
|
@ -61,13 +61,16 @@ static void print_spaces(FILE *f, unsigned num)
|
|||
static void print_value(FILE *file, uint32_t value, int bits)
|
||||
{
|
||||
/* Guess if it's int or float */
|
||||
if (value <= (1 << 15))
|
||||
fprintf(file, "%u\n", value);
|
||||
else {
|
||||
if (value <= (1 << 15)) {
|
||||
if (value <= 9)
|
||||
fprintf(file, "%u\n", value);
|
||||
else
|
||||
fprintf(file, "%u (0x%0*x)\n", value, bits / 4, value);
|
||||
} else {
|
||||
float f = uif(value);
|
||||
|
||||
if (fabs(f) < 100000 && f*10 == floor(f*10))
|
||||
fprintf(file, "%.1ff\n", f);
|
||||
fprintf(file, "%.1ff (0x%0*x)\n", f, bits / 4, value);
|
||||
else
|
||||
/* Don't print more leading zeros than there are bits. */
|
||||
fprintf(file, "0x%0*x\n", bits / 4, value);
|
||||
|
|
@ -407,7 +410,7 @@ static void si_dump_last_ib(struct si_context *sctx, FILE *f)
|
|||
* waited for the context, so this buffer should be idle.
|
||||
* If the GPU is hung, there is no point in waiting for it.
|
||||
*/
|
||||
uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf,
|
||||
uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf,
|
||||
NULL,
|
||||
PIPE_TRANSFER_UNSYNCHRONIZED |
|
||||
PIPE_TRANSFER_READ);
|
||||
|
|
|
|||
|
|
@ -143,7 +143,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
|||
|
||||
sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
|
||||
sctx, sscreen->b.trace_bo ?
|
||||
sscreen->b.trace_bo->cs_buf : NULL);
|
||||
sscreen->b.trace_bo->buf : NULL);
|
||||
sctx->b.gfx.flush = si_context_gfx_flush;
|
||||
|
||||
/* Border colors. */
|
||||
|
|
@ -160,7 +160,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
|||
goto fail;
|
||||
|
||||
sctx->border_color_map =
|
||||
ws->buffer_map(sctx->border_color_buffer->cs_buf,
|
||||
ws->buffer_map(sctx->border_color_buffer->buf,
|
||||
NULL, PIPE_TRANSFER_WRITE);
|
||||
if (!sctx->border_color_map)
|
||||
goto fail;
|
||||
|
|
|
|||
|
|
@ -253,6 +253,8 @@ struct si_context {
|
|||
bool db_flush_stencil_inplace;
|
||||
bool db_depth_clear;
|
||||
bool db_depth_disable_expclear;
|
||||
bool db_stencil_clear;
|
||||
bool db_stencil_disable_expclear;
|
||||
unsigned ps_db_shader_control;
|
||||
|
||||
/* Emitted draw state. */
|
||||
|
|
|
|||
|
|
@ -3827,7 +3827,7 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
|
|||
if (!shader->bo)
|
||||
return -ENOMEM;
|
||||
|
||||
ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
|
||||
ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL,
|
||||
PIPE_TRANSFER_READ_WRITE);
|
||||
util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
|
||||
if (binary->rodata_size > 0) {
|
||||
|
|
@ -3836,7 +3836,7 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
|
|||
binary->rodata_size);
|
||||
}
|
||||
|
||||
sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
|
||||
sscreen->b.ws->buffer_unmap(shader->bo->buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -213,7 +213,6 @@ struct si_shader_selector {
|
|||
/* masks of "get_unique_index" bits */
|
||||
uint64_t outputs_written;
|
||||
uint32_t patch_outputs_written;
|
||||
uint32_t ps_colors_written;
|
||||
};
|
||||
|
||||
/* Valid shader configurations:
|
||||
|
|
|
|||
|
|
@ -267,7 +267,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at
|
|||
*/
|
||||
if (blend && blend->dual_src_blend &&
|
||||
sctx->ps_shader.cso &&
|
||||
(sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3)
|
||||
(sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
|
||||
mask = 0;
|
||||
|
||||
radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask);
|
||||
|
|
@ -347,10 +347,54 @@ static uint32_t si_translate_blend_factor(int blend_fact)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t si_translate_blend_opt_function(int blend_func)
|
||||
{
|
||||
switch (blend_func) {
|
||||
case PIPE_BLEND_ADD:
|
||||
return V_028760_OPT_COMB_ADD;
|
||||
case PIPE_BLEND_SUBTRACT:
|
||||
return V_028760_OPT_COMB_SUBTRACT;
|
||||
case PIPE_BLEND_REVERSE_SUBTRACT:
|
||||
return V_028760_OPT_COMB_REVSUBTRACT;
|
||||
case PIPE_BLEND_MIN:
|
||||
return V_028760_OPT_COMB_MIN;
|
||||
case PIPE_BLEND_MAX:
|
||||
return V_028760_OPT_COMB_MAX;
|
||||
default:
|
||||
return V_028760_OPT_COMB_BLEND_DISABLED;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
|
||||
{
|
||||
switch (blend_fact) {
|
||||
case PIPE_BLENDFACTOR_ZERO:
|
||||
return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
|
||||
case PIPE_BLENDFACTOR_ONE:
|
||||
return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
|
||||
case PIPE_BLENDFACTOR_SRC_COLOR:
|
||||
return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
|
||||
: V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
|
||||
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
|
||||
return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
|
||||
: V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
||||
return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
|
||||
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
|
||||
return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
|
||||
return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
|
||||
: V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
|
||||
default:
|
||||
return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
static void *si_create_blend_state_mode(struct pipe_context *ctx,
|
||||
const struct pipe_blend_state *state,
|
||||
unsigned mode)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context*)ctx;
|
||||
struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
|
||||
struct si_pm4_state *pm4 = &blend->pm4;
|
||||
|
||||
|
|
@ -416,8 +460,47 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
|
|||
} else {
|
||||
color_control |= S_028808_MODE(V_028808_CB_DISABLE);
|
||||
}
|
||||
si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
|
||||
|
||||
if (sctx->b.family == CHIP_STONEY) {
|
||||
uint32_t sx_blend_opt_control = 0;
|
||||
|
||||
for (int i = 0; i < 8; i++) {
|
||||
const int j = state->independent_blend_enable ? i : 0;
|
||||
|
||||
/* TODO: We can also set this if the surface doesn't contain RGB. */
|
||||
if (!state->rt[j].blend_enable ||
|
||||
!(state->rt[j].colormask & (PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B)))
|
||||
sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (4 * i);
|
||||
|
||||
/* TODO: We can also set this if the surface doesn't contain alpha. */
|
||||
if (!state->rt[j].blend_enable ||
|
||||
!(state->rt[j].colormask & PIPE_MASK_A))
|
||||
sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (4 * i);
|
||||
|
||||
if (!state->rt[j].blend_enable) {
|
||||
si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
|
||||
S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
|
||||
S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED));
|
||||
continue;
|
||||
}
|
||||
|
||||
si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
|
||||
S_028760_COLOR_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_src_factor, false)) |
|
||||
S_028760_COLOR_DST_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_dst_factor, false)) |
|
||||
S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(state->rt[j].rgb_func)) |
|
||||
S_028760_ALPHA_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_src_factor, true)) |
|
||||
S_028760_ALPHA_DST_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_dst_factor, true)) |
|
||||
S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(state->rt[j].alpha_func)));
|
||||
}
|
||||
|
||||
si_pm4_set_reg(pm4, R_02875C_SX_BLEND_OPT_CONTROL, sx_blend_opt_control);
|
||||
|
||||
/* RB+ doesn't work with dual source blending */
|
||||
if (blend->dual_src_blend)
|
||||
color_control |= S_028808_DISABLE_DUAL_QUAD(1);
|
||||
}
|
||||
|
||||
si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
|
||||
return blend;
|
||||
}
|
||||
|
||||
|
|
@ -1007,10 +1090,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
|
|||
radeon_emit(cs,
|
||||
S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
|
||||
S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
|
||||
} else if (sctx->db_depth_clear) {
|
||||
radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
|
||||
} else {
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs,
|
||||
S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
|
||||
S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
|
||||
}
|
||||
|
||||
/* DB_COUNT_CONTROL (occlusion queries) */
|
||||
|
|
@ -1037,12 +1120,9 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
|
|||
}
|
||||
|
||||
/* DB_RENDER_OVERRIDE2 */
|
||||
if (sctx->db_depth_disable_expclear) {
|
||||
radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
|
||||
S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
|
||||
} else {
|
||||
radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
|
||||
}
|
||||
radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
|
||||
S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
|
||||
S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear));
|
||||
|
||||
db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
|
||||
sctx->ps_db_shader_control;
|
||||
|
|
@ -1057,6 +1137,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
|
|||
if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
|
||||
db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
|
||||
|
||||
if (sctx->b.family == CHIP_STONEY &&
|
||||
sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
|
||||
db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
|
||||
|
||||
radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
|
||||
db_shader_control);
|
||||
}
|
||||
|
|
@ -1970,6 +2054,61 @@ static void si_initialize_color_surface(struct si_context *sctx,
|
|||
surf->export_16bpc = true;
|
||||
}
|
||||
|
||||
if (sctx->b.family == CHIP_STONEY &&
|
||||
!(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
|
||||
switch (desc->channel[0].size) {
|
||||
case 32:
|
||||
if (desc->nr_channels == 1) {
|
||||
if (swap == V_0280A0_SWAP_STD)
|
||||
surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R;
|
||||
else if (swap == V_0280A0_SWAP_ALT_REV)
|
||||
surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_A;
|
||||
}
|
||||
break;
|
||||
case 16:
|
||||
/* For 1-channel formats, use the superset thereof. */
|
||||
if (desc->nr_channels <= 2) {
|
||||
if (swap == V_0280A0_SWAP_STD ||
|
||||
swap == V_0280A0_SWAP_STD_REV)
|
||||
surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_GR;
|
||||
else
|
||||
surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_AR;
|
||||
}
|
||||
break;
|
||||
case 11:
|
||||
if (desc->nr_channels == 3) {
|
||||
surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_10_11_11;
|
||||
surf->sx_blend_opt_epsilon = V_028758_11BIT_FORMAT;
|
||||
}
|
||||
break;
|
||||
case 10:
|
||||
if (desc->nr_channels == 4) {
|
||||
surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_2_10_10_10;
|
||||
surf->sx_blend_opt_epsilon = V_028758_10BIT_FORMAT;
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
/* For 1 and 2-channel formats, use the superset thereof. */
|
||||
surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_8_8_8_8;
|
||||
surf->sx_blend_opt_epsilon = V_028758_8BIT_FORMAT;
|
||||
break;
|
||||
case 5:
|
||||
if (desc->nr_channels == 3) {
|
||||
surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_5_6_5;
|
||||
surf->sx_blend_opt_epsilon = V_028758_6BIT_FORMAT;
|
||||
} else if (desc->nr_channels == 4) {
|
||||
surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_1_5_5_5;
|
||||
surf->sx_blend_opt_epsilon = V_028758_5BIT_FORMAT;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
/* For 1 and 2-channel formats, use the superset thereof. */
|
||||
surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_4_4_4_4;
|
||||
surf->sx_blend_opt_epsilon = V_028758_4BIT_FORMAT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
surf->color_initialized = true;
|
||||
}
|
||||
|
||||
|
|
@ -2075,9 +2214,11 @@ static void si_init_depth_surface(struct si_context *sctx,
|
|||
z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
|
||||
S_028040_ALLOW_EXPCLEAR(1);
|
||||
|
||||
/* Use all of the htile_buffer for depth, because we don't
|
||||
* use HTILE for stencil because of FAST_STENCIL_DISABLE. */
|
||||
s_info |= S_028044_TILE_STENCIL_DISABLE(1);
|
||||
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
|
||||
s_info |= S_028044_ALLOW_EXPCLEAR(1);
|
||||
else
|
||||
/* Use all of the htile_buffer for depth if there's no stencil. */
|
||||
s_info |= S_028044_TILE_STENCIL_DISABLE(1);
|
||||
|
||||
uint64_t va = rtex->htile_buffer->gpu_address;
|
||||
db_htile_data_base = va >> 8;
|
||||
|
|
@ -2238,6 +2379,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
|
|||
unsigned i, nr_cbufs = state->nr_cbufs;
|
||||
struct r600_texture *tex = NULL;
|
||||
struct r600_surface *cb = NULL;
|
||||
uint32_t sx_ps_downconvert = 0;
|
||||
uint32_t sx_blend_opt_epsilon = 0;
|
||||
|
||||
/* Colorbuffers. */
|
||||
for (i = 0; i < nr_cbufs; i++) {
|
||||
|
|
@ -2288,18 +2431,29 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
|
|||
|
||||
if (sctx->b.chip_class >= VI)
|
||||
radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */
|
||||
|
||||
sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
|
||||
sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
|
||||
}
|
||||
/* set CB_COLOR1_INFO for possible dual-src blending */
|
||||
if (i == 1 && state->cbufs[0] &&
|
||||
sctx->framebuffer.dirty_cbufs & (1 << 0)) {
|
||||
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
|
||||
cb->cb_color_info | tex->cb_color_info);
|
||||
sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
|
||||
sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
|
||||
i++;
|
||||
}
|
||||
for (; i < 8 ; i++)
|
||||
if (sctx->framebuffer.dirty_cbufs & (1 << i))
|
||||
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
|
||||
|
||||
if (sctx->b.family == CHIP_STONEY) {
|
||||
radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 2);
|
||||
radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */
|
||||
radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */
|
||||
}
|
||||
|
||||
/* ZS buffer. */
|
||||
if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
|
||||
struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
|
||||
|
|
@ -2332,8 +2486,11 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
|
|||
radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */
|
||||
radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */
|
||||
|
||||
radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
|
||||
radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
|
||||
radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */
|
||||
|
||||
radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
|
||||
radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
|
||||
radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
|
||||
zb->pa_su_poly_offset_db_fmt_cntl);
|
||||
} else if (sctx->framebuffer.dirty_zsbuf) {
|
||||
|
|
@ -3424,18 +3581,12 @@ static void si_init_config(struct si_context *sctx)
|
|||
si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
|
||||
si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
|
||||
si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
|
||||
si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0);
|
||||
si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
|
||||
si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
|
||||
si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
|
||||
|
||||
/* There is a hang if stencil is used and fast stencil is enabled
|
||||
* regardless of whether HTILE is depth-only or not.
|
||||
*/
|
||||
si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
|
||||
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
|
||||
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
|
||||
S_02800C_FAST_STENCIL_DISABLE(1));
|
||||
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
|
||||
|
||||
si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
|
||||
si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
|
||||
|
|
@ -3460,7 +3611,7 @@ static void si_init_config(struct si_context *sctx)
|
|||
}
|
||||
|
||||
if (sctx->b.family == CHIP_STONEY)
|
||||
si_pm4_set_reg(pm4, R_028754_SX_PS_DOWNCONVERT, 0);
|
||||
si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
|
||||
|
||||
si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
|
||||
if (sctx->b.chip_class >= CIK)
|
||||
|
|
|
|||
|
|
@ -216,6 +216,18 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
|||
radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26));
|
||||
}
|
||||
|
||||
static unsigned si_num_prims_for_vertices(const struct pipe_draw_info *info)
|
||||
{
|
||||
switch (info->mode) {
|
||||
case PIPE_PRIM_PATCHES:
|
||||
return info->count / info->vertices_per_patch;
|
||||
case R600_PRIM_RECTANGLE_LIST:
|
||||
return info->count / 3;
|
||||
default:
|
||||
return u_prims_for_vertices(info->mode, info->count);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
|
||||
const struct pipe_draw_info *info,
|
||||
unsigned num_patches)
|
||||
|
|
@ -320,7 +332,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
|
|||
if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
|
||||
(info->indirect ||
|
||||
(info->instance_count > 1 &&
|
||||
u_prims_for_vertices(info->mode, info->count) <= 1)))
|
||||
si_num_prims_for_vertices(info) <= 1)))
|
||||
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
|
||||
|
||||
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
|
||||
|
|
@ -872,7 +884,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
|||
|
||||
/* Workaround for a VGT hang when streamout is enabled.
|
||||
* It must be done after drawing. */
|
||||
if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA) &&
|
||||
if ((sctx->b.family == CHIP_HAWAII ||
|
||||
sctx->b.family == CHIP_TONGA ||
|
||||
sctx->b.family == CHIP_FIJI) &&
|
||||
(sctx->b.streamout.streamout_enabled ||
|
||||
sctx->b.streamout.prims_gen_query_enabled)) {
|
||||
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
|
||||
|
|
|
|||
|
|
@ -730,15 +730,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
}
|
||||
sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
|
||||
break;
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
for (i = 0; i < sel->info.num_outputs; i++) {
|
||||
unsigned name = sel->info.output_semantic_name[i];
|
||||
unsigned index = sel->info.output_semantic_index[i];
|
||||
|
||||
if (name == TGSI_SEMANTIC_COLOR)
|
||||
sel->ps_colors_written |= 1 << index;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
|
||||
|
|
|
|||
|
|
@ -103,11 +103,9 @@ struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe,
|
|||
if (!resources[i])
|
||||
continue;
|
||||
|
||||
/* recreate the CS handle */
|
||||
resources[i]->resource.cs_buf = ctx->b.ws->buffer_get_cs_handle(
|
||||
resources[i]->resource.buf);
|
||||
/* reset the address */
|
||||
resources[i]->resource.gpu_address = ctx->b.ws->buffer_get_virtual_address(
|
||||
resources[i]->resource.cs_buf);
|
||||
resources[i]->resource.buf);
|
||||
}
|
||||
|
||||
template.height *= array_size;
|
||||
|
|
@ -121,7 +119,7 @@ error:
|
|||
}
|
||||
|
||||
/* set the decoding target buffer offsets */
|
||||
static struct radeon_winsys_cs_handle* si_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_buffer *buf)
|
||||
static struct pb_buffer* si_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_buffer *buf)
|
||||
{
|
||||
struct r600_texture *luma = (struct r600_texture *)buf->resources[0];
|
||||
struct r600_texture *chroma = (struct r600_texture *)buf->resources[1];
|
||||
|
|
@ -130,18 +128,18 @@ static struct radeon_winsys_cs_handle* si_uvd_set_dtb(struct ruvd_msg *msg, stru
|
|||
|
||||
ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface);
|
||||
|
||||
return luma->resource.cs_buf;
|
||||
return luma->resource.buf;
|
||||
}
|
||||
|
||||
/* get the radeon resources for VCE */
|
||||
static void si_vce_get_buffer(struct pipe_resource *resource,
|
||||
struct radeon_winsys_cs_handle **handle,
|
||||
struct pb_buffer **handle,
|
||||
struct radeon_surf **surface)
|
||||
{
|
||||
struct r600_texture *res = (struct r600_texture *)resource;
|
||||
|
||||
if (handle)
|
||||
*handle = res->resource.cs_buf;
|
||||
*handle = res->resource.buf;
|
||||
|
||||
if (surface)
|
||||
*surface = &res->surface;
|
||||
|
|
|
|||
|
|
@ -6771,6 +6771,9 @@
|
|||
#define G_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) >> 27) & 0x1)
|
||||
#define C_028804_ENABLE_POSTZ_OVERRASTERIZATION 0xF7FFFFFF
|
||||
#define R_028808_CB_COLOR_CONTROL 0x028808
|
||||
#define S_028808_DISABLE_DUAL_QUAD(x) (((x) & 0x1) << 0)
|
||||
#define G_028808_DISABLE_DUAL_QUAD(x) (((x) >> 0) & 0x1)
|
||||
#define C_028808_DISABLE_DUAL_QUAD 0xFFFFFFFE
|
||||
#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3)
|
||||
#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1)
|
||||
#define C_028808_DEGAMMA_ENABLE 0xFFFFFFF7
|
||||
|
|
|
|||
|
|
@ -79,10 +79,10 @@ struct softpipe_context {
|
|||
struct pipe_resource *constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
|
||||
struct pipe_framebuffer_state framebuffer;
|
||||
struct pipe_poly_stipple poly_stipple;
|
||||
struct pipe_scissor_state scissor;
|
||||
struct pipe_scissor_state scissors[PIPE_MAX_VIEWPORTS];
|
||||
struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
|
||||
|
||||
struct pipe_viewport_state viewport;
|
||||
struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS];
|
||||
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
|
||||
struct pipe_index_buffer index_buffer;
|
||||
struct pipe_resource *mapped_vs_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS];
|
||||
|
|
@ -123,6 +123,9 @@ struct softpipe_context {
|
|||
/** Which vertex shader output slot contains point size */
|
||||
int psize_slot;
|
||||
|
||||
/** Which vertex shader output slot contains viewport index */
|
||||
int viewport_index_slot;
|
||||
|
||||
/** Which vertex shader output slot contains layer */
|
||||
int layer_slot;
|
||||
|
||||
|
|
@ -140,7 +143,7 @@ struct softpipe_context {
|
|||
unsigned reduced_prim;
|
||||
|
||||
/** Derived from scissor and surface bounds: */
|
||||
struct pipe_scissor_state cliprect;
|
||||
struct pipe_scissor_state cliprect[PIPE_MAX_VIEWPORTS];
|
||||
|
||||
unsigned line_stipple_counter;
|
||||
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ struct quad_header_input
|
|||
{
|
||||
int x0, y0; /**< quad window pos, always even */
|
||||
unsigned layer;
|
||||
unsigned viewport_index;
|
||||
float coverage[TGSI_QUAD_SIZE]; /**< fragment coverage for antialiasing */
|
||||
unsigned facing:1; /**< Front (0) or back (1) facing? */
|
||||
unsigned prim:2; /**< QUAD_PRIM_POINT, LINE, TRI */
|
||||
|
|
|
|||
|
|
@ -785,6 +785,7 @@ depth_test_quads_fallback(struct quad_stage *qs,
|
|||
boolean interp_depth = !fsInfo->writes_z;
|
||||
boolean shader_stencil_ref = fsInfo->writes_stencil;
|
||||
struct depth_data data;
|
||||
unsigned vp_idx = quads[0]->input.viewport_index;
|
||||
|
||||
data.use_shader_stencil_refs = FALSE;
|
||||
|
||||
|
|
@ -804,8 +805,8 @@ depth_test_quads_fallback(struct quad_stage *qs,
|
|||
quads[0]->input.y0, quads[0]->input.layer);
|
||||
data.clamp = !qs->softpipe->rasterizer->depth_clip;
|
||||
|
||||
near_val = qs->softpipe->viewport.translate[2] - qs->softpipe->viewport.scale[2];
|
||||
far_val = near_val + (qs->softpipe->viewport.scale[2] * 2.0);
|
||||
near_val = qs->softpipe->viewports[vp_idx].translate[2] - qs->softpipe->viewports[vp_idx].scale[2];
|
||||
far_val = near_val + (qs->softpipe->viewports[vp_idx].scale[2] * 2.0);
|
||||
data.minval = MIN2(near_val, far_val);
|
||||
data.maxval = MAX2(near_val, far_val);
|
||||
|
||||
|
|
|
|||
|
|
@ -187,7 +187,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
|
||||
return 0;
|
||||
case PIPE_CAP_MAX_VIEWPORTS:
|
||||
return 1;
|
||||
return PIPE_MAX_VIEWPORTS;
|
||||
case PIPE_CAP_ENDIANNESS:
|
||||
return PIPE_ENDIAN_NATIVE;
|
||||
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
|
||||
|
|
|
|||
|
|
@ -128,7 +128,8 @@ struct setup_context {
|
|||
static inline void
|
||||
quad_clip(struct setup_context *setup, struct quad_header *quad)
|
||||
{
|
||||
const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
|
||||
unsigned viewport_index = quad[0].input.viewport_index;
|
||||
const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect[viewport_index];
|
||||
const int minx = (int) cliprect->minx;
|
||||
const int maxx = (int) cliprect->maxx;
|
||||
const int miny = (int) cliprect->miny;
|
||||
|
|
@ -159,7 +160,7 @@ quad_clip(struct setup_context *setup, struct quad_header *quad)
|
|||
static inline void
|
||||
clip_emit_quad(struct setup_context *setup, struct quad_header *quad)
|
||||
{
|
||||
quad_clip( setup, quad );
|
||||
quad_clip(setup, quad);
|
||||
|
||||
if (quad->inout.mask) {
|
||||
struct softpipe_context *sp = setup->softpipe;
|
||||
|
|
@ -707,9 +708,10 @@ static void
|
|||
subtriangle(struct setup_context *setup,
|
||||
struct edge *eleft,
|
||||
struct edge *eright,
|
||||
int lines)
|
||||
int lines,
|
||||
unsigned viewport_index)
|
||||
{
|
||||
const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
|
||||
const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect[viewport_index];
|
||||
const int minx = (int) cliprect->minx;
|
||||
const int maxx = (int) cliprect->maxx;
|
||||
const int miny = (int) cliprect->miny;
|
||||
|
|
@ -807,6 +809,7 @@ sp_setup_tri(struct setup_context *setup,
|
|||
{
|
||||
float det;
|
||||
uint layer = 0;
|
||||
unsigned viewport_index = 0;
|
||||
#if DEBUG_VERTS
|
||||
debug_printf("Setup triangle:\n");
|
||||
print_vertex(setup, v0);
|
||||
|
|
@ -845,19 +848,25 @@ sp_setup_tri(struct setup_context *setup,
|
|||
}
|
||||
setup->quad[0].input.layer = layer;
|
||||
|
||||
if (setup->softpipe->viewport_index_slot > 0) {
|
||||
unsigned *udata = (unsigned*)v0[setup->softpipe->viewport_index_slot];
|
||||
viewport_index = sp_clamp_viewport_idx(*udata);
|
||||
}
|
||||
setup->quad[0].input.viewport_index = viewport_index;
|
||||
|
||||
/* init_constant_attribs( setup ); */
|
||||
|
||||
if (setup->oneoverarea < 0.0) {
|
||||
/* emaj on left:
|
||||
*/
|
||||
subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
|
||||
subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
|
||||
subtriangle(setup, &setup->emaj, &setup->ebot, setup->ebot.lines, viewport_index);
|
||||
subtriangle(setup, &setup->emaj, &setup->etop, setup->etop.lines, viewport_index);
|
||||
}
|
||||
else {
|
||||
/* emaj on right:
|
||||
*/
|
||||
subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
|
||||
subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
|
||||
subtriangle(setup, &setup->ebot, &setup->emaj, setup->ebot.lines, viewport_index);
|
||||
subtriangle(setup, &setup->etop, &setup->emaj, setup->etop.lines, viewport_index);
|
||||
}
|
||||
|
||||
flush_spans( setup );
|
||||
|
|
@ -1054,7 +1063,7 @@ plot(struct setup_context *setup, int x, int y)
|
|||
/* flush prev quad, start new quad */
|
||||
|
||||
if (setup->quad[0].input.x0 != -1)
|
||||
clip_emit_quad( setup, &setup->quad[0] );
|
||||
clip_emit_quad(setup, &setup->quad[0]);
|
||||
|
||||
setup->quad[0].input.x0 = quadX;
|
||||
setup->quad[0].input.y0 = quadY;
|
||||
|
|
@ -1083,6 +1092,7 @@ sp_setup_line(struct setup_context *setup,
|
|||
int dy = y1 - y0;
|
||||
int xstep, ystep;
|
||||
uint layer = 0;
|
||||
unsigned viewport_index = 0;
|
||||
|
||||
#if DEBUG_VERTS
|
||||
debug_printf("Setup line:\n");
|
||||
|
|
@ -1132,6 +1142,12 @@ sp_setup_line(struct setup_context *setup,
|
|||
}
|
||||
setup->quad[0].input.layer = layer;
|
||||
|
||||
if (setup->softpipe->viewport_index_slot > 0) {
|
||||
unsigned *udata = (unsigned*)setup->vprovoke[setup->softpipe->viewport_index_slot];
|
||||
viewport_index = sp_clamp_viewport_idx(*udata);
|
||||
}
|
||||
setup->quad[0].input.viewport_index = viewport_index;
|
||||
|
||||
/* XXX temporary: set coverage to 1.0 so the line appears
|
||||
* if AA mode happens to be enabled.
|
||||
*/
|
||||
|
|
@ -1183,7 +1199,7 @@ sp_setup_line(struct setup_context *setup,
|
|||
|
||||
/* draw final quad */
|
||||
if (setup->quad[0].inout.mask) {
|
||||
clip_emit_quad( setup, &setup->quad[0] );
|
||||
clip_emit_quad(setup, &setup->quad[0]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1223,6 +1239,7 @@ sp_setup_point(struct setup_context *setup,
|
|||
const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
|
||||
uint fragSlot;
|
||||
uint layer = 0;
|
||||
unsigned viewport_index = 0;
|
||||
#if DEBUG_VERTS
|
||||
debug_printf("Setup point:\n");
|
||||
print_vertex(setup, v0);
|
||||
|
|
@ -1239,6 +1256,12 @@ sp_setup_point(struct setup_context *setup,
|
|||
}
|
||||
setup->quad[0].input.layer = layer;
|
||||
|
||||
if (setup->softpipe->viewport_index_slot > 0) {
|
||||
unsigned *udata = (unsigned*)v0[setup->softpipe->viewport_index_slot];
|
||||
viewport_index = sp_clamp_viewport_idx(*udata);
|
||||
}
|
||||
setup->quad[0].input.viewport_index = viewport_index;
|
||||
|
||||
/* For points, all interpolants are constant-valued.
|
||||
* However, for point sprites, we'll need to setup texcoords appropriately.
|
||||
* XXX: which coefficients are the texcoords???
|
||||
|
|
@ -1300,7 +1323,7 @@ sp_setup_point(struct setup_context *setup,
|
|||
setup->quad[0].input.x0 = (int) x - ix;
|
||||
setup->quad[0].input.y0 = (int) y - iy;
|
||||
setup->quad[0].inout.mask = (1 << ix) << (2 * iy);
|
||||
clip_emit_quad( setup, &setup->quad[0] );
|
||||
clip_emit_quad(setup, &setup->quad[0]);
|
||||
}
|
||||
else {
|
||||
if (round) {
|
||||
|
|
@ -1361,7 +1384,7 @@ sp_setup_point(struct setup_context *setup,
|
|||
if (setup->quad[0].inout.mask) {
|
||||
setup->quad[0].input.x0 = ix;
|
||||
setup->quad[0].input.y0 = iy;
|
||||
clip_emit_quad( setup, &setup->quad[0] );
|
||||
clip_emit_quad(setup, &setup->quad[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1408,7 +1431,7 @@ sp_setup_point(struct setup_context *setup,
|
|||
setup->quad[0].inout.mask = mask;
|
||||
setup->quad[0].input.x0 = ix;
|
||||
setup->quad[0].input.y0 = iy;
|
||||
clip_emit_quad( setup, &setup->quad[0] );
|
||||
clip_emit_quad(setup, &setup->quad[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,6 +45,11 @@ void
|
|||
sp_setup_point( struct setup_context *setup,
|
||||
const float (*v0)[4] );
|
||||
|
||||
static inline unsigned
|
||||
sp_clamp_viewport_idx(int idx)
|
||||
{
|
||||
return (PIPE_MAX_VIEWPORTS > idx && idx >= 0) ? idx : 0;
|
||||
}
|
||||
|
||||
struct setup_context *sp_setup_create_context( struct softpipe_context *softpipe );
|
||||
void sp_setup_prepare( struct setup_context *setup );
|
||||
|
|
|
|||
|
|
@ -47,15 +47,16 @@ static void
|
|||
softpipe_set_viewport_states(struct pipe_context *pipe,
|
||||
unsigned start_slot,
|
||||
unsigned num_viewports,
|
||||
const struct pipe_viewport_state *viewport)
|
||||
const struct pipe_viewport_state *viewports)
|
||||
{
|
||||
struct softpipe_context *softpipe = softpipe_context(pipe);
|
||||
|
||||
/* pass the viewport info to the draw module */
|
||||
draw_set_viewport_states(softpipe->draw, start_slot, num_viewports,
|
||||
viewport);
|
||||
viewports);
|
||||
|
||||
softpipe->viewport = *viewport; /* struct copy */
|
||||
memcpy(softpipe->viewports + start_slot, viewports,
|
||||
sizeof(struct pipe_viewport_state) * num_viewports);
|
||||
softpipe->dirty |= SP_NEW_VIEWPORT;
|
||||
}
|
||||
|
||||
|
|
@ -64,13 +65,17 @@ static void
|
|||
softpipe_set_scissor_states(struct pipe_context *pipe,
|
||||
unsigned start_slot,
|
||||
unsigned num_scissors,
|
||||
const struct pipe_scissor_state *scissor)
|
||||
const struct pipe_scissor_state *scissors)
|
||||
{
|
||||
struct softpipe_context *softpipe = softpipe_context(pipe);
|
||||
|
||||
draw_flush(softpipe->draw);
|
||||
|
||||
softpipe->scissor = *scissor; /* struct copy */
|
||||
debug_assert(start_slot < PIPE_MAX_VIEWPORTS);
|
||||
debug_assert((start_slot + num_scissors) <= PIPE_MAX_VIEWPORTS);
|
||||
|
||||
memcpy(softpipe->scissors + start_slot, scissors,
|
||||
sizeof(struct pipe_scissor_state) * num_scissors);
|
||||
softpipe->dirty |= SP_NEW_SCISSOR;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ struct vertex_info *
|
|||
softpipe_get_vertex_info(struct softpipe_context *softpipe)
|
||||
{
|
||||
struct vertex_info *vinfo = &softpipe->vertex_info;
|
||||
int vs_index;
|
||||
|
||||
if (vinfo->num_attribs == 0) {
|
||||
/* compute vertex layout now */
|
||||
|
|
@ -135,17 +136,35 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
|
|||
draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
|
||||
}
|
||||
|
||||
softpipe->psize_slot = draw_find_shader_output(softpipe->draw,
|
||||
TGSI_SEMANTIC_PSIZE, 0);
|
||||
if (softpipe->psize_slot >= 0) {
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT,
|
||||
softpipe->psize_slot);
|
||||
/* Figure out if we need pointsize as well. */
|
||||
vs_index = draw_find_shader_output(softpipe->draw,
|
||||
TGSI_SEMANTIC_PSIZE, 0);
|
||||
|
||||
if (vs_index >= 0) {
|
||||
softpipe->psize_slot = vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
}
|
||||
|
||||
softpipe->layer_slot = draw_find_shader_output(softpipe->draw,
|
||||
TGSI_SEMANTIC_LAYER, 0);
|
||||
if (softpipe->layer_slot >= 0) {
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, softpipe->layer_slot);
|
||||
/* Figure out if we need viewport index */
|
||||
vs_index = draw_find_shader_output(softpipe->draw,
|
||||
TGSI_SEMANTIC_VIEWPORT_INDEX,
|
||||
0);
|
||||
if (vs_index >= 0) {
|
||||
softpipe->viewport_index_slot = vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
} else {
|
||||
softpipe->viewport_index_slot = 0;
|
||||
}
|
||||
|
||||
/* Figure out if we need layer */
|
||||
vs_index = draw_find_shader_output(softpipe->draw,
|
||||
TGSI_SEMANTIC_LAYER,
|
||||
0);
|
||||
if (vs_index >= 0) {
|
||||
softpipe->layer_slot = vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
} else {
|
||||
softpipe->layer_slot = 0;
|
||||
}
|
||||
|
||||
draw_compute_vertex_size(vinfo);
|
||||
|
|
@ -183,30 +202,33 @@ softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe)
|
|||
static void
|
||||
compute_cliprect(struct softpipe_context *sp)
|
||||
{
|
||||
unsigned i;
|
||||
/* SP_NEW_FRAMEBUFFER
|
||||
*/
|
||||
uint surfWidth = sp->framebuffer.width;
|
||||
uint surfHeight = sp->framebuffer.height;
|
||||
|
||||
/* SP_NEW_RASTERIZER
|
||||
*/
|
||||
if (sp->rasterizer->scissor) {
|
||||
|
||||
/* SP_NEW_SCISSOR
|
||||
*
|
||||
* clip to scissor rect:
|
||||
for (i = 0; i < PIPE_MAX_VIEWPORTS; i++) {
|
||||
/* SP_NEW_RASTERIZER
|
||||
*/
|
||||
sp->cliprect.minx = MAX2(sp->scissor.minx, 0);
|
||||
sp->cliprect.miny = MAX2(sp->scissor.miny, 0);
|
||||
sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth);
|
||||
sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight);
|
||||
}
|
||||
else {
|
||||
/* clip to surface bounds */
|
||||
sp->cliprect.minx = 0;
|
||||
sp->cliprect.miny = 0;
|
||||
sp->cliprect.maxx = surfWidth;
|
||||
sp->cliprect.maxy = surfHeight;
|
||||
if (sp->rasterizer->scissor) {
|
||||
|
||||
/* SP_NEW_SCISSOR
|
||||
*
|
||||
* clip to scissor rect:
|
||||
*/
|
||||
sp->cliprect[i].minx = MAX2(sp->scissors[i].minx, 0);
|
||||
sp->cliprect[i].miny = MAX2(sp->scissors[i].miny, 0);
|
||||
sp->cliprect[i].maxx = MIN2(sp->scissors[i].maxx, surfWidth);
|
||||
sp->cliprect[i].maxy = MIN2(sp->scissors[i].maxy, surfHeight);
|
||||
}
|
||||
else {
|
||||
/* clip to surface bounds */
|
||||
sp->cliprect[i].minx = 0;
|
||||
sp->cliprect[i].miny = 0;
|
||||
sp->cliprect[i].maxx = surfWidth;
|
||||
sp->cliprect[i].maxy = surfHeight;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -67,8 +67,8 @@ static void sp_blit(struct pipe_context *pipe,
|
|||
util_blitter_save_so_targets(sp->blitter, sp->num_so_targets,
|
||||
(struct pipe_stream_output_target**)sp->so_targets);
|
||||
util_blitter_save_rasterizer(sp->blitter, sp->rasterizer);
|
||||
util_blitter_save_viewport(sp->blitter, &sp->viewport);
|
||||
util_blitter_save_scissor(sp->blitter, &sp->scissor);
|
||||
util_blitter_save_viewport(sp->blitter, &sp->viewports[0]);
|
||||
util_blitter_save_scissor(sp->blitter, &sp->scissors[0]);
|
||||
util_blitter_save_fragment_shader(sp->blitter, sp->fs);
|
||||
util_blitter_save_blend(sp->blitter, sp->blend);
|
||||
util_blitter_save_depth_stencil_alpha(sp->blitter, sp->depth_stencil);
|
||||
|
|
|
|||
|
|
@ -589,10 +589,10 @@ vc4_nir_next_output_driver_location(nir_shader *s)
|
|||
{
|
||||
int maxloc = -1;
|
||||
|
||||
nir_foreach_variable(var, &s->inputs)
|
||||
maxloc = MAX2(maxloc, var->data.driver_location);
|
||||
nir_foreach_variable(var, &s->outputs)
|
||||
maxloc = MAX2(maxloc, (int)var->data.driver_location);
|
||||
|
||||
return maxloc;
|
||||
return maxloc + 1;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -605,12 +605,11 @@ vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
|
|||
sample_mask->data.driver_location =
|
||||
vc4_nir_next_output_driver_location(c->s);
|
||||
sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
|
||||
exec_list_push_tail(&c->s->outputs, &sample_mask->node);
|
||||
|
||||
nir_intrinsic_instr *intr =
|
||||
nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
|
||||
intr->num_components = 1;
|
||||
intr->const_index[0] = sample_mask->data.location;
|
||||
intr->const_index[0] = sample_mask->data.driver_location;
|
||||
|
||||
intr->src[0] = nir_src_for_ssa(val);
|
||||
intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
|
||||
|
|
|
|||
|
|
@ -326,7 +326,8 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
|
|||
/* Color output is lowered by vc4_nir_lower_blend(). */
|
||||
if (c->stage == QSTAGE_FRAG &&
|
||||
(output_var->data.location == FRAG_RESULT_COLOR ||
|
||||
output_var->data.location == FRAG_RESULT_DATA0)) {
|
||||
output_var->data.location == FRAG_RESULT_DATA0 ||
|
||||
output_var->data.location == FRAG_RESULT_SAMPLE_MASK)) {
|
||||
intr->const_index[0] *= 4;
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -94,7 +94,12 @@ static void
|
|||
replace_with_mov(struct vc4_compile *c, struct qinst *inst, struct qreg arg)
|
||||
{
|
||||
dump_from(c, inst);
|
||||
inst->op = QOP_MOV;
|
||||
if (qir_is_mul(inst))
|
||||
inst->op = QOP_MMOV;
|
||||
else if (qir_is_float_input(inst))
|
||||
inst->op = QOP_FMOV;
|
||||
else
|
||||
inst->op = QOP_MOV;
|
||||
inst->src[0] = arg;
|
||||
inst->src[1] = c->undef;
|
||||
dump_to(c, inst);
|
||||
|
|
@ -177,10 +182,29 @@ qir_opt_algebraic(struct vc4_compile *c)
|
|||
|
||||
break;
|
||||
|
||||
case QOP_FMIN:
|
||||
if (is_1f(c, inst->src[1]) &&
|
||||
inst->src[0].pack >= QPU_UNPACK_8D_REP &&
|
||||
inst->src[0].pack <= QPU_UNPACK_8D) {
|
||||
replace_with_mov(c, inst, inst->src[0]);
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case QOP_FMAX:
|
||||
if (is_zero(c, inst->src[1]) &&
|
||||
inst->src[0].pack >= QPU_UNPACK_8D_REP &&
|
||||
inst->src[0].pack <= QPU_UNPACK_8D) {
|
||||
replace_with_mov(c, inst, inst->src[0]);
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case QOP_FSUB:
|
||||
case QOP_SUB:
|
||||
if (is_zero(c, inst->src[1])) {
|
||||
replace_with_mov(c, inst, inst->src[0]);
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -423,13 +423,19 @@ qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst)
|
|||
struct qreg
|
||||
qir_follow_movs(struct vc4_compile *c, struct qreg reg)
|
||||
{
|
||||
int pack = reg.pack;
|
||||
|
||||
while (reg.file == QFILE_TEMP &&
|
||||
c->defs[reg.index] &&
|
||||
c->defs[reg.index]->op == QOP_MOV &&
|
||||
!c->defs[reg.index]->dst.pack) {
|
||||
(c->defs[reg.index]->op == QOP_MOV ||
|
||||
c->defs[reg.index]->op == QOP_FMOV ||
|
||||
c->defs[reg.index]->op == QOP_MMOV)&&
|
||||
!c->defs[reg.index]->dst.pack &&
|
||||
!c->defs[reg.index]->src[0].pack) {
|
||||
reg = c->defs[reg.index]->src[0];
|
||||
}
|
||||
|
||||
reg.pack = pack;
|
||||
return reg;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -502,7 +502,7 @@ nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
|
|||
void vc4_nir_lower_txf_ms(struct vc4_compile *c);
|
||||
void qir_lower_uniforms(struct vc4_compile *c);
|
||||
|
||||
void qpu_schedule_instructions(struct vc4_compile *c);
|
||||
uint32_t qpu_schedule_instructions(struct vc4_compile *c);
|
||||
|
||||
void qir_SF(struct vc4_compile *c, struct qreg src);
|
||||
|
||||
|
|
|
|||
|
|
@ -513,7 +513,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
|
|||
}
|
||||
}
|
||||
|
||||
qpu_schedule_instructions(c);
|
||||
uint32_t cycles = qpu_schedule_instructions(c);
|
||||
uint32_t inst_count_at_schedule_time = c->qpu_inst_count;
|
||||
|
||||
/* thread end can't have VPM write or read */
|
||||
if (QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
|
||||
|
|
@ -556,6 +557,15 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
|
|||
break;
|
||||
}
|
||||
|
||||
cycles += c->qpu_inst_count - inst_count_at_schedule_time;
|
||||
|
||||
if (vc4_debug & VC4_DEBUG_SHADERDB) {
|
||||
fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
|
||||
qir_get_stage_name(c->stage),
|
||||
c->program_id, c->variant_id,
|
||||
cycles);
|
||||
}
|
||||
|
||||
if (vc4_debug & VC4_DEBUG_QPU)
|
||||
vc4_dump_program(c);
|
||||
|
||||
|
|
|
|||
|
|
@ -50,6 +50,9 @@ struct schedule_node {
|
|||
uint32_t child_array_size;
|
||||
uint32_t parent_count;
|
||||
|
||||
/* Longest cycles + n->latency of any parent of this node. */
|
||||
uint32_t unblocked_time;
|
||||
|
||||
/**
|
||||
* Minimum number of cycles from scheduling this instruction until the
|
||||
* end of the program, based on the slowest dependency chain through
|
||||
|
|
@ -90,6 +93,8 @@ struct schedule_state {
|
|||
struct schedule_node *last_tlb;
|
||||
struct schedule_node *last_vpm;
|
||||
enum direction dir;
|
||||
/* Estimated cycle when the current instruction would start. */
|
||||
uint32_t time;
|
||||
};
|
||||
|
||||
static void
|
||||
|
|
@ -599,10 +604,8 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
|
|||
static void
|
||||
dump_state(struct list_head *schedule_list)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
|
||||
list_for_each_entry(struct schedule_node, n, schedule_list, link) {
|
||||
fprintf(stderr, "%3d: ", i++);
|
||||
fprintf(stderr, " t=%4d: ", n->unblocked_time);
|
||||
vc4_qpu_disasm(&n->inst->inst, 1);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
|
|
@ -611,7 +614,7 @@ dump_state(struct list_head *schedule_list)
|
|||
if (!child)
|
||||
continue;
|
||||
|
||||
fprintf(stderr, " - ");
|
||||
fprintf(stderr, " - ");
|
||||
vc4_qpu_disasm(&child->inst->inst, 1);
|
||||
fprintf(stderr, " (%d parents, %c)\n",
|
||||
child->parent_count,
|
||||
|
|
@ -638,6 +641,7 @@ compute_delay(struct schedule_node *n)
|
|||
|
||||
static void
|
||||
mark_instruction_scheduled(struct list_head *schedule_list,
|
||||
uint32_t time,
|
||||
struct schedule_node *node,
|
||||
bool war_only)
|
||||
{
|
||||
|
|
@ -654,6 +658,14 @@ mark_instruction_scheduled(struct list_head *schedule_list,
|
|||
if (war_only && !node->children[i].write_after_read)
|
||||
continue;
|
||||
|
||||
/* If the requirement is only that the node not appear before
|
||||
* the last read of its destination, then it can be scheduled
|
||||
* immediately after (or paired with!) the thing reading the
|
||||
* destination.
|
||||
*/
|
||||
int latency_from_previous = war_only ? 0 : node->latency;
|
||||
child->unblocked_time = MAX2(child->unblocked_time,
|
||||
time + latency_from_previous);
|
||||
child->parent_count--;
|
||||
if (child->parent_count == 0)
|
||||
list_add(&child->link, schedule_list);
|
||||
|
|
@ -662,10 +674,11 @@ mark_instruction_scheduled(struct list_head *schedule_list,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
static uint32_t
|
||||
schedule_instructions(struct vc4_compile *c, struct list_head *schedule_list)
|
||||
{
|
||||
struct choose_scoreboard scoreboard;
|
||||
uint32_t time = 0;
|
||||
|
||||
/* We reorder the uniforms as we schedule instructions, so save the
|
||||
* old data off and replace it.
|
||||
|
|
@ -708,9 +721,10 @@ schedule_instructions(struct vc4_compile *c, struct list_head *schedule_list)
|
|||
uint64_t inst = chosen ? chosen->inst->inst : qpu_NOP();
|
||||
|
||||
if (debug) {
|
||||
fprintf(stderr, "current list:\n");
|
||||
fprintf(stderr, "t=%4d: current list:\n",
|
||||
time);
|
||||
dump_state(schedule_list);
|
||||
fprintf(stderr, "chose: ");
|
||||
fprintf(stderr, "t=%4d: chose: ", time);
|
||||
vc4_qpu_disasm(&inst, 1);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
|
@ -719,8 +733,10 @@ schedule_instructions(struct vc4_compile *c, struct list_head *schedule_list)
|
|||
* find an instruction to pair with it.
|
||||
*/
|
||||
if (chosen) {
|
||||
time = MAX2(chosen->unblocked_time, time);
|
||||
list_del(&chosen->link);
|
||||
mark_instruction_scheduled(schedule_list, chosen, true);
|
||||
mark_instruction_scheduled(schedule_list, time,
|
||||
chosen, true);
|
||||
if (chosen->uniform != -1) {
|
||||
c->uniform_data[next_uniform] =
|
||||
uniform_data[chosen->uniform];
|
||||
|
|
@ -733,6 +749,7 @@ schedule_instructions(struct vc4_compile *c, struct list_head *schedule_list)
|
|||
schedule_list,
|
||||
chosen);
|
||||
if (merge) {
|
||||
time = MAX2(merge->unblocked_time, time);
|
||||
list_del(&merge->link);
|
||||
inst = qpu_merge_inst(inst, merge->inst->inst);
|
||||
assert(inst != 0);
|
||||
|
|
@ -745,10 +762,11 @@ schedule_instructions(struct vc4_compile *c, struct list_head *schedule_list)
|
|||
}
|
||||
|
||||
if (debug) {
|
||||
fprintf(stderr, "merging: ");
|
||||
fprintf(stderr, "t=%4d: merging: ",
|
||||
time);
|
||||
vc4_qpu_disasm(&merge->inst->inst, 1);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "resulting in: ");
|
||||
fprintf(stderr, " resulting in: ");
|
||||
vc4_qpu_disasm(&inst, 1);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
|
@ -768,13 +786,16 @@ schedule_instructions(struct vc4_compile *c, struct list_head *schedule_list)
|
|||
* be scheduled. Update the children's unblocked time for this
|
||||
* DAG edge as we do so.
|
||||
*/
|
||||
mark_instruction_scheduled(schedule_list, chosen, false);
|
||||
mark_instruction_scheduled(schedule_list, merge, false);
|
||||
mark_instruction_scheduled(schedule_list, time, chosen, false);
|
||||
mark_instruction_scheduled(schedule_list, time, merge, false);
|
||||
|
||||
scoreboard.tick++;
|
||||
time++;
|
||||
}
|
||||
|
||||
assert(next_uniform == c->num_uniforms);
|
||||
|
||||
return time;
|
||||
}
|
||||
|
||||
static uint32_t waddr_latency(uint32_t waddr)
|
||||
|
|
@ -784,7 +805,7 @@ static uint32_t waddr_latency(uint32_t waddr)
|
|||
|
||||
/* Some huge number, really. */
|
||||
if (waddr >= QPU_W_TMU0_S && waddr <= QPU_W_TMU1_B)
|
||||
return 10;
|
||||
return 100;
|
||||
|
||||
switch(waddr) {
|
||||
case QPU_W_SFU_RECIP:
|
||||
|
|
@ -804,7 +825,7 @@ instruction_latency(uint64_t inst)
|
|||
waddr_latency(QPU_GET_FIELD(inst, QPU_WADDR_MUL)));
|
||||
}
|
||||
|
||||
void
|
||||
uint32_t
|
||||
qpu_schedule_instructions(struct vc4_compile *c)
|
||||
{
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
|
@ -849,7 +870,7 @@ qpu_schedule_instructions(struct vc4_compile *c)
|
|||
compute_delay(n);
|
||||
}
|
||||
|
||||
schedule_instructions(c, &schedule_list);
|
||||
uint32_t cycles = schedule_instructions(c, &schedule_list);
|
||||
|
||||
if (debug) {
|
||||
fprintf(stderr, "Post-schedule instructions\n");
|
||||
|
|
@ -858,4 +879,6 @@ qpu_schedule_instructions(struct vc4_compile *c)
|
|||
}
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
|
||||
return cycles;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,47 +37,16 @@
|
|||
#include <xf86drm.h>
|
||||
#include <stdio.h>
|
||||
|
||||
static const struct pb_vtbl amdgpu_winsys_bo_vtbl;
|
||||
|
||||
static inline struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer *bo)
|
||||
{
|
||||
assert(bo->vtbl == &amdgpu_winsys_bo_vtbl);
|
||||
return (struct amdgpu_winsys_bo *)bo;
|
||||
}
|
||||
|
||||
struct amdgpu_bomgr {
|
||||
struct pb_manager base;
|
||||
struct amdgpu_winsys *rws;
|
||||
};
|
||||
|
||||
static struct amdgpu_winsys *get_winsys(struct pb_manager *mgr)
|
||||
{
|
||||
return ((struct amdgpu_bomgr*)mgr)->rws;
|
||||
}
|
||||
|
||||
static struct amdgpu_winsys_bo *get_amdgpu_winsys_bo(struct pb_buffer *_buf)
|
||||
{
|
||||
struct amdgpu_winsys_bo *bo = NULL;
|
||||
|
||||
if (_buf->vtbl == &amdgpu_winsys_bo_vtbl) {
|
||||
bo = amdgpu_winsys_bo(_buf);
|
||||
} else {
|
||||
struct pb_buffer *base_buf;
|
||||
pb_size offset;
|
||||
pb_get_base_buffer(_buf, &base_buf, &offset);
|
||||
|
||||
if (base_buf->vtbl == &amdgpu_winsys_bo_vtbl)
|
||||
bo = amdgpu_winsys_bo(base_buf);
|
||||
}
|
||||
|
||||
return bo;
|
||||
}
|
||||
|
||||
static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
|
||||
enum radeon_bo_usage usage)
|
||||
{
|
||||
struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
|
||||
struct amdgpu_winsys *ws = bo->rws;
|
||||
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
|
||||
struct amdgpu_winsys *ws = bo->ws;
|
||||
int i;
|
||||
|
||||
if (bo->is_shared) {
|
||||
|
|
@ -149,12 +118,12 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
|
|||
}
|
||||
|
||||
static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
|
||||
struct radeon_winsys_cs_handle *buf)
|
||||
struct pb_buffer *buf)
|
||||
{
|
||||
return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
|
||||
}
|
||||
|
||||
static void amdgpu_bo_destroy(struct pb_buffer *_buf)
|
||||
void amdgpu_bo_destroy(struct pb_buffer *_buf)
|
||||
{
|
||||
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
|
||||
int i;
|
||||
|
|
@ -167,13 +136,23 @@ static void amdgpu_bo_destroy(struct pb_buffer *_buf)
|
|||
amdgpu_fence_reference(&bo->fence[i], NULL);
|
||||
|
||||
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
|
||||
bo->rws->allocated_vram -= align(bo->base.size, bo->rws->gart_page_size);
|
||||
bo->ws->allocated_vram -= align(bo->base.size, bo->ws->gart_page_size);
|
||||
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
|
||||
bo->rws->allocated_gtt -= align(bo->base.size, bo->rws->gart_page_size);
|
||||
bo->ws->allocated_gtt -= align(bo->base.size, bo->ws->gart_page_size);
|
||||
FREE(bo);
|
||||
}
|
||||
|
||||
static void *amdgpu_bo_map(struct radeon_winsys_cs_handle *buf,
|
||||
static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
|
||||
{
|
||||
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
|
||||
|
||||
if (bo->use_reusable_pool)
|
||||
pb_cache_add_buffer(&bo->cache_entry);
|
||||
else
|
||||
amdgpu_bo_destroy(_buf);
|
||||
}
|
||||
|
||||
static void *amdgpu_bo_map(struct pb_buffer *buf,
|
||||
struct radeon_winsys_cs *rcs,
|
||||
enum pipe_transfer_usage usage)
|
||||
{
|
||||
|
|
@ -241,7 +220,7 @@ static void *amdgpu_bo_map(struct radeon_winsys_cs_handle *buf,
|
|||
RADEON_USAGE_READWRITE);
|
||||
}
|
||||
|
||||
bo->rws->buffer_wait_time += os_time_get_nano() - time;
|
||||
bo->ws->buffer_wait_time += os_time_get_nano() - time;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -250,52 +229,33 @@ static void *amdgpu_bo_map(struct radeon_winsys_cs_handle *buf,
|
|||
return bo->user_ptr;
|
||||
|
||||
r = amdgpu_bo_cpu_map(bo->bo, &cpu);
|
||||
if (r) {
|
||||
/* Clear the cache and try again. */
|
||||
pb_cache_release_all_buffers(&bo->ws->bo_cache);
|
||||
r = amdgpu_bo_cpu_map(bo->bo, &cpu);
|
||||
}
|
||||
return r ? NULL : cpu;
|
||||
}
|
||||
|
||||
static void amdgpu_bo_unmap(struct radeon_winsys_cs_handle *buf)
|
||||
static void amdgpu_bo_unmap(struct pb_buffer *buf)
|
||||
{
|
||||
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
|
||||
|
||||
amdgpu_bo_cpu_unmap(bo->bo);
|
||||
}
|
||||
|
||||
static void amdgpu_bo_get_base_buffer(struct pb_buffer *buf,
|
||||
struct pb_buffer **base_buf,
|
||||
unsigned *offset)
|
||||
{
|
||||
*base_buf = buf;
|
||||
*offset = 0;
|
||||
}
|
||||
|
||||
static enum pipe_error amdgpu_bo_validate(struct pb_buffer *_buf,
|
||||
struct pb_validate *vl,
|
||||
unsigned flags)
|
||||
{
|
||||
/* Always pinned */
|
||||
return PIPE_OK;
|
||||
}
|
||||
|
||||
static void amdgpu_bo_fence(struct pb_buffer *buf,
|
||||
struct pipe_fence_handle *fence)
|
||||
{
|
||||
}
|
||||
|
||||
static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
|
||||
amdgpu_bo_destroy,
|
||||
NULL, /* never called */
|
||||
NULL, /* never called */
|
||||
amdgpu_bo_validate,
|
||||
amdgpu_bo_fence,
|
||||
amdgpu_bo_get_base_buffer,
|
||||
amdgpu_bo_destroy_or_cache
|
||||
/* other functions are never called */
|
||||
};
|
||||
|
||||
static struct pb_buffer *amdgpu_bomgr_create_bo(struct pb_manager *_mgr,
|
||||
pb_size size,
|
||||
const struct pb_desc *desc)
|
||||
static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
|
||||
unsigned size,
|
||||
unsigned alignment,
|
||||
unsigned usage,
|
||||
enum radeon_bo_domain initial_domain,
|
||||
unsigned flags)
|
||||
{
|
||||
struct amdgpu_winsys *rws = get_winsys(_mgr);
|
||||
struct amdgpu_bo_desc *rdesc = (struct amdgpu_bo_desc*)desc;
|
||||
struct amdgpu_bo_alloc_request request = {0};
|
||||
amdgpu_bo_handle buf_handle;
|
||||
uint64_t va = 0;
|
||||
|
|
@ -303,37 +263,38 @@ static struct pb_buffer *amdgpu_bomgr_create_bo(struct pb_manager *_mgr,
|
|||
amdgpu_va_handle va_handle;
|
||||
int r;
|
||||
|
||||
assert(rdesc->initial_domain & RADEON_DOMAIN_VRAM_GTT);
|
||||
assert(initial_domain & RADEON_DOMAIN_VRAM_GTT);
|
||||
bo = CALLOC_STRUCT(amdgpu_winsys_bo);
|
||||
if (!bo) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pb_cache_init_entry(&ws->bo_cache, &bo->cache_entry, &bo->base);
|
||||
request.alloc_size = size;
|
||||
request.phys_alignment = desc->alignment;
|
||||
request.phys_alignment = alignment;
|
||||
|
||||
if (rdesc->initial_domain & RADEON_DOMAIN_VRAM) {
|
||||
if (initial_domain & RADEON_DOMAIN_VRAM) {
|
||||
request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
|
||||
if (rdesc->flags & RADEON_FLAG_CPU_ACCESS)
|
||||
if (flags & RADEON_FLAG_CPU_ACCESS)
|
||||
request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
|
||||
}
|
||||
if (rdesc->initial_domain & RADEON_DOMAIN_GTT) {
|
||||
if (initial_domain & RADEON_DOMAIN_GTT) {
|
||||
request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
|
||||
if (rdesc->flags & RADEON_FLAG_GTT_WC)
|
||||
if (flags & RADEON_FLAG_GTT_WC)
|
||||
request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
|
||||
}
|
||||
|
||||
r = amdgpu_bo_alloc(rws->dev, &request, &buf_handle);
|
||||
r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
|
||||
if (r) {
|
||||
fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
|
||||
fprintf(stderr, "amdgpu: size : %d bytes\n", size);
|
||||
fprintf(stderr, "amdgpu: alignment : %d bytes\n", desc->alignment);
|
||||
fprintf(stderr, "amdgpu: domains : %d\n", rdesc->initial_domain);
|
||||
fprintf(stderr, "amdgpu: alignment : %d bytes\n", alignment);
|
||||
fprintf(stderr, "amdgpu: domains : %d\n", initial_domain);
|
||||
goto error_bo_alloc;
|
||||
}
|
||||
|
||||
r = amdgpu_va_range_alloc(rws->dev, amdgpu_gpu_va_range_general,
|
||||
size, desc->alignment, 0, &va, &va_handle, 0);
|
||||
r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
|
||||
size, alignment, 0, &va, &va_handle, 0);
|
||||
if (r)
|
||||
goto error_va_alloc;
|
||||
|
||||
|
|
@ -342,23 +303,23 @@ static struct pb_buffer *amdgpu_bomgr_create_bo(struct pb_manager *_mgr,
|
|||
goto error_va_map;
|
||||
|
||||
pipe_reference_init(&bo->base.reference, 1);
|
||||
bo->base.alignment = desc->alignment;
|
||||
bo->base.usage = desc->usage;
|
||||
bo->base.alignment = alignment;
|
||||
bo->base.usage = usage;
|
||||
bo->base.size = size;
|
||||
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
|
||||
bo->rws = rws;
|
||||
bo->ws = ws;
|
||||
bo->bo = buf_handle;
|
||||
bo->va = va;
|
||||
bo->va_handle = va_handle;
|
||||
bo->initial_domain = rdesc->initial_domain;
|
||||
bo->unique_id = __sync_fetch_and_add(&rws->next_bo_unique_id, 1);
|
||||
bo->initial_domain = initial_domain;
|
||||
bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
|
||||
|
||||
if (rdesc->initial_domain & RADEON_DOMAIN_VRAM)
|
||||
rws->allocated_vram += align(size, rws->gart_page_size);
|
||||
else if (rdesc->initial_domain & RADEON_DOMAIN_GTT)
|
||||
rws->allocated_gtt += align(size, rws->gart_page_size);
|
||||
if (initial_domain & RADEON_DOMAIN_VRAM)
|
||||
ws->allocated_vram += align(size, ws->gart_page_size);
|
||||
else if (initial_domain & RADEON_DOMAIN_GTT)
|
||||
ws->allocated_gtt += align(size, ws->gart_page_size);
|
||||
|
||||
return &bo->base;
|
||||
return bo;
|
||||
|
||||
error_va_map:
|
||||
amdgpu_va_range_free(va_handle);
|
||||
|
|
@ -371,48 +332,15 @@ error_bo_alloc:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static void amdgpu_bomgr_flush(struct pb_manager *mgr)
|
||||
{
|
||||
/* NOP */
|
||||
}
|
||||
|
||||
/* This is for the cache bufmgr. */
|
||||
static boolean amdgpu_bomgr_is_buffer_busy(struct pb_manager *_mgr,
|
||||
struct pb_buffer *_buf)
|
||||
bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
|
||||
{
|
||||
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
|
||||
|
||||
if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
|
||||
return TRUE;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0, RADEON_USAGE_READWRITE)) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static void amdgpu_bomgr_destroy(struct pb_manager *mgr)
|
||||
{
|
||||
FREE(mgr);
|
||||
}
|
||||
|
||||
struct pb_manager *amdgpu_bomgr_create(struct amdgpu_winsys *rws)
|
||||
{
|
||||
struct amdgpu_bomgr *mgr;
|
||||
|
||||
mgr = CALLOC_STRUCT(amdgpu_bomgr);
|
||||
if (!mgr)
|
||||
return NULL;
|
||||
|
||||
mgr->base.destroy = amdgpu_bomgr_destroy;
|
||||
mgr->base.create_buffer = amdgpu_bomgr_create_bo;
|
||||
mgr->base.flush = amdgpu_bomgr_flush;
|
||||
mgr->base.is_buffer_busy = amdgpu_bomgr_is_buffer_busy;
|
||||
|
||||
mgr->rws = rws;
|
||||
return &mgr->base;
|
||||
return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
|
||||
}
|
||||
|
||||
static unsigned eg_tile_split(unsigned tile_split)
|
||||
|
|
@ -453,7 +381,7 @@ static void amdgpu_bo_get_tiling(struct pb_buffer *_buf,
|
|||
unsigned *mtilea,
|
||||
bool *scanout)
|
||||
{
|
||||
struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
|
||||
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
|
||||
struct amdgpu_bo_info info = {0};
|
||||
uint32_t tiling_flags;
|
||||
int r;
|
||||
|
|
@ -494,7 +422,7 @@ static void amdgpu_bo_set_tiling(struct pb_buffer *_buf,
|
|||
uint32_t pitch,
|
||||
bool scanout)
|
||||
{
|
||||
struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
|
||||
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
|
||||
struct amdgpu_bo_metadata metadata = {0};
|
||||
uint32_t tiling_flags = 0;
|
||||
|
||||
|
|
@ -523,12 +451,6 @@ static void amdgpu_bo_set_tiling(struct pb_buffer *_buf,
|
|||
amdgpu_bo_set_metadata(bo->bo, &metadata);
|
||||
}
|
||||
|
||||
static struct radeon_winsys_cs_handle *amdgpu_get_cs_handle(struct pb_buffer *_buf)
|
||||
{
|
||||
/* return a direct pointer to amdgpu_winsys_bo. */
|
||||
return (struct radeon_winsys_cs_handle*)get_amdgpu_winsys_bo(_buf);
|
||||
}
|
||||
|
||||
static struct pb_buffer *
|
||||
amdgpu_bo_create(struct radeon_winsys *rws,
|
||||
unsigned size,
|
||||
|
|
@ -538,9 +460,8 @@ amdgpu_bo_create(struct radeon_winsys *rws,
|
|||
enum radeon_bo_flag flags)
|
||||
{
|
||||
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
|
||||
struct amdgpu_bo_desc desc;
|
||||
struct pb_manager *provider;
|
||||
struct pb_buffer *buffer;
|
||||
struct amdgpu_winsys_bo *bo;
|
||||
unsigned usage = 0;
|
||||
|
||||
/* Don't use VRAM if the GPU doesn't have much. This is only the initial
|
||||
* domain. The kernel is free to move the buffer if it wants to.
|
||||
|
|
@ -552,9 +473,6 @@ amdgpu_bo_create(struct radeon_winsys *rws,
|
|||
flags = RADEON_FLAG_GTT_WC;
|
||||
}
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.base.alignment = alignment;
|
||||
|
||||
/* Align size to page size. This is the minimum alignment for normal
|
||||
* BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
|
||||
* like constant/uniform buffers, can benefit from better and more reuse.
|
||||
|
|
@ -565,26 +483,33 @@ amdgpu_bo_create(struct radeon_winsys *rws,
|
|||
* might consider different sets of domains / flags compatible
|
||||
*/
|
||||
if (domain == RADEON_DOMAIN_VRAM_GTT)
|
||||
desc.base.usage = 1 << 2;
|
||||
usage = 1 << 2;
|
||||
else
|
||||
desc.base.usage = domain >> 1;
|
||||
assert(flags < sizeof(desc.base.usage) * 8 - 3);
|
||||
desc.base.usage |= 1 << (flags + 3);
|
||||
usage = domain >> 1;
|
||||
assert(flags < sizeof(usage) * 8 - 3);
|
||||
usage |= 1 << (flags + 3);
|
||||
|
||||
desc.initial_domain = domain;
|
||||
desc.flags = flags;
|
||||
/* Get a buffer from the cache. */
|
||||
if (use_reusable_pool) {
|
||||
bo = (struct amdgpu_winsys_bo*)
|
||||
pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
|
||||
usage);
|
||||
if (bo)
|
||||
return &bo->base;
|
||||
}
|
||||
|
||||
/* Assign a buffer manager. */
|
||||
if (use_reusable_pool)
|
||||
provider = ws->cman;
|
||||
else
|
||||
provider = ws->kman;
|
||||
/* Create a new one. */
|
||||
bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags);
|
||||
if (!bo) {
|
||||
/* Clear the cache and try again. */
|
||||
pb_cache_release_all_buffers(&ws->bo_cache);
|
||||
bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags);
|
||||
if (!bo)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
buffer = provider->create_buffer(provider, size, &desc.base);
|
||||
if (!buffer)
|
||||
return NULL;
|
||||
|
||||
return (struct pb_buffer*)buffer;
|
||||
bo->use_reusable_pool = use_reusable_pool;
|
||||
return &bo->base;
|
||||
}
|
||||
|
||||
static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
|
||||
|
|
@ -648,7 +573,7 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
|
|||
bo->bo = result.buf_handle;
|
||||
bo->base.size = result.alloc_size;
|
||||
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
|
||||
bo->rws = ws;
|
||||
bo->ws = ws;
|
||||
bo->va = va;
|
||||
bo->va_handle = va_handle;
|
||||
bo->initial_domain = initial;
|
||||
|
|
@ -680,12 +605,11 @@ static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
|
|||
unsigned stride,
|
||||
struct winsys_handle *whandle)
|
||||
{
|
||||
struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(buffer);
|
||||
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
|
||||
enum amdgpu_bo_handle_type type;
|
||||
int r;
|
||||
|
||||
if ((void*)bo != (void*)buffer)
|
||||
pb_cache_manager_remove_buffer(buffer);
|
||||
bo->use_reusable_pool = false;
|
||||
|
||||
switch (whandle->type) {
|
||||
case DRM_API_HANDLE_TYPE_SHARED:
|
||||
|
|
@ -740,7 +664,7 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
|
|||
bo->base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
|
||||
bo->base.size = size;
|
||||
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
|
||||
bo->rws = ws;
|
||||
bo->ws = ws;
|
||||
bo->user_ptr = pointer;
|
||||
bo->va = va;
|
||||
bo->va_handle = va_handle;
|
||||
|
|
@ -762,14 +686,13 @@ error:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static uint64_t amdgpu_bo_get_va(struct radeon_winsys_cs_handle *buf)
|
||||
static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
|
||||
{
|
||||
return ((struct amdgpu_winsys_bo*)buf)->va;
|
||||
}
|
||||
|
||||
void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws)
|
||||
void amdgpu_bo_init_functions(struct amdgpu_winsys *ws)
|
||||
{
|
||||
ws->base.buffer_get_cs_handle = amdgpu_get_cs_handle;
|
||||
ws->base.buffer_set_tiling = amdgpu_bo_set_tiling;
|
||||
ws->base.buffer_get_tiling = amdgpu_bo_get_tiling;
|
||||
ws->base.buffer_map = amdgpu_bo_map;
|
||||
|
|
|
|||
|
|
@ -36,17 +36,11 @@
|
|||
#include "amdgpu_winsys.h"
|
||||
#include "pipebuffer/pb_bufmgr.h"
|
||||
|
||||
struct amdgpu_bo_desc {
|
||||
struct pb_desc base;
|
||||
|
||||
enum radeon_bo_domain initial_domain;
|
||||
unsigned flags;
|
||||
};
|
||||
|
||||
struct amdgpu_winsys_bo {
|
||||
struct pb_buffer base;
|
||||
struct pb_cache_entry cache_entry;
|
||||
|
||||
struct amdgpu_winsys *rws;
|
||||
struct amdgpu_winsys *ws;
|
||||
void *user_ptr; /* from buffer_from_ptr */
|
||||
|
||||
amdgpu_bo_handle bo;
|
||||
|
|
@ -54,6 +48,7 @@ struct amdgpu_winsys_bo {
|
|||
amdgpu_va_handle va_handle;
|
||||
uint64_t va;
|
||||
enum radeon_bo_domain initial_domain;
|
||||
bool use_reusable_pool;
|
||||
|
||||
/* how many command streams is this bo referenced in? */
|
||||
int num_cs_references;
|
||||
|
|
@ -67,8 +62,9 @@ struct amdgpu_winsys_bo {
|
|||
struct pipe_fence_handle *fence[RING_LAST];
|
||||
};
|
||||
|
||||
struct pb_manager *amdgpu_bomgr_create(struct amdgpu_winsys *rws);
|
||||
void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws);
|
||||
bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf);
|
||||
void amdgpu_bo_destroy(struct pb_buffer *_buf);
|
||||
void amdgpu_bo_init_functions(struct amdgpu_winsys *ws);
|
||||
|
||||
static inline
|
||||
void amdgpu_winsys_bo_reference(struct amdgpu_winsys_bo **dst,
|
||||
|
|
|
|||
|
|
@ -214,7 +214,6 @@ static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
|
|||
if (!cs->big_ib_buffer ||
|
||||
cs->used_ib_space + ib_size > cs->big_ib_buffer->size) {
|
||||
struct radeon_winsys *ws = &cs->ctx->ws->base;
|
||||
struct radeon_winsys_cs_handle *winsys_bo;
|
||||
|
||||
pb_reference(&cs->big_ib_buffer, NULL);
|
||||
cs->big_ib_winsys_buffer = NULL;
|
||||
|
|
@ -228,15 +227,14 @@ static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
|
|||
if (!cs->big_ib_buffer)
|
||||
return false;
|
||||
|
||||
winsys_bo = ws->buffer_get_cs_handle(cs->big_ib_buffer);
|
||||
|
||||
cs->ib_mapped = ws->buffer_map(winsys_bo, NULL, PIPE_TRANSFER_WRITE);
|
||||
cs->ib_mapped = ws->buffer_map(cs->big_ib_buffer, NULL,
|
||||
PIPE_TRANSFER_WRITE);
|
||||
if (!cs->ib_mapped) {
|
||||
pb_reference(&cs->big_ib_buffer, NULL);
|
||||
return false;
|
||||
}
|
||||
|
||||
cs->big_ib_winsys_buffer = (struct amdgpu_winsys_bo*)winsys_bo;
|
||||
cs->big_ib_winsys_buffer = (struct amdgpu_winsys_bo*)cs->big_ib_buffer;
|
||||
}
|
||||
|
||||
cs->ib.ib_mc_address = cs->big_ib_winsys_buffer->va + cs->used_ib_space;
|
||||
|
|
@ -338,7 +336,7 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
|
|||
void (*flush)(void *ctx, unsigned flags,
|
||||
struct pipe_fence_handle **fence),
|
||||
void *flush_ctx,
|
||||
struct radeon_winsys_cs_handle *trace_buf)
|
||||
struct pb_buffer *trace_buf)
|
||||
{
|
||||
struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
|
||||
struct amdgpu_cs *cs;
|
||||
|
|
@ -457,7 +455,7 @@ static unsigned amdgpu_add_buffer(struct amdgpu_cs *cs,
|
|||
}
|
||||
|
||||
static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_winsys_cs_handle *buf,
|
||||
struct pb_buffer *buf,
|
||||
enum radeon_bo_usage usage,
|
||||
enum radeon_bo_domain domains,
|
||||
enum radeon_bo_priority priority)
|
||||
|
|
@ -480,7 +478,7 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
|
|||
}
|
||||
|
||||
static int amdgpu_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_winsys_cs_handle *buf)
|
||||
struct pb_buffer *buf)
|
||||
{
|
||||
struct amdgpu_cs *cs = amdgpu_cs(rcs);
|
||||
|
||||
|
|
@ -684,7 +682,7 @@ static void amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
|
|||
}
|
||||
|
||||
static boolean amdgpu_bo_is_referenced(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_winsys_cs_handle *_buf,
|
||||
struct pb_buffer *_buf,
|
||||
enum radeon_bo_usage usage)
|
||||
{
|
||||
struct amdgpu_cs *cs = amdgpu_cs(rcs);
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
|
|||
struct amdgpu_winsys_bo *bo)
|
||||
{
|
||||
int num_refs = bo->num_cs_references;
|
||||
return num_refs == bo->rws->num_cs ||
|
||||
return num_refs == bo->ws->num_cs ||
|
||||
(num_refs && amdgpu_lookup_buffer(cs, bo) != -1);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -304,11 +304,8 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
|
|||
struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
|
||||
|
||||
pipe_mutex_destroy(ws->bo_fence_lock);
|
||||
|
||||
ws->cman->destroy(ws->cman);
|
||||
ws->kman->destroy(ws->kman);
|
||||
pb_cache_deinit(&ws->bo_cache);
|
||||
AddrDestroy(ws->addrlib);
|
||||
|
||||
amdgpu_device_deinitialize(ws->dev);
|
||||
FREE(rws);
|
||||
}
|
||||
|
|
@ -389,9 +386,9 @@ static int compare_dev(void *key1, void *key2)
|
|||
return key1 != key2;
|
||||
}
|
||||
|
||||
static bool amdgpu_winsys_unref(struct radeon_winsys *ws)
|
||||
static bool amdgpu_winsys_unref(struct radeon_winsys *rws)
|
||||
{
|
||||
struct amdgpu_winsys *rws = (struct amdgpu_winsys*)ws;
|
||||
struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
|
||||
bool destroy;
|
||||
|
||||
/* When the reference counter drops to zero, remove the device pointer
|
||||
|
|
@ -401,9 +398,9 @@ static bool amdgpu_winsys_unref(struct radeon_winsys *ws)
|
|||
* from the table when the counter drops to 0. */
|
||||
pipe_mutex_lock(dev_tab_mutex);
|
||||
|
||||
destroy = pipe_reference(&rws->reference, NULL);
|
||||
destroy = pipe_reference(&ws->reference, NULL);
|
||||
if (destroy && dev_tab)
|
||||
util_hash_table_remove(dev_tab, rws->dev);
|
||||
util_hash_table_remove(dev_tab, ws->dev);
|
||||
|
||||
pipe_mutex_unlock(dev_tab_mutex);
|
||||
return destroy;
|
||||
|
|
@ -461,13 +458,9 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
|
|||
goto fail;
|
||||
|
||||
/* Create managers. */
|
||||
ws->kman = amdgpu_bomgr_create(ws);
|
||||
if (!ws->kman)
|
||||
goto fail;
|
||||
ws->cman = pb_cache_manager_create(ws->kman, 500000, 2.0f, 0,
|
||||
(ws->info.vram_size + ws->info.gart_size) / 8);
|
||||
if (!ws->cman)
|
||||
goto fail;
|
||||
pb_cache_init(&ws->bo_cache, 500000, 2.0f, 0,
|
||||
(ws->info.vram_size + ws->info.gart_size) / 8,
|
||||
amdgpu_bo_destroy, amdgpu_bo_can_reclaim);
|
||||
|
||||
/* init reference */
|
||||
pipe_reference_init(&ws->reference, 1);
|
||||
|
|
@ -480,7 +473,7 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
|
|||
ws->base.query_value = amdgpu_query_value;
|
||||
ws->base.read_registers = amdgpu_read_registers;
|
||||
|
||||
amdgpu_bomgr_init_functions(ws);
|
||||
amdgpu_bo_init_functions(ws);
|
||||
amdgpu_cs_init_functions(ws);
|
||||
amdgpu_surface_init_functions(ws);
|
||||
|
||||
|
|
@ -509,10 +502,7 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
|
|||
|
||||
fail:
|
||||
pipe_mutex_unlock(dev_tab_mutex);
|
||||
if (ws->cman)
|
||||
ws->cman->destroy(ws->cman);
|
||||
if (ws->kman)
|
||||
ws->kman->destroy(ws->kman);
|
||||
pb_cache_deinit(&ws->bo_cache);
|
||||
FREE(ws);
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@
|
|||
#ifndef AMDGPU_WINSYS_H
|
||||
#define AMDGPU_WINSYS_H
|
||||
|
||||
#include "pipebuffer/pb_cache.h"
|
||||
#include "gallium/drivers/radeon/radeon_winsys.h"
|
||||
#include "addrlib/addrinterface.h"
|
||||
#include "os/os_thread.h"
|
||||
|
|
@ -42,6 +43,7 @@ struct amdgpu_cs;
|
|||
struct amdgpu_winsys {
|
||||
struct radeon_winsys base;
|
||||
struct pipe_reference reference;
|
||||
struct pb_cache bo_cache;
|
||||
|
||||
amdgpu_device_handle dev;
|
||||
|
||||
|
|
@ -57,9 +59,6 @@ struct amdgpu_winsys {
|
|||
|
||||
struct radeon_info info;
|
||||
|
||||
struct pb_manager *kman;
|
||||
struct pb_manager *cman;
|
||||
|
||||
struct amdgpu_gpu_info amdinfo;
|
||||
ADDR_HANDLE addrlib;
|
||||
uint32_t rev_id;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,6 @@
|
|||
#include "util/u_hash_table.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/simple_list.h"
|
||||
#include "util/list.h"
|
||||
#include "os/os_thread.h"
|
||||
#include "os/os_mman.h"
|
||||
#include "os/os_time.h"
|
||||
|
|
@ -42,11 +41,8 @@
|
|||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
|
||||
static const struct pb_vtbl radeon_bo_vtbl;
|
||||
|
||||
static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
|
||||
{
|
||||
assert(bo->vtbl == &radeon_bo_vtbl);
|
||||
return (struct radeon_bo *)bo;
|
||||
}
|
||||
|
||||
|
|
@ -56,54 +52,6 @@ struct radeon_bo_va_hole {
|
|||
uint64_t size;
|
||||
};
|
||||
|
||||
struct radeon_bomgr {
|
||||
/* Base class. */
|
||||
struct pb_manager base;
|
||||
|
||||
/* Winsys. */
|
||||
struct radeon_drm_winsys *rws;
|
||||
|
||||
/* List of buffer GEM names. Protected by bo_handles_mutex. */
|
||||
struct util_hash_table *bo_names;
|
||||
/* List of buffer handles. Protectded by bo_handles_mutex. */
|
||||
struct util_hash_table *bo_handles;
|
||||
/* List of buffer virtual memory ranges. Protectded by bo_handles_mutex. */
|
||||
struct util_hash_table *bo_vas;
|
||||
pipe_mutex bo_handles_mutex;
|
||||
pipe_mutex bo_va_mutex;
|
||||
|
||||
/* is virtual address supported */
|
||||
bool va;
|
||||
uint64_t va_offset;
|
||||
struct list_head va_holes;
|
||||
|
||||
/* BO size alignment */
|
||||
unsigned size_align;
|
||||
};
|
||||
|
||||
static inline struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
|
||||
{
|
||||
return (struct radeon_bomgr *)mgr;
|
||||
}
|
||||
|
||||
static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf)
|
||||
{
|
||||
struct radeon_bo *bo = NULL;
|
||||
|
||||
if (_buf->vtbl == &radeon_bo_vtbl) {
|
||||
bo = radeon_bo(_buf);
|
||||
} else {
|
||||
struct pb_buffer *base_buf;
|
||||
pb_size offset;
|
||||
pb_get_base_buffer(_buf, &base_buf, &offset);
|
||||
|
||||
if (base_buf->vtbl == &radeon_bo_vtbl)
|
||||
bo = radeon_bo(base_buf);
|
||||
}
|
||||
|
||||
return bo;
|
||||
}
|
||||
|
||||
static bool radeon_bo_is_busy(struct radeon_bo *bo)
|
||||
{
|
||||
struct drm_radeon_gem_busy args = {0};
|
||||
|
|
@ -125,7 +73,7 @@ static void radeon_bo_wait_idle(struct radeon_bo *bo)
|
|||
static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
|
||||
enum radeon_bo_usage usage)
|
||||
{
|
||||
struct radeon_bo *bo = get_radeon_bo(_buf);
|
||||
struct radeon_bo *bo = radeon_bo(_buf);
|
||||
int64_t abs_timeout;
|
||||
|
||||
/* No timeout. Just query. */
|
||||
|
|
@ -167,7 +115,7 @@ static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
|
|||
}
|
||||
|
||||
static enum radeon_bo_domain radeon_bo_get_initial_domain(
|
||||
struct radeon_winsys_cs_handle *buf)
|
||||
struct pb_buffer *buf)
|
||||
{
|
||||
struct radeon_bo *bo = (struct radeon_bo*)buf;
|
||||
struct drm_radeon_gem_op args;
|
||||
|
|
@ -186,7 +134,8 @@ static enum radeon_bo_domain radeon_bo_get_initial_domain(
|
|||
return get_valid_domain(args.value);
|
||||
}
|
||||
|
||||
static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size, uint64_t alignment)
|
||||
static uint64_t radeon_bomgr_find_va(struct radeon_drm_winsys *rws,
|
||||
uint64_t size, uint64_t alignment)
|
||||
{
|
||||
struct radeon_bo_va_hole *hole, *n;
|
||||
uint64_t offset = 0, waste = 0;
|
||||
|
|
@ -194,11 +143,11 @@ static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size, ui
|
|||
/* All VM address space holes will implicitly start aligned to the
|
||||
* size alignment, so we don't need to sanitize the alignment here
|
||||
*/
|
||||
size = align(size, mgr->size_align);
|
||||
size = align(size, rws->size_align);
|
||||
|
||||
pipe_mutex_lock(mgr->bo_va_mutex);
|
||||
pipe_mutex_lock(rws->bo_va_mutex);
|
||||
/* first look for a hole */
|
||||
LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
|
||||
LIST_FOR_EACH_ENTRY_SAFE(hole, n, &rws->va_holes, list) {
|
||||
offset = hole->offset;
|
||||
waste = offset % alignment;
|
||||
waste = waste ? alignment - waste : 0;
|
||||
|
|
@ -210,7 +159,7 @@ static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size, ui
|
|||
offset = hole->offset;
|
||||
list_del(&hole->list);
|
||||
FREE(hole);
|
||||
pipe_mutex_unlock(mgr->bo_va_mutex);
|
||||
pipe_mutex_unlock(rws->bo_va_mutex);
|
||||
return offset;
|
||||
}
|
||||
if ((hole->size - waste) > size) {
|
||||
|
|
@ -222,45 +171,46 @@ static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size, ui
|
|||
}
|
||||
hole->size -= (size + waste);
|
||||
hole->offset += size + waste;
|
||||
pipe_mutex_unlock(mgr->bo_va_mutex);
|
||||
pipe_mutex_unlock(rws->bo_va_mutex);
|
||||
return offset;
|
||||
}
|
||||
if ((hole->size - waste) == size) {
|
||||
hole->size = waste;
|
||||
pipe_mutex_unlock(mgr->bo_va_mutex);
|
||||
pipe_mutex_unlock(rws->bo_va_mutex);
|
||||
return offset;
|
||||
}
|
||||
}
|
||||
|
||||
offset = mgr->va_offset;
|
||||
offset = rws->va_offset;
|
||||
waste = offset % alignment;
|
||||
waste = waste ? alignment - waste : 0;
|
||||
if (waste) {
|
||||
n = CALLOC_STRUCT(radeon_bo_va_hole);
|
||||
n->size = waste;
|
||||
n->offset = offset;
|
||||
list_add(&n->list, &mgr->va_holes);
|
||||
list_add(&n->list, &rws->va_holes);
|
||||
}
|
||||
offset += waste;
|
||||
mgr->va_offset += size + waste;
|
||||
pipe_mutex_unlock(mgr->bo_va_mutex);
|
||||
rws->va_offset += size + waste;
|
||||
pipe_mutex_unlock(rws->bo_va_mutex);
|
||||
return offset;
|
||||
}
|
||||
|
||||
static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
|
||||
static void radeon_bomgr_free_va(struct radeon_drm_winsys *rws,
|
||||
uint64_t va, uint64_t size)
|
||||
{
|
||||
struct radeon_bo_va_hole *hole;
|
||||
|
||||
size = align(size, mgr->size_align);
|
||||
size = align(size, rws->size_align);
|
||||
|
||||
pipe_mutex_lock(mgr->bo_va_mutex);
|
||||
if ((va + size) == mgr->va_offset) {
|
||||
mgr->va_offset = va;
|
||||
pipe_mutex_lock(rws->bo_va_mutex);
|
||||
if ((va + size) == rws->va_offset) {
|
||||
rws->va_offset = va;
|
||||
/* Delete uppermost hole if it reaches the new top */
|
||||
if (!LIST_IS_EMPTY(&mgr->va_holes)) {
|
||||
hole = container_of(mgr->va_holes.next, hole, list);
|
||||
if (!LIST_IS_EMPTY(&rws->va_holes)) {
|
||||
hole = container_of(rws->va_holes.next, hole, list);
|
||||
if ((hole->offset + hole->size) == va) {
|
||||
mgr->va_offset = hole->offset;
|
||||
rws->va_offset = hole->offset;
|
||||
list_del(&hole->list);
|
||||
FREE(hole);
|
||||
}
|
||||
|
|
@ -268,20 +218,20 @@ static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t
|
|||
} else {
|
||||
struct radeon_bo_va_hole *next;
|
||||
|
||||
hole = container_of(&mgr->va_holes, hole, list);
|
||||
LIST_FOR_EACH_ENTRY(next, &mgr->va_holes, list) {
|
||||
hole = container_of(&rws->va_holes, hole, list);
|
||||
LIST_FOR_EACH_ENTRY(next, &rws->va_holes, list) {
|
||||
if (next->offset < va)
|
||||
break;
|
||||
hole = next;
|
||||
}
|
||||
|
||||
if (&hole->list != &mgr->va_holes) {
|
||||
if (&hole->list != &rws->va_holes) {
|
||||
/* Grow upper hole if it's adjacent */
|
||||
if (hole->offset == (va + size)) {
|
||||
hole->offset = va;
|
||||
hole->size += size;
|
||||
/* Merge lower hole if it's adjacent */
|
||||
if (next != hole && &next->list != &mgr->va_holes &&
|
||||
if (next != hole && &next->list != &rws->va_holes &&
|
||||
(next->offset + next->size) == va) {
|
||||
next->size += hole->size;
|
||||
list_del(&hole->list);
|
||||
|
|
@ -292,7 +242,7 @@ static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t
|
|||
}
|
||||
|
||||
/* Grow lower hole if it's adjacent */
|
||||
if (next != hole && &next->list != &mgr->va_holes &&
|
||||
if (next != hole && &next->list != &rws->va_holes &&
|
||||
(next->offset + next->size) == va) {
|
||||
next->size += size;
|
||||
goto out;
|
||||
|
|
@ -309,30 +259,30 @@ static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t
|
|||
}
|
||||
}
|
||||
out:
|
||||
pipe_mutex_unlock(mgr->bo_va_mutex);
|
||||
pipe_mutex_unlock(rws->bo_va_mutex);
|
||||
}
|
||||
|
||||
static void radeon_bo_destroy(struct pb_buffer *_buf)
|
||||
void radeon_bo_destroy(struct pb_buffer *_buf)
|
||||
{
|
||||
struct radeon_bo *bo = radeon_bo(_buf);
|
||||
struct radeon_bomgr *mgr = bo->mgr;
|
||||
struct radeon_drm_winsys *rws = bo->rws;
|
||||
struct drm_gem_close args;
|
||||
|
||||
memset(&args, 0, sizeof(args));
|
||||
|
||||
pipe_mutex_lock(bo->mgr->bo_handles_mutex);
|
||||
util_hash_table_remove(bo->mgr->bo_handles, (void*)(uintptr_t)bo->handle);
|
||||
pipe_mutex_lock(rws->bo_handles_mutex);
|
||||
util_hash_table_remove(rws->bo_handles, (void*)(uintptr_t)bo->handle);
|
||||
if (bo->flink_name) {
|
||||
util_hash_table_remove(bo->mgr->bo_names,
|
||||
util_hash_table_remove(rws->bo_names,
|
||||
(void*)(uintptr_t)bo->flink_name);
|
||||
}
|
||||
pipe_mutex_unlock(bo->mgr->bo_handles_mutex);
|
||||
pipe_mutex_unlock(rws->bo_handles_mutex);
|
||||
|
||||
if (bo->ptr)
|
||||
os_munmap(bo->ptr, bo->base.size);
|
||||
|
||||
if (mgr->va) {
|
||||
if (bo->rws->va_unmap_working) {
|
||||
if (rws->info.r600_virtual_address) {
|
||||
if (rws->va_unmap_working) {
|
||||
struct drm_radeon_gem_va va;
|
||||
|
||||
va.handle = bo->handle;
|
||||
|
|
@ -343,7 +293,7 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
|
|||
RADEON_VM_PAGE_SNOOPED;
|
||||
va.offset = bo->va;
|
||||
|
||||
if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_VA, &va,
|
||||
if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
|
||||
sizeof(va)) != 0 &&
|
||||
va.operation == RADEON_VA_RESULT_ERROR) {
|
||||
fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
|
||||
|
|
@ -352,22 +302,32 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
|
|||
}
|
||||
}
|
||||
|
||||
radeon_bomgr_free_va(mgr, bo->va, bo->base.size);
|
||||
radeon_bomgr_free_va(rws, bo->va, bo->base.size);
|
||||
}
|
||||
|
||||
/* Close object. */
|
||||
args.handle = bo->handle;
|
||||
drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
|
||||
drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
|
||||
|
||||
pipe_mutex_destroy(bo->map_mutex);
|
||||
|
||||
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
|
||||
bo->rws->allocated_vram -= align(bo->base.size, mgr->size_align);
|
||||
rws->allocated_vram -= align(bo->base.size, rws->size_align);
|
||||
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
|
||||
bo->rws->allocated_gtt -= align(bo->base.size, mgr->size_align);
|
||||
rws->allocated_gtt -= align(bo->base.size, rws->size_align);
|
||||
FREE(bo);
|
||||
}
|
||||
|
||||
static void radeon_bo_destroy_or_cache(struct pb_buffer *_buf)
|
||||
{
|
||||
struct radeon_bo *bo = radeon_bo(_buf);
|
||||
|
||||
if (bo->use_reusable_pool)
|
||||
pb_cache_add_buffer(&bo->cache_entry);
|
||||
else
|
||||
radeon_bo_destroy(_buf);
|
||||
}
|
||||
|
||||
void *radeon_bo_do_map(struct radeon_bo *bo)
|
||||
{
|
||||
struct drm_radeon_gem_mmap args = {0};
|
||||
|
|
@ -401,9 +361,16 @@ void *radeon_bo_do_map(struct radeon_bo *bo)
|
|||
ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
|
||||
bo->rws->fd, args.addr_ptr);
|
||||
if (ptr == MAP_FAILED) {
|
||||
pipe_mutex_unlock(bo->map_mutex);
|
||||
fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
|
||||
return NULL;
|
||||
/* Clear the cache and try again. */
|
||||
pb_cache_release_all_buffers(&bo->rws->bo_cache);
|
||||
|
||||
ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
|
||||
bo->rws->fd, args.addr_ptr);
|
||||
if (ptr == MAP_FAILED) {
|
||||
pipe_mutex_unlock(bo->map_mutex);
|
||||
fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
bo->ptr = ptr;
|
||||
bo->map_count = 1;
|
||||
|
|
@ -412,7 +379,7 @@ void *radeon_bo_do_map(struct radeon_bo *bo)
|
|||
return bo->ptr;
|
||||
}
|
||||
|
||||
static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
|
||||
static void *radeon_bo_map(struct pb_buffer *buf,
|
||||
struct radeon_winsys_cs *rcs,
|
||||
enum pipe_transfer_usage usage)
|
||||
{
|
||||
|
|
@ -483,14 +450,14 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
|
|||
RADEON_USAGE_READWRITE);
|
||||
}
|
||||
|
||||
bo->mgr->rws->buffer_wait_time += os_time_get_nano() - time;
|
||||
bo->rws->buffer_wait_time += os_time_get_nano() - time;
|
||||
}
|
||||
}
|
||||
|
||||
return radeon_bo_do_map(bo);
|
||||
}
|
||||
|
||||
static void radeon_bo_unmap(struct radeon_winsys_cs_handle *_buf)
|
||||
static void radeon_bo_unmap(struct pb_buffer *_buf)
|
||||
{
|
||||
struct radeon_bo *bo = (struct radeon_bo*)_buf;
|
||||
|
||||
|
|
@ -514,34 +481,9 @@ static void radeon_bo_unmap(struct radeon_winsys_cs_handle *_buf)
|
|||
pipe_mutex_unlock(bo->map_mutex);
|
||||
}
|
||||
|
||||
static void radeon_bo_get_base_buffer(struct pb_buffer *buf,
|
||||
struct pb_buffer **base_buf,
|
||||
unsigned *offset)
|
||||
{
|
||||
*base_buf = buf;
|
||||
*offset = 0;
|
||||
}
|
||||
|
||||
static enum pipe_error radeon_bo_validate(struct pb_buffer *_buf,
|
||||
struct pb_validate *vl,
|
||||
unsigned flags)
|
||||
{
|
||||
/* Always pinned */
|
||||
return PIPE_OK;
|
||||
}
|
||||
|
||||
static void radeon_bo_fence(struct pb_buffer *buf,
|
||||
struct pipe_fence_handle *fence)
|
||||
{
|
||||
}
|
||||
|
||||
static const struct pb_vtbl radeon_bo_vtbl = {
|
||||
radeon_bo_destroy,
|
||||
NULL, /* never called */
|
||||
NULL, /* never called */
|
||||
radeon_bo_validate,
|
||||
radeon_bo_fence,
|
||||
radeon_bo_get_base_buffer,
|
||||
radeon_bo_destroy_or_cache
|
||||
/* other functions are never called */
|
||||
};
|
||||
|
||||
#ifndef RADEON_GEM_GTT_WC
|
||||
|
|
@ -556,40 +498,39 @@ static const struct pb_vtbl radeon_bo_vtbl = {
|
|||
#define RADEON_GEM_NO_CPU_ACCESS (1 << 4)
|
||||
#endif
|
||||
|
||||
static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
|
||||
pb_size size,
|
||||
const struct pb_desc *desc)
|
||||
static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
|
||||
unsigned size, unsigned alignment,
|
||||
unsigned usage,
|
||||
unsigned initial_domains,
|
||||
unsigned flags)
|
||||
{
|
||||
struct radeon_bomgr *mgr = radeon_bomgr(_mgr);
|
||||
struct radeon_drm_winsys *rws = mgr->rws;
|
||||
struct radeon_bo *bo;
|
||||
struct drm_radeon_gem_create args;
|
||||
struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc;
|
||||
int r;
|
||||
|
||||
memset(&args, 0, sizeof(args));
|
||||
|
||||
assert(rdesc->initial_domains);
|
||||
assert((rdesc->initial_domains &
|
||||
assert(initial_domains);
|
||||
assert((initial_domains &
|
||||
~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
|
||||
|
||||
args.size = size;
|
||||
args.alignment = desc->alignment;
|
||||
args.initial_domain = rdesc->initial_domains;
|
||||
args.alignment = alignment;
|
||||
args.initial_domain = initial_domains;
|
||||
args.flags = 0;
|
||||
|
||||
if (rdesc->flags & RADEON_FLAG_GTT_WC)
|
||||
if (flags & RADEON_FLAG_GTT_WC)
|
||||
args.flags |= RADEON_GEM_GTT_WC;
|
||||
if (rdesc->flags & RADEON_FLAG_CPU_ACCESS)
|
||||
if (flags & RADEON_FLAG_CPU_ACCESS)
|
||||
args.flags |= RADEON_GEM_CPU_ACCESS;
|
||||
if (rdesc->flags & RADEON_FLAG_NO_CPU_ACCESS)
|
||||
if (flags & RADEON_FLAG_NO_CPU_ACCESS)
|
||||
args.flags |= RADEON_GEM_NO_CPU_ACCESS;
|
||||
|
||||
if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
|
||||
&args, sizeof(args))) {
|
||||
fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
|
||||
fprintf(stderr, "radeon: size : %d bytes\n", size);
|
||||
fprintf(stderr, "radeon: alignment : %d bytes\n", desc->alignment);
|
||||
fprintf(stderr, "radeon: alignment : %d bytes\n", alignment);
|
||||
fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
|
||||
fprintf(stderr, "radeon: flags : %d\n", args.flags);
|
||||
return NULL;
|
||||
|
|
@ -600,21 +541,21 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
|
|||
return NULL;
|
||||
|
||||
pipe_reference_init(&bo->base.reference, 1);
|
||||
bo->base.alignment = desc->alignment;
|
||||
bo->base.usage = desc->usage;
|
||||
bo->base.alignment = alignment;
|
||||
bo->base.usage = usage;
|
||||
bo->base.size = size;
|
||||
bo->base.vtbl = &radeon_bo_vtbl;
|
||||
bo->mgr = mgr;
|
||||
bo->rws = mgr->rws;
|
||||
bo->rws = rws;
|
||||
bo->handle = args.handle;
|
||||
bo->va = 0;
|
||||
bo->initial_domain = rdesc->initial_domains;
|
||||
bo->initial_domain = initial_domains;
|
||||
pipe_mutex_init(bo->map_mutex);
|
||||
pb_cache_init_entry(&rws->bo_cache, &bo->cache_entry, &bo->base);
|
||||
|
||||
if (mgr->va) {
|
||||
if (rws->info.r600_virtual_address) {
|
||||
struct drm_radeon_gem_va va;
|
||||
|
||||
bo->va = radeon_bomgr_find_va(mgr, size, desc->alignment);
|
||||
bo->va = radeon_bomgr_find_va(rws, size, alignment);
|
||||
|
||||
va.handle = bo->handle;
|
||||
va.vm_id = 0;
|
||||
|
|
@ -627,108 +568,43 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
|
|||
if (r && va.operation == RADEON_VA_RESULT_ERROR) {
|
||||
fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
|
||||
fprintf(stderr, "radeon: size : %d bytes\n", size);
|
||||
fprintf(stderr, "radeon: alignment : %d bytes\n", desc->alignment);
|
||||
fprintf(stderr, "radeon: alignment : %d bytes\n", alignment);
|
||||
fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
|
||||
fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
|
||||
radeon_bo_destroy(&bo->base);
|
||||
return NULL;
|
||||
}
|
||||
pipe_mutex_lock(mgr->bo_handles_mutex);
|
||||
pipe_mutex_lock(rws->bo_handles_mutex);
|
||||
if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
|
||||
struct pb_buffer *b = &bo->base;
|
||||
struct radeon_bo *old_bo =
|
||||
util_hash_table_get(mgr->bo_vas, (void*)(uintptr_t)va.offset);
|
||||
util_hash_table_get(rws->bo_vas, (void*)(uintptr_t)va.offset);
|
||||
|
||||
pipe_mutex_unlock(mgr->bo_handles_mutex);
|
||||
pipe_mutex_unlock(rws->bo_handles_mutex);
|
||||
pb_reference(&b, &old_bo->base);
|
||||
return b;
|
||||
}
|
||||
|
||||
util_hash_table_set(mgr->bo_vas, (void*)(uintptr_t)bo->va, bo);
|
||||
pipe_mutex_unlock(mgr->bo_handles_mutex);
|
||||
util_hash_table_set(rws->bo_vas, (void*)(uintptr_t)bo->va, bo);
|
||||
pipe_mutex_unlock(rws->bo_handles_mutex);
|
||||
}
|
||||
|
||||
if (rdesc->initial_domains & RADEON_DOMAIN_VRAM)
|
||||
rws->allocated_vram += align(size, mgr->size_align);
|
||||
else if (rdesc->initial_domains & RADEON_DOMAIN_GTT)
|
||||
rws->allocated_gtt += align(size, mgr->size_align);
|
||||
if (initial_domains & RADEON_DOMAIN_VRAM)
|
||||
rws->allocated_vram += align(size, rws->size_align);
|
||||
else if (initial_domains & RADEON_DOMAIN_GTT)
|
||||
rws->allocated_gtt += align(size, rws->size_align);
|
||||
|
||||
return &bo->base;
|
||||
}
|
||||
|
||||
static void radeon_bomgr_flush(struct pb_manager *mgr)
|
||||
{
|
||||
/* NOP */
|
||||
}
|
||||
|
||||
/* This is for the cache bufmgr. */
|
||||
static boolean radeon_bomgr_is_buffer_busy(struct pb_manager *_mgr,
|
||||
struct pb_buffer *_buf)
|
||||
bool radeon_bo_can_reclaim(struct pb_buffer *_buf)
|
||||
{
|
||||
struct radeon_bo *bo = radeon_bo(_buf);
|
||||
|
||||
if (radeon_bo_is_referenced_by_any_cs(bo)) {
|
||||
return TRUE;
|
||||
}
|
||||
if (radeon_bo_is_referenced_by_any_cs(bo))
|
||||
return false;
|
||||
|
||||
if (!radeon_bo_wait((struct pb_buffer*)bo, 0, RADEON_USAGE_READWRITE)) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static void radeon_bomgr_destroy(struct pb_manager *_mgr)
|
||||
{
|
||||
struct radeon_bomgr *mgr = radeon_bomgr(_mgr);
|
||||
util_hash_table_destroy(mgr->bo_names);
|
||||
util_hash_table_destroy(mgr->bo_handles);
|
||||
util_hash_table_destroy(mgr->bo_vas);
|
||||
pipe_mutex_destroy(mgr->bo_handles_mutex);
|
||||
pipe_mutex_destroy(mgr->bo_va_mutex);
|
||||
FREE(mgr);
|
||||
}
|
||||
|
||||
#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
|
||||
|
||||
static unsigned handle_hash(void *key)
|
||||
{
|
||||
return PTR_TO_UINT(key);
|
||||
}
|
||||
|
||||
static int handle_compare(void *key1, void *key2)
|
||||
{
|
||||
return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
|
||||
}
|
||||
|
||||
struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws)
|
||||
{
|
||||
struct radeon_bomgr *mgr;
|
||||
|
||||
mgr = CALLOC_STRUCT(radeon_bomgr);
|
||||
if (!mgr)
|
||||
return NULL;
|
||||
|
||||
mgr->base.destroy = radeon_bomgr_destroy;
|
||||
mgr->base.create_buffer = radeon_bomgr_create_bo;
|
||||
mgr->base.flush = radeon_bomgr_flush;
|
||||
mgr->base.is_buffer_busy = radeon_bomgr_is_buffer_busy;
|
||||
|
||||
mgr->rws = rws;
|
||||
mgr->bo_names = util_hash_table_create(handle_hash, handle_compare);
|
||||
mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare);
|
||||
mgr->bo_vas = util_hash_table_create(handle_hash, handle_compare);
|
||||
pipe_mutex_init(mgr->bo_handles_mutex);
|
||||
pipe_mutex_init(mgr->bo_va_mutex);
|
||||
|
||||
mgr->va = rws->info.r600_virtual_address;
|
||||
mgr->va_offset = rws->va_start;
|
||||
list_inithead(&mgr->va_holes);
|
||||
|
||||
/* TTM aligns the BO size to the CPU page size */
|
||||
mgr->size_align = sysconf(_SC_PAGESIZE);
|
||||
|
||||
return &mgr->base;
|
||||
return radeon_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
|
||||
}
|
||||
|
||||
static unsigned eg_tile_split(unsigned tile_split)
|
||||
|
|
@ -769,7 +645,7 @@ static void radeon_bo_get_tiling(struct pb_buffer *_buf,
|
|||
unsigned *mtilea,
|
||||
bool *scanout)
|
||||
{
|
||||
struct radeon_bo *bo = get_radeon_bo(_buf);
|
||||
struct radeon_bo *bo = radeon_bo(_buf);
|
||||
struct drm_radeon_gem_set_tiling args;
|
||||
|
||||
memset(&args, 0, sizeof(args));
|
||||
|
|
@ -814,7 +690,7 @@ static void radeon_bo_set_tiling(struct pb_buffer *_buf,
|
|||
uint32_t pitch,
|
||||
bool scanout)
|
||||
{
|
||||
struct radeon_bo *bo = get_radeon_bo(_buf);
|
||||
struct radeon_bo *bo = radeon_bo(_buf);
|
||||
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
|
||||
struct drm_radeon_gem_set_tiling args;
|
||||
|
||||
|
|
@ -863,12 +739,6 @@ static void radeon_bo_set_tiling(struct pb_buffer *_buf,
|
|||
sizeof(args));
|
||||
}
|
||||
|
||||
static struct radeon_winsys_cs_handle *radeon_drm_get_cs_handle(struct pb_buffer *_buf)
|
||||
{
|
||||
/* return radeon_bo. */
|
||||
return (struct radeon_winsys_cs_handle*)get_radeon_bo(_buf);
|
||||
}
|
||||
|
||||
static struct pb_buffer *
|
||||
radeon_winsys_bo_create(struct radeon_winsys *rws,
|
||||
unsigned size,
|
||||
|
|
@ -878,55 +748,53 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
|
|||
enum radeon_bo_flag flags)
|
||||
{
|
||||
struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
|
||||
struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
|
||||
struct radeon_bo_desc desc;
|
||||
struct pb_manager *provider;
|
||||
struct pb_buffer *buffer;
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.base.alignment = alignment;
|
||||
struct radeon_bo *bo;
|
||||
unsigned usage = 0;
|
||||
|
||||
/* Align size to page size. This is the minimum alignment for normal
|
||||
* BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
|
||||
* like constant/uniform buffers, can benefit from better and more reuse.
|
||||
*/
|
||||
size = align(size, mgr->size_align);
|
||||
size = align(size, ws->size_align);
|
||||
|
||||
/* Only set one usage bit each for domains and flags, or the cache manager
|
||||
* might consider different sets of domains / flags compatible
|
||||
*/
|
||||
if (domain == RADEON_DOMAIN_VRAM_GTT)
|
||||
desc.base.usage = 1 << 2;
|
||||
usage = 1 << 2;
|
||||
else
|
||||
desc.base.usage = domain >> 1;
|
||||
assert(flags < sizeof(desc.base.usage) * 8 - 3);
|
||||
desc.base.usage |= 1 << (flags + 3);
|
||||
usage = domain >> 1;
|
||||
assert(flags < sizeof(usage) * 8 - 3);
|
||||
usage |= 1 << (flags + 3);
|
||||
|
||||
desc.initial_domains = domain;
|
||||
desc.flags = flags;
|
||||
if (use_reusable_pool) {
|
||||
bo = pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage);
|
||||
if (bo)
|
||||
return bo;
|
||||
}
|
||||
|
||||
/* Assign a buffer manager. */
|
||||
if (use_reusable_pool)
|
||||
provider = ws->cman;
|
||||
else
|
||||
provider = ws->kman;
|
||||
bo = radeon_create_bo(ws, size, alignment, usage, domain, flags);
|
||||
if (!bo) {
|
||||
/* Clear the cache and try again. */
|
||||
pb_cache_release_all_buffers(&ws->bo_cache);
|
||||
bo = radeon_create_bo(ws, size, alignment, usage, domain, flags);
|
||||
if (!bo)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
buffer = provider->create_buffer(provider, size, &desc.base);
|
||||
if (!buffer)
|
||||
return NULL;
|
||||
bo->use_reusable_pool = use_reusable_pool;
|
||||
|
||||
pipe_mutex_lock(mgr->bo_handles_mutex);
|
||||
util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)get_radeon_bo(buffer)->handle, buffer);
|
||||
pipe_mutex_unlock(mgr->bo_handles_mutex);
|
||||
pipe_mutex_lock(ws->bo_handles_mutex);
|
||||
util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
|
||||
pipe_mutex_unlock(ws->bo_handles_mutex);
|
||||
|
||||
return (struct pb_buffer*)buffer;
|
||||
return &bo->base;
|
||||
}
|
||||
|
||||
static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
|
||||
void *pointer, unsigned size)
|
||||
{
|
||||
struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
|
||||
struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
|
||||
struct drm_radeon_gem_userptr args;
|
||||
struct radeon_bo *bo;
|
||||
int r;
|
||||
|
|
@ -947,7 +815,7 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
pipe_mutex_lock(mgr->bo_handles_mutex);
|
||||
pipe_mutex_lock(ws->bo_handles_mutex);
|
||||
|
||||
/* Initialize it. */
|
||||
pipe_reference_init(&bo->base.reference, 1);
|
||||
|
|
@ -956,21 +824,20 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
|
|||
bo->base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
|
||||
bo->base.size = size;
|
||||
bo->base.vtbl = &radeon_bo_vtbl;
|
||||
bo->mgr = mgr;
|
||||
bo->rws = mgr->rws;
|
||||
bo->rws = ws;
|
||||
bo->user_ptr = pointer;
|
||||
bo->va = 0;
|
||||
bo->initial_domain = RADEON_DOMAIN_GTT;
|
||||
pipe_mutex_init(bo->map_mutex);
|
||||
|
||||
util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)bo->handle, bo);
|
||||
util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
|
||||
|
||||
pipe_mutex_unlock(mgr->bo_handles_mutex);
|
||||
pipe_mutex_unlock(ws->bo_handles_mutex);
|
||||
|
||||
if (mgr->va) {
|
||||
if (ws->info.r600_virtual_address) {
|
||||
struct drm_radeon_gem_va va;
|
||||
|
||||
bo->va = radeon_bomgr_find_va(mgr, bo->base.size, 1 << 20);
|
||||
bo->va = radeon_bomgr_find_va(rws, bo->base.size, 1 << 20);
|
||||
|
||||
va.handle = bo->handle;
|
||||
va.operation = RADEON_VA_MAP;
|
||||
|
|
@ -986,22 +853,22 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
|
|||
radeon_bo_destroy(&bo->base);
|
||||
return NULL;
|
||||
}
|
||||
pipe_mutex_lock(mgr->bo_handles_mutex);
|
||||
pipe_mutex_lock(ws->bo_handles_mutex);
|
||||
if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
|
||||
struct pb_buffer *b = &bo->base;
|
||||
struct radeon_bo *old_bo =
|
||||
util_hash_table_get(mgr->bo_vas, (void*)(uintptr_t)va.offset);
|
||||
util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
|
||||
|
||||
pipe_mutex_unlock(mgr->bo_handles_mutex);
|
||||
pipe_mutex_unlock(ws->bo_handles_mutex);
|
||||
pb_reference(&b, &old_bo->base);
|
||||
return b;
|
||||
}
|
||||
|
||||
util_hash_table_set(mgr->bo_vas, (void*)(uintptr_t)bo->va, bo);
|
||||
pipe_mutex_unlock(mgr->bo_handles_mutex);
|
||||
util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
|
||||
pipe_mutex_unlock(ws->bo_handles_mutex);
|
||||
}
|
||||
|
||||
ws->allocated_gtt += align(bo->base.size, mgr->size_align);
|
||||
ws->allocated_gtt += align(bo->base.size, ws->size_align);
|
||||
|
||||
return (struct pb_buffer*)bo;
|
||||
}
|
||||
|
|
@ -1012,7 +879,6 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
|
|||
{
|
||||
struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
|
||||
struct radeon_bo *bo;
|
||||
struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
|
||||
int r;
|
||||
unsigned handle;
|
||||
uint64_t size = 0;
|
||||
|
|
@ -1023,17 +889,17 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
|
|||
* we would hit a deadlock in the kernel.
|
||||
*
|
||||
* The list of pairs is guarded by a mutex, of course. */
|
||||
pipe_mutex_lock(mgr->bo_handles_mutex);
|
||||
pipe_mutex_lock(ws->bo_handles_mutex);
|
||||
|
||||
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
|
||||
/* First check if there already is an existing bo for the handle. */
|
||||
bo = util_hash_table_get(mgr->bo_names, (void*)(uintptr_t)whandle->handle);
|
||||
bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
|
||||
} else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
|
||||
/* We must first get the GEM handle, as fds are unreliable keys */
|
||||
r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
|
||||
if (r)
|
||||
goto fail;
|
||||
bo = util_hash_table_get(mgr->bo_handles, (void*)(uintptr_t)handle);
|
||||
bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
|
||||
} else {
|
||||
/* Unknown handle type */
|
||||
goto fail;
|
||||
|
|
@ -1085,26 +951,25 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
|
|||
bo->base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
|
||||
bo->base.size = (unsigned) size;
|
||||
bo->base.vtbl = &radeon_bo_vtbl;
|
||||
bo->mgr = mgr;
|
||||
bo->rws = mgr->rws;
|
||||
bo->rws = ws;
|
||||
bo->va = 0;
|
||||
pipe_mutex_init(bo->map_mutex);
|
||||
|
||||
if (bo->flink_name)
|
||||
util_hash_table_set(mgr->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
|
||||
util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
|
||||
|
||||
util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)bo->handle, bo);
|
||||
util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
|
||||
|
||||
done:
|
||||
pipe_mutex_unlock(mgr->bo_handles_mutex);
|
||||
pipe_mutex_unlock(ws->bo_handles_mutex);
|
||||
|
||||
if (stride)
|
||||
*stride = whandle->stride;
|
||||
|
||||
if (mgr->va && !bo->va) {
|
||||
if (ws->info.r600_virtual_address && !bo->va) {
|
||||
struct drm_radeon_gem_va va;
|
||||
|
||||
bo->va = radeon_bomgr_find_va(mgr, bo->base.size, 1 << 20);
|
||||
bo->va = radeon_bomgr_find_va(rws, bo->base.size, 1 << 20);
|
||||
|
||||
va.handle = bo->handle;
|
||||
va.operation = RADEON_VA_MAP;
|
||||
|
|
@ -1120,32 +985,32 @@ done:
|
|||
radeon_bo_destroy(&bo->base);
|
||||
return NULL;
|
||||
}
|
||||
pipe_mutex_lock(mgr->bo_handles_mutex);
|
||||
pipe_mutex_lock(ws->bo_handles_mutex);
|
||||
if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
|
||||
struct pb_buffer *b = &bo->base;
|
||||
struct radeon_bo *old_bo =
|
||||
util_hash_table_get(mgr->bo_vas, (void*)(uintptr_t)va.offset);
|
||||
util_hash_table_get(ws->bo_vas, (void*)(uintptr_t)va.offset);
|
||||
|
||||
pipe_mutex_unlock(mgr->bo_handles_mutex);
|
||||
pipe_mutex_unlock(ws->bo_handles_mutex);
|
||||
pb_reference(&b, &old_bo->base);
|
||||
return b;
|
||||
}
|
||||
|
||||
util_hash_table_set(mgr->bo_vas, (void*)(uintptr_t)bo->va, bo);
|
||||
pipe_mutex_unlock(mgr->bo_handles_mutex);
|
||||
util_hash_table_set(ws->bo_vas, (void*)(uintptr_t)bo->va, bo);
|
||||
pipe_mutex_unlock(ws->bo_handles_mutex);
|
||||
}
|
||||
|
||||
bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);
|
||||
|
||||
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
|
||||
ws->allocated_vram += align(bo->base.size, mgr->size_align);
|
||||
ws->allocated_vram += align(bo->base.size, ws->size_align);
|
||||
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
|
||||
ws->allocated_gtt += align(bo->base.size, mgr->size_align);
|
||||
ws->allocated_gtt += align(bo->base.size, ws->size_align);
|
||||
|
||||
return (struct pb_buffer*)bo;
|
||||
|
||||
fail:
|
||||
pipe_mutex_unlock(mgr->bo_handles_mutex);
|
||||
pipe_mutex_unlock(ws->bo_handles_mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
@ -1154,32 +1019,32 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
|
|||
struct winsys_handle *whandle)
|
||||
{
|
||||
struct drm_gem_flink flink;
|
||||
struct radeon_bo *bo = get_radeon_bo(buffer);
|
||||
struct radeon_bo *bo = radeon_bo(buffer);
|
||||
struct radeon_drm_winsys *ws = bo->rws;
|
||||
|
||||
memset(&flink, 0, sizeof(flink));
|
||||
|
||||
if ((void*)bo != (void*)buffer)
|
||||
pb_cache_manager_remove_buffer(buffer);
|
||||
bo->use_reusable_pool = false;
|
||||
|
||||
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
|
||||
if (!bo->flink_name) {
|
||||
flink.handle = bo->handle;
|
||||
|
||||
if (ioctl(bo->rws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
|
||||
if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
bo->flink_name = flink.name;
|
||||
|
||||
pipe_mutex_lock(bo->mgr->bo_handles_mutex);
|
||||
util_hash_table_set(bo->mgr->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
|
||||
pipe_mutex_unlock(bo->mgr->bo_handles_mutex);
|
||||
pipe_mutex_lock(ws->bo_handles_mutex);
|
||||
util_hash_table_set(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
|
||||
pipe_mutex_unlock(ws->bo_handles_mutex);
|
||||
}
|
||||
whandle->handle = bo->flink_name;
|
||||
} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
|
||||
whandle->handle = bo->handle;
|
||||
} else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
|
||||
if (drmPrimeHandleToFD(bo->rws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
|
||||
if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
|
@ -1187,14 +1052,13 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
static uint64_t radeon_winsys_bo_va(struct radeon_winsys_cs_handle *buf)
|
||||
static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf)
|
||||
{
|
||||
return ((struct radeon_bo*)buf)->va;
|
||||
}
|
||||
|
||||
void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
|
||||
void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
|
||||
{
|
||||
ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
|
||||
ws->base.buffer_set_tiling = radeon_bo_set_tiling;
|
||||
ws->base.buffer_get_tiling = radeon_bo_get_tiling;
|
||||
ws->base.buffer_map = radeon_bo_map;
|
||||
|
|
|
|||
|
|
@ -36,19 +36,10 @@
|
|||
#include "pipebuffer/pb_bufmgr.h"
|
||||
#include "os/os_thread.h"
|
||||
|
||||
struct radeon_bomgr;
|
||||
|
||||
struct radeon_bo_desc {
|
||||
struct pb_desc base;
|
||||
|
||||
unsigned initial_domains;
|
||||
unsigned flags;
|
||||
};
|
||||
|
||||
struct radeon_bo {
|
||||
struct pb_buffer base;
|
||||
struct pb_cache_entry cache_entry;
|
||||
|
||||
struct radeon_bomgr *mgr;
|
||||
struct radeon_drm_winsys *rws;
|
||||
void *user_ptr; /* from buffer_from_ptr */
|
||||
|
||||
|
|
@ -60,6 +51,7 @@ struct radeon_bo {
|
|||
uint32_t flink_name;
|
||||
uint64_t va;
|
||||
enum radeon_bo_domain initial_domain;
|
||||
bool use_reusable_pool;
|
||||
|
||||
/* how many command streams is this bo referenced in? */
|
||||
int num_cs_references;
|
||||
|
|
@ -69,8 +61,9 @@ struct radeon_bo {
|
|||
int num_active_ioctls;
|
||||
};
|
||||
|
||||
struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws);
|
||||
void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws);
|
||||
void radeon_bo_destroy(struct pb_buffer *_buf);
|
||||
bool radeon_bo_can_reclaim(struct pb_buffer *_buf);
|
||||
void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws);
|
||||
|
||||
static inline
|
||||
void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src)
|
||||
|
|
|
|||
|
|
@ -169,7 +169,7 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
|
|||
void (*flush)(void *ctx, unsigned flags,
|
||||
struct pipe_fence_handle **fence),
|
||||
void *flush_ctx,
|
||||
struct radeon_winsys_cs_handle *trace_buf)
|
||||
struct pb_buffer *trace_buf)
|
||||
{
|
||||
struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
|
||||
struct radeon_drm_cs *cs;
|
||||
|
|
@ -322,7 +322,7 @@ static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
|
|||
}
|
||||
|
||||
static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_winsys_cs_handle *buf,
|
||||
struct pb_buffer *buf,
|
||||
enum radeon_bo_usage usage,
|
||||
enum radeon_bo_domain domains,
|
||||
enum radeon_bo_priority priority)
|
||||
|
|
@ -342,7 +342,7 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
|
|||
}
|
||||
|
||||
static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_winsys_cs_handle *buf)
|
||||
struct pb_buffer *buf)
|
||||
{
|
||||
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
|
||||
|
||||
|
|
@ -616,7 +616,7 @@ static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
|
|||
}
|
||||
|
||||
static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
|
||||
struct radeon_winsys_cs_handle *_buf,
|
||||
struct pb_buffer *_buf,
|
||||
enum radeon_bo_usage usage)
|
||||
{
|
||||
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
|
||||
|
|
@ -650,7 +650,7 @@ radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
|
|||
fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
|
||||
RADEON_DOMAIN_GTT, 0);
|
||||
/* Add the fence as a dummy relocation. */
|
||||
cs->ws->base.cs_add_buffer(rcs, cs->ws->base.buffer_get_cs_handle(fence),
|
||||
cs->ws->base.cs_add_buffer(rcs, fence,
|
||||
RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
|
||||
RADEON_PRIO_FENCE);
|
||||
return (struct pipe_fence_handle*)fence;
|
||||
|
|
|
|||
|
|
@ -494,12 +494,18 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
|
|||
pipe_mutex_destroy(ws->cmask_owner_mutex);
|
||||
pipe_mutex_destroy(ws->cs_stack_lock);
|
||||
|
||||
ws->cman->destroy(ws->cman);
|
||||
ws->kman->destroy(ws->kman);
|
||||
pb_cache_deinit(&ws->bo_cache);
|
||||
|
||||
if (ws->gen >= DRV_R600) {
|
||||
radeon_surface_manager_free(ws->surf_man);
|
||||
}
|
||||
|
||||
util_hash_table_destroy(ws->bo_names);
|
||||
util_hash_table_destroy(ws->bo_handles);
|
||||
util_hash_table_destroy(ws->bo_vas);
|
||||
pipe_mutex_destroy(ws->bo_handles_mutex);
|
||||
pipe_mutex_destroy(ws->bo_va_mutex);
|
||||
|
||||
if (ws->fd >= 0)
|
||||
close(ws->fd);
|
||||
|
||||
|
|
@ -698,6 +704,18 @@ static bool radeon_winsys_unref(struct radeon_winsys *ws)
|
|||
return destroy;
|
||||
}
|
||||
|
||||
#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
|
||||
|
||||
static unsigned handle_hash(void *key)
|
||||
{
|
||||
return PTR_TO_UINT(key);
|
||||
}
|
||||
|
||||
static int handle_compare(void *key1, void *key2)
|
||||
{
|
||||
return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
|
||||
}
|
||||
|
||||
PUBLIC struct radeon_winsys *
|
||||
radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
|
||||
{
|
||||
|
|
@ -726,15 +744,10 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
|
|||
if (!do_winsys_init(ws))
|
||||
goto fail;
|
||||
|
||||
/* Create managers. */
|
||||
ws->kman = radeon_bomgr_create(ws);
|
||||
if (!ws->kman)
|
||||
goto fail;
|
||||
|
||||
ws->cman = pb_cache_manager_create(ws->kman, 500000, 2.0f, 0,
|
||||
MIN2(ws->info.vram_size, ws->info.gart_size));
|
||||
if (!ws->cman)
|
||||
goto fail;
|
||||
pb_cache_init(&ws->bo_cache, 500000, 2.0f, 0,
|
||||
MIN2(ws->info.vram_size, ws->info.gart_size),
|
||||
radeon_bo_destroy,
|
||||
radeon_bo_can_reclaim);
|
||||
|
||||
if (ws->gen >= DRV_R600) {
|
||||
ws->surf_man = radeon_surface_manager_new(ws->fd);
|
||||
|
|
@ -753,7 +766,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
|
|||
ws->base.query_value = radeon_query_value;
|
||||
ws->base.read_registers = radeon_read_registers;
|
||||
|
||||
radeon_bomgr_init_functions(ws);
|
||||
radeon_drm_bo_init_functions(ws);
|
||||
radeon_drm_cs_init_functions(ws);
|
||||
radeon_surface_init_functions(ws);
|
||||
|
||||
|
|
@ -761,6 +774,17 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
|
|||
pipe_mutex_init(ws->cmask_owner_mutex);
|
||||
pipe_mutex_init(ws->cs_stack_lock);
|
||||
|
||||
ws->bo_names = util_hash_table_create(handle_hash, handle_compare);
|
||||
ws->bo_handles = util_hash_table_create(handle_hash, handle_compare);
|
||||
ws->bo_vas = util_hash_table_create(handle_hash, handle_compare);
|
||||
pipe_mutex_init(ws->bo_handles_mutex);
|
||||
pipe_mutex_init(ws->bo_va_mutex);
|
||||
ws->va_offset = ws->va_start;
|
||||
list_inithead(&ws->va_holes);
|
||||
|
||||
/* TTM aligns the BO size to the CPU page size */
|
||||
ws->size_align = sysconf(_SC_PAGESIZE);
|
||||
|
||||
ws->ncs = 0;
|
||||
pipe_semaphore_init(&ws->cs_queued, 0);
|
||||
if (ws->num_cpus > 1 && debug_get_option_thread())
|
||||
|
|
@ -789,10 +813,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
|
|||
|
||||
fail:
|
||||
pipe_mutex_unlock(fd_tab_mutex);
|
||||
if (ws->cman)
|
||||
ws->cman->destroy(ws->cman);
|
||||
if (ws->kman)
|
||||
ws->kman->destroy(ws->kman);
|
||||
pb_cache_deinit(&ws->bo_cache);
|
||||
if (ws->surf_man)
|
||||
radeon_surface_manager_free(ws->surf_man);
|
||||
if (ws->fd >= 0)
|
||||
|
|
|
|||
|
|
@ -31,7 +31,9 @@
|
|||
#define RADEON_DRM_WINSYS_H
|
||||
|
||||
#include "gallium/drivers/radeon/radeon_winsys.h"
|
||||
#include "pipebuffer/pb_cache.h"
|
||||
#include "os/os_thread.h"
|
||||
#include "util/list.h"
|
||||
#include <radeon_drm.h>
|
||||
|
||||
#ifndef DRM_RADEON_GEM_USERPTR
|
||||
|
|
@ -63,6 +65,7 @@ enum radeon_generation {
|
|||
struct radeon_drm_winsys {
|
||||
struct radeon_winsys base;
|
||||
struct pipe_reference reference;
|
||||
struct pb_cache bo_cache;
|
||||
|
||||
int fd; /* DRM file descriptor */
|
||||
int num_cs; /* The number of command streams created. */
|
||||
|
|
@ -77,8 +80,21 @@ struct radeon_drm_winsys {
|
|||
uint32_t va_unmap_working;
|
||||
uint32_t accel_working2;
|
||||
|
||||
struct pb_manager *kman;
|
||||
struct pb_manager *cman;
|
||||
/* List of buffer GEM names. Protected by bo_handles_mutex. */
|
||||
struct util_hash_table *bo_names;
|
||||
/* List of buffer handles. Protected by bo_handles_mutex. */
|
||||
struct util_hash_table *bo_handles;
|
||||
/* List of buffer virtual memory ranges. Protected by bo_handles_mutex. */
|
||||
struct util_hash_table *bo_vas;
|
||||
pipe_mutex bo_handles_mutex;
|
||||
pipe_mutex bo_va_mutex;
|
||||
|
||||
uint64_t va_offset;
|
||||
struct list_head va_holes;
|
||||
|
||||
/* BO size alignment */
|
||||
unsigned size_align;
|
||||
|
||||
struct radeon_surface_manager *surf_man;
|
||||
|
||||
uint32_t num_cpus; /* Number of CPUs. */
|
||||
|
|
|
|||
|
|
@ -217,7 +217,7 @@ nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables)
|
|||
int position = -1;
|
||||
int maxloc = -1;
|
||||
nir_ssa_def *cv;
|
||||
nir_variable *out[2];
|
||||
nir_variable *out[2] = { NULL };
|
||||
|
||||
if (!ucp_enables)
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -704,6 +704,9 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
|
|||
unsigned *final_assembly_size,
|
||||
char **error_str);
|
||||
|
||||
/**
|
||||
* Fill out local id payload for compute shader according to cs_prog_data.
|
||||
*/
|
||||
void
|
||||
brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
|
||||
void *buffer, uint32_t threads, uint32_t stride);
|
||||
|
|
|
|||
|
|
@ -215,6 +215,33 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
|
|||
}
|
||||
}
|
||||
|
||||
/* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the
|
||||
* single-sampled color renderbuffers because the CCS buffer isn't
|
||||
* supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
|
||||
* enabled because otherwise the surface state will be programmed with the
|
||||
* linear equivalent format anyway.
|
||||
*/
|
||||
if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
|
||||
struct gl_framebuffer *fb = ctx->DrawBuffer;
|
||||
for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
|
||||
struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
|
||||
|
||||
if (rb == NULL)
|
||||
continue;
|
||||
|
||||
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
|
||||
struct intel_mipmap_tree *mt = irb->mt;
|
||||
|
||||
if (mt == NULL ||
|
||||
mt->num_samples > 1 ||
|
||||
_mesa_get_srgb_format_linear(mt->format) == mt->format)
|
||||
continue;
|
||||
|
||||
intel_miptree_resolve_color(brw, mt);
|
||||
brw_render_cache_set_check_flush(brw, mt->bo);
|
||||
}
|
||||
}
|
||||
|
||||
_mesa_lock_context_textures(ctx);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -34,42 +34,6 @@
|
|||
#include "brw_program.h"
|
||||
#include "glsl/ir_uniform.h"
|
||||
|
||||
void
|
||||
brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
|
||||
void *buffer, uint32_t threads, uint32_t stride)
|
||||
{
|
||||
if (prog_data->local_invocation_id_regs == 0)
|
||||
return;
|
||||
|
||||
/* 'stride' should be an integer number of registers, that is, a multiple
|
||||
* of 32 bytes.
|
||||
*/
|
||||
assert(stride % 32 == 0);
|
||||
|
||||
unsigned x = 0, y = 0, z = 0;
|
||||
for (unsigned t = 0; t < threads; t++) {
|
||||
uint32_t *param = (uint32_t *) buffer + stride * t / 4;
|
||||
|
||||
for (unsigned i = 0; i < prog_data->simd_size; i++) {
|
||||
param[0 * prog_data->simd_size + i] = x;
|
||||
param[1 * prog_data->simd_size + i] = y;
|
||||
param[2 * prog_data->simd_size + i] = z;
|
||||
|
||||
x++;
|
||||
if (x == prog_data->local_size[0]) {
|
||||
x = 0;
|
||||
y++;
|
||||
if (y == prog_data->local_size[1]) {
|
||||
y = 0;
|
||||
z++;
|
||||
if (z == prog_data->local_size[2])
|
||||
z = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
assign_cs_binding_table_offsets(const struct brw_device_info *devinfo,
|
||||
const struct gl_shader_program *shader_prog,
|
||||
|
|
|
|||
|
|
@ -5715,3 +5715,39 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
|
|||
|
||||
return g.get_assembly(final_assembly_size);
|
||||
}
|
||||
|
||||
void
|
||||
brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
|
||||
void *buffer, uint32_t threads, uint32_t stride)
|
||||
{
|
||||
if (prog_data->local_invocation_id_regs == 0)
|
||||
return;
|
||||
|
||||
/* 'stride' should be an integer number of registers, that is, a multiple
|
||||
* of 32 bytes.
|
||||
*/
|
||||
assert(stride % 32 == 0);
|
||||
|
||||
unsigned x = 0, y = 0, z = 0;
|
||||
for (unsigned t = 0; t < threads; t++) {
|
||||
uint32_t *param = (uint32_t *) buffer + stride * t / 4;
|
||||
|
||||
for (unsigned i = 0; i < prog_data->simd_size; i++) {
|
||||
param[0 * prog_data->simd_size + i] = x;
|
||||
param[1 * prog_data->simd_size + i] = y;
|
||||
param[2 * prog_data->simd_size + i] = z;
|
||||
|
||||
x++;
|
||||
if (x == prog_data->local_size[0]) {
|
||||
x = 0;
|
||||
y++;
|
||||
if (y == prog_data->local_size[1]) {
|
||||
y = 0;
|
||||
z++;
|
||||
if (z == prog_data->local_size[2])
|
||||
z = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -505,8 +505,21 @@ fast_clear_attachments(struct brw_context *brw,
|
|||
uint32_t fast_clear_buffers,
|
||||
struct rect fast_clear_rect)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
const bool srgb_enabled = ctx->Color.sRGBEnabled;
|
||||
|
||||
assert(brw->gen >= 9);
|
||||
|
||||
/* Make sure the GL_FRAMEBUFFER_SRGB is disabled during fast clear so that
|
||||
* the surface state will always be uploaded with a linear buffer. SRGB
|
||||
* buffers are not supported on Gen9 because they are not marked as
|
||||
* losslessly compressible. This shouldn't matter for the fast clear
|
||||
* because the color is not written to the framebuffer yet so the hardware
|
||||
* doesn't need to do any SRGB conversion.
|
||||
*/
|
||||
if (srgb_enabled)
|
||||
_mesa_set_framebuffer_srgb(ctx, GL_FALSE);
|
||||
|
||||
brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
|
||||
|
||||
/* SKL+ also has a resolve mode for compressed render targets and thus more
|
||||
|
|
@ -533,6 +546,9 @@ fast_clear_attachments(struct brw_context *brw,
|
|||
}
|
||||
|
||||
set_fast_clear_op(brw, 0);
|
||||
|
||||
if (srgb_enabled)
|
||||
_mesa_set_framebuffer_srgb(ctx, GL_TRUE);
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -587,6 +603,17 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
|
|||
brw->render_target_format[irb->mt->format])
|
||||
clear_type = REP_CLEAR;
|
||||
|
||||
/* Gen9 doesn't support fast clear on single-sampled SRGB buffers. When
|
||||
* GL_FRAMEBUFFER_SRGB is enabled any color renderbuffers will be
|
||||
* resolved in intel_update_state. In that case it's pointless to do a
|
||||
* fast clear because it's very likely to be immediately resolved.
|
||||
*/
|
||||
if (brw->gen >= 9 &&
|
||||
irb->mt->num_samples <= 1 &&
|
||||
brw->ctx.Color.sRGBEnabled &&
|
||||
_mesa_get_srgb_format_linear(irb->mt->format) != irb->mt->format)
|
||||
clear_type = REP_CLEAR;
|
||||
|
||||
if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS)
|
||||
clear_type = REP_CLEAR;
|
||||
|
||||
|
|
|
|||
|
|
@ -225,7 +225,11 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
|
|||
pitch = mt->pitch;
|
||||
}
|
||||
|
||||
if (mt->mcs_mt) {
|
||||
/* The MCS is not uploaded for single-sampled surfaces because the color
|
||||
* buffer should always have been resolved before it is used as a texture
|
||||
* so there is no need for it.
|
||||
*/
|
||||
if (mt->mcs_mt && mt->num_samples > 1) {
|
||||
aux_mt = mt->mcs_mt;
|
||||
aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
|
||||
|
||||
|
|
|
|||
|
|
@ -259,7 +259,8 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
|
|||
return false;
|
||||
|
||||
if (brw->gen >= 9) {
|
||||
const uint32_t brw_format = brw_format_for_mesa_format(mt->format);
|
||||
mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
|
||||
const uint32_t brw_format = brw_format_for_mesa_format(linear_format);
|
||||
return brw_losslessly_compressible_format(brw, brw_format);
|
||||
} else
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -852,13 +852,18 @@ program_resource_location(struct gl_shader_program *shProg,
|
|||
* and user-defined attributes.
|
||||
*/
|
||||
switch (res->Type) {
|
||||
case GL_PROGRAM_INPUT:
|
||||
case GL_PROGRAM_INPUT: {
|
||||
const ir_variable *var = RESOURCE_VAR(res);
|
||||
|
||||
/* If the input is an array, fail if the index is out of bounds. */
|
||||
if (array_index > 0
|
||||
&& array_index >= RESOURCE_VAR(res)->type->length) {
|
||||
&& array_index >= var->type->length) {
|
||||
return -1;
|
||||
}
|
||||
return RESOURCE_VAR(res)->data.location + array_index - VERT_ATTRIB_GENERIC0;
|
||||
return (var->data.location +
|
||||
(array_index * var->type->without_array()->matrix_columns) -
|
||||
VERT_ATTRIB_GENERIC0);
|
||||
}
|
||||
case GL_PROGRAM_OUTPUT:
|
||||
/* If the output is an array, fail if the index is out of bounds. */
|
||||
if (array_index > 0
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue