mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-06 07:50:30 +01:00
r600g: compute support for evergreen
Tom Stellard:
- Updated for gallium interface changes
- Fixed a few bugs:
+ Set the loop counter
+ Calculate the correct number of pipes
- Added hooks into the LLVM compiler
This commit is contained in:
parent
46a13b3b11
commit
6a829a1b72
21 changed files with 2680 additions and 13 deletions
|
|
@ -1993,13 +1993,18 @@ if test "x$with_gallium_drivers" != x; then
|
|||
PKG_CHECK_MODULES([RADEON], [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
|
||||
gallium_require_drm_loader
|
||||
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
|
||||
if test "x$enable_r600_llvm" = xyes; then
|
||||
if test "x$enable_r600_llvm" = xyes -o "x$enable_opencl" = xyes; then
|
||||
if test "x$LLVM_VERSION" != "x3.1"; then
|
||||
AC_MSG_ERROR([LLVM 3.1 is required for the r600 llvm compiler.])
|
||||
fi
|
||||
NEED_RADEON_GALLIUM=yes;
|
||||
fi
|
||||
if test "x$enable_r600_llvm" = xyes; then
|
||||
USE_R600_LLVM_COMPILER=yes;
|
||||
fi
|
||||
if test "x$enable_opencl" = xyes -a "x$with_llvm_shared_libs" = xno; then
|
||||
LLVM_LIBS="${LLVM_LIBS} `llvm-config --libs bitreader asmparser`"
|
||||
fi
|
||||
gallium_check_st "radeon/drm" "dri-r600" "xorg-r600" "" "xvmc-r600" "vdpau-r600" "va-r600"
|
||||
;;
|
||||
xradeonsi)
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ AM_CFLAGS = \
|
|||
libr600_a_SOURCES = \
|
||||
$(C_SOURCES)
|
||||
|
||||
if USE_R600_LLVM_COMPILER
|
||||
if NEED_RADEON_GALLIUM
|
||||
|
||||
# This is a hack until we can move the backend into the LLVM project.
|
||||
# We need to use mklib, because it splits up libradeon.a into object files
|
||||
|
|
@ -26,18 +26,28 @@ if USE_R600_LLVM_COMPILER
|
|||
libr600_a_AR = $(top_srcdir)/bin/mklib -o r600 -static
|
||||
|
||||
libr600_a_SOURCES += \
|
||||
$(LLVM_C_SOURCES)
|
||||
$(LLVM_C_SOURCES) \
|
||||
$(LLVM_CXX_SOURCES)
|
||||
|
||||
libr600_a_LIBADD = \
|
||||
$(top_builddir)/src/gallium/drivers/radeon/libradeon.a
|
||||
|
||||
AM_CFLAGS += \
|
||||
$(LLVM_CFLAGS) \
|
||||
-I$(top_srcdir)/src/gallium/drivers/radeon/ \
|
||||
-DR600_USE_LLVM
|
||||
-I$(top_srcdir)/src/gallium/drivers/radeon/
|
||||
|
||||
AM_CXXFLAGS= \
|
||||
$(LLVM_CXXFLAGS)
|
||||
else
|
||||
libr600_a_AR = $(AR) $(ARFLAGS)
|
||||
endif
|
||||
|
||||
if USE_R600_LLVM_COMPILER
|
||||
AM_CFLAGS += \
|
||||
-DR600_USE_LLVM
|
||||
endif
|
||||
|
||||
if HAVE_GALLIUM_COMPUTE
|
||||
AM_CFLAGS += \
|
||||
-DHAVE_OPENCL
|
||||
endif
|
||||
|
|
|
|||
|
|
@ -14,6 +14,10 @@ C_SOURCES = \
|
|||
evergreen_state.c \
|
||||
eg_asm.c \
|
||||
r600_translate.c \
|
||||
r600_state_common.c
|
||||
r600_state_common.c \
|
||||
evergreen_compute.c \
|
||||
evergreen_compute_internal.c \
|
||||
compute_memory_pool.c
|
||||
|
||||
LLVM_C_SOURCES = r600_llvm.c
|
||||
LLVM_CXX_SOURCES = llvm_wrapper.cpp
|
||||
|
|
|
|||
397
src/gallium/drivers/r600/compute_memory_pool.c
Normal file
397
src/gallium/drivers/r600/compute_memory_pool.c
Normal file
|
|
@ -0,0 +1,397 @@
|
|||
/*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Adam Rak <adam.rak@streamnovation.com>
|
||||
*/
|
||||
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_context.h"
|
||||
#include "util/u_blitter.h"
|
||||
#include "util/u_double_list.h"
|
||||
#include "util/u_transfer.h"
|
||||
#include "util/u_surface.h"
|
||||
#include "util/u_pack_color.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_framebuffer.h"
|
||||
#include "r600.h"
|
||||
#include "r600_resource.h"
|
||||
#include "r600_shader.h"
|
||||
#include "r600_pipe.h"
|
||||
#include "r600_formats.h"
|
||||
#include "compute_memory_pool.h"
|
||||
#include "evergreen_compute_internal.h"
|
||||
|
||||
/**
|
||||
* Creates a new pool
|
||||
*/
|
||||
struct compute_memory_pool* compute_memory_pool_new(
|
||||
int64_t initial_size_in_dw,
|
||||
struct r600_screen * rscreen)
|
||||
{
|
||||
struct compute_memory_pool* pool = (struct compute_memory_pool*)
|
||||
CALLOC(sizeof(struct compute_memory_pool), 1);
|
||||
|
||||
pool->next_id = 1;
|
||||
pool->size_in_dw = initial_size_in_dw;
|
||||
pool->screen = rscreen;
|
||||
pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
|
||||
pool->screen, pool->size_in_dw*4);
|
||||
pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw);
|
||||
|
||||
return pool;
|
||||
}
|
||||
|
||||
/**
|
||||
* Frees all stuff in the pool and the pool struct itself too
|
||||
*/
|
||||
void compute_memory_pool_delete(struct compute_memory_pool* pool)
|
||||
{
|
||||
free(pool->shadow);
|
||||
pool->screen->screen.resource_destroy((struct pipe_screen *)
|
||||
pool->screen, (struct pipe_resource *)pool->bo);
|
||||
free(pool);
|
||||
}
|
||||
|
||||
/**
|
||||
* Searches for an empty space in the pool, return with the pointer to the
|
||||
* allocatable space in the pool, returns -1 on failure.
|
||||
*/
|
||||
int64_t compute_memory_prealloc_chunk(
|
||||
struct compute_memory_pool* pool,
|
||||
int64_t size_in_dw)
|
||||
{
|
||||
assert(size_in_dw <= pool->size_in_dw);
|
||||
|
||||
struct compute_memory_item *item;
|
||||
|
||||
int last_end = 0;
|
||||
|
||||
for (item = pool->item_list; item; item = item->next) {
|
||||
if (item->start_in_dw > -1) {
|
||||
if (item->start_in_dw-last_end > size_in_dw) {
|
||||
return last_end;
|
||||
}
|
||||
|
||||
last_end = item->start_in_dw + item->size_in_dw;
|
||||
last_end += (1024 - last_end % 1024);
|
||||
}
|
||||
}
|
||||
|
||||
if (pool->size_in_dw - last_end < size_in_dw) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return last_end;
|
||||
}
|
||||
|
||||
/**
|
||||
* Search for the chunk where we can link our new chunk after it.
|
||||
*/
|
||||
struct compute_memory_item* compute_memory_postalloc_chunk(
|
||||
struct compute_memory_pool* pool,
|
||||
int64_t start_in_dw)
|
||||
{
|
||||
struct compute_memory_item* item;
|
||||
|
||||
for (item = pool->item_list; item; item = item->next) {
|
||||
if (item->next) {
|
||||
if (item->start_in_dw < start_in_dw
|
||||
&& item->next->start_in_dw > start_in_dw) {
|
||||
return item;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* end of chain */
|
||||
assert(item->start_in_dw < start_in_dw);
|
||||
return item;
|
||||
}
|
||||
}
|
||||
|
||||
assert(0 && "unreachable");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reallocates pool, conserves data
|
||||
*/
|
||||
void compute_memory_grow_pool(struct compute_memory_pool* pool,
|
||||
struct pipe_context * pipe, int new_size_in_dw)
|
||||
{
|
||||
assert(new_size_in_dw >= pool->size_in_dw);
|
||||
|
||||
new_size_in_dw += 1024 - (new_size_in_dw % 1024);
|
||||
|
||||
compute_memory_shadow(pool, pipe, 1);
|
||||
pool->shadow = (uint32_t*)realloc(pool->shadow, new_size_in_dw*4);
|
||||
pool->size_in_dw = new_size_in_dw;
|
||||
pool->screen->screen.resource_destroy(
|
||||
(struct pipe_screen *)pool->screen,
|
||||
(struct pipe_resource *)pool->bo);
|
||||
pool->bo = r600_compute_buffer_alloc_vram(pool->screen,
|
||||
pool->size_in_dw*4);
|
||||
compute_memory_shadow(pool, pipe, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy pool from device to host, or host to device.
|
||||
*/
|
||||
void compute_memory_shadow(struct compute_memory_pool* pool,
|
||||
struct pipe_context * pipe, int device_to_host)
|
||||
{
|
||||
struct compute_memory_item chunk;
|
||||
|
||||
chunk.id = 0;
|
||||
chunk.start_in_dw = 0;
|
||||
chunk.size_in_dw = pool->size_in_dw;
|
||||
chunk.prev = chunk.next = NULL;
|
||||
compute_memory_transfer(pool, pipe, device_to_host, &chunk,
|
||||
pool->shadow, 0, pool->size_in_dw*4);
|
||||
}
|
||||
|
||||
/**
 * Places all pending allocations (items with start_in_dw == -1) in the pool.
 *
 * Pass 1: unlink every pending item from pool->item_list onto a local
 * pending_list (preserving order), while summing the space used by placed
 * items ("allocated") and a padded estimate for pending ones ("unallocated").
 * Pass 2: for each pending item, find a hole with
 * compute_memory_prealloc_chunk(), growing the pool until one fits, then
 * re-link the item into item_list sorted by start_in_dw.
 */
void compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe)
{
	struct compute_memory_item *pending_list = NULL, *end_p = NULL;
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;

	/* Debug dump of the current item list.
	 * NOTE(review): "%i" does not match the int64_t start_in_dw — should
	 * be PRIi64 or a (long long) cast with "%lld". */
	for (item = pool->item_list; item; item = item->next) {
		COMPUTE_DBG("list: %i %p\n", item->start_in_dw, item->next);
	}

	/* Pass 1: move pending items onto pending_list (end_p tracks its
	 * tail) and unlink them from pool->item_list. */
	for (item = pool->item_list; item; item = next) {
		next = item->next;


		if (item->start_in_dw == -1) {
			/* Append to pending_list. */
			if (end_p) {
				end_p->next = item;
			}
			else {
				pending_list = item;
			}

			/* Unlink from pool->item_list. */
			if (item->prev) {
				item->prev->next = next;
			}
			else {
				pool->item_list = next;
			}

			if (next) {
				next->prev = item->prev;
			}

			item->prev = end_p;
			item->next = NULL;
			end_p = item;

			/* +1024 reserves room for the inter-chunk alignment
			 * padding added by compute_memory_prealloc_chunk(). */
			unallocated += item->size_in_dw+1024;
		}
		else {
			allocated += item->size_in_dw;
		}
	}

	/* Grow once up-front if the pool clearly cannot hold everything. */
	if (pool->size_in_dw < allocated+unallocated) {
		compute_memory_grow_pool(pool, pipe, allocated+unallocated);
	}

	/* Pass 2: place each pending item and re-link it into item_list. */
	for (item = pending_list; item; item = next) {
		next = item->next;

		int64_t start_in_dw;

		/* Keep growing the pool until a hole fits this item. */
		while ((start_in_dw=compute_memory_prealloc_chunk(pool,
						item->size_in_dw)) == -1) {
			/* Space missing for this item plus 2048 dw headroom,
			 * relative to what is still free in the pool. */
			int64_t need = item->size_in_dw+2048 -
						(pool->size_in_dw - allocated);

			need += 1024 - (need % 1024);

			if (need > 0) {
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			}
			else {
				/* Free space looked sufficient but is too
				 * fragmented; grow by ~10% so the loop makes
				 * progress. */
				need = pool->size_in_dw / 10;
				need += 1024 - (need % 1024);
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			}
		}

		item->start_in_dw = start_in_dw;
		item->next = NULL;
		item->prev = NULL;

		/* Insert sorted by start_in_dw. */
		if (pool->item_list) {
			struct compute_memory_item *pos;

			pos = compute_memory_postalloc_chunk(pool, start_in_dw);
			item->prev = pos;
			item->next = pos->next;
			pos->next = item;

			if (item->next) {
				item->next->prev = item;
			}
		}
		else {
			pool->item_list = item;
		}

		allocated += item->size_in_dw;
	}
}
|
||||
|
||||
|
||||
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
|
||||
{
|
||||
struct compute_memory_item *item, *next;
|
||||
|
||||
for (item = pool->item_list; item; item = next) {
|
||||
next = item->next;
|
||||
|
||||
if (item->id == id) {
|
||||
if (item->prev) {
|
||||
item->prev->next = item->next;
|
||||
}
|
||||
else {
|
||||
pool->item_list = item->next;
|
||||
}
|
||||
|
||||
if (item->next) {
|
||||
item->next->prev = item->prev;
|
||||
}
|
||||
|
||||
free(item);
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "Internal error, invalid id %ld "
|
||||
"for compute_memory_free\n", id);
|
||||
|
||||
assert(0 && "error");
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates pending allocations
|
||||
*/
|
||||
struct compute_memory_item* compute_memory_alloc(
|
||||
struct compute_memory_pool* pool,
|
||||
int64_t size_in_dw)
|
||||
{
|
||||
struct compute_memory_item *new_item;
|
||||
|
||||
COMPUTE_DBG("Alloc: %i\n", size_in_dw);
|
||||
|
||||
new_item = (struct compute_memory_item *)
|
||||
CALLOC(sizeof(struct compute_memory_item), 1);
|
||||
new_item->size_in_dw = size_in_dw;
|
||||
new_item->start_in_dw = -1; /* mark pending */
|
||||
new_item->id = pool->next_id++;
|
||||
new_item->pool = pool;
|
||||
|
||||
struct compute_memory_item *last_item;
|
||||
|
||||
if (pool->item_list) {
|
||||
for (last_item = pool->item_list; last_item->next;
|
||||
last_item = last_item->next);
|
||||
|
||||
last_item->next = new_item;
|
||||
new_item->prev = last_item;
|
||||
}
|
||||
else {
|
||||
pool->item_list = new_item;
|
||||
}
|
||||
|
||||
return new_item;
|
||||
}
|
||||
|
||||
/**
 * Transfers data between host memory and a chunk of the pool bo.
 *
 * \param device_to_host   non-zero: bo -> data; zero: data -> bo
 * \param chunk            pool chunk addressed (its start_in_dw is used)
 * \param data             host buffer
 * \param offset_in_chunk  offset within the chunk, in bytes
 * \param size             amount to copy, in bytes
 */
void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context * pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void* data,
	int offset_in_chunk,
	int size)
{
	/* NOTE(review): despite the name this is the pool size in dwords,
	 * passed as the pipe_box width below — confirm the expected unit of
	 * pipe_box.width for buffer transfers. */
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	/* Byte offset of the window from the start of the pool bo. */
	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	uint32_t *map;

	if (device_to_host)
	{
		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_READ,
			&(struct pipe_box) { .width = aligned_size,
			.height = 1, .depth = 1 });
		assert(xfer);
		map = pipe->transfer_map(pipe, xfer);
		assert(map);
		/* NOTE(review): internal_offset is in bytes but map is a
		 * uint32_t*, so this pointer arithmetic scales the offset by 4
		 * — looks like the offset should be in dwords or map should be
		 * a byte pointer; verify against callers before changing. */
		memcpy(data, map + internal_offset, size);
		pipe->transfer_unmap(pipe, xfer);
		pipe->transfer_destroy(pipe, xfer);
	} else {
		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_WRITE,
			&(struct pipe_box) { .width = aligned_size,
			.height = 1, .depth = 1 });
		assert(xfer);
		map = pipe->transfer_map(pipe, xfer);
		assert(map);
		/* NOTE(review): same byte/dword offset concern as above. */
		memcpy(map + internal_offset, data, size);
		pipe->transfer_unmap(pipe, xfer);
		pipe->transfer_destroy(pipe, xfer);
	}
}
|
||||
|
||||
/**
 * Transfers data between a pool chunk and another r600 resource, intended
 * for VRAM<->GART copies.
 *
 * Currently a stub: no DMA path is implemented yet, so this does nothing.
 *
 * \param chunk_to_data  non-zero: chunk -> data; zero: data -> chunk
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,
	int offset_in_chunk,
	int offset_in_data,
	int size)
{
	///TODO: DMA
}
|
||||
98
src/gallium/drivers/r600/compute_memory_pool.h
Normal file
98
src/gallium/drivers/r600/compute_memory_pool.h
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Adam Rak <adam.rak@streamnovation.com>
|
||||
*/
|
||||
|
||||
#ifndef COMPUTE_MEMORY_POOL
|
||||
#define COMPUTE_MEMORY_POOL
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
struct compute_memory_pool;
|
||||
|
||||
/**
 * One allocation (chunk) inside a compute memory pool; chunks form a
 * doubly linked list ordered by start_in_dw.
 */
struct compute_memory_item
{
	int64_t id; ///ID of the memory chunk

	int untouched; ///True if the memory contains only junk, no need to save it for defrag

	int64_t start_in_dw; ///Start pointer in dwords relative in the pool bo; -1 while the allocation is still pending
	int64_t size_in_dw; ///Size of the chunk in dwords

	struct compute_memory_pool* pool; ///Pool this chunk belongs to

	struct compute_memory_item* prev;
	struct compute_memory_item* next;
};
|
||||
|
||||
/**
 * A growable VRAM pool from which compute chunks are sub-allocated.
 */
struct compute_memory_pool
{
	int64_t next_id; ///For generating unique IDs for memory chunks
	int64_t size_in_dw; ///Size of the pool in dwords

	struct r600_resource *bo; ///The pool buffer object resource
	struct compute_memory_item* item_list; ///Allocated memory chunks in the buffer, they must be ordered by "start_in_dw"
	struct r600_screen *screen; ///Screen used to (re)allocate the bo

	uint32_t *shadow; ///host copy of the pool, used for defragmentation and when growing the pool
};
|
||||
|
||||
|
||||
struct compute_memory_pool* compute_memory_pool_new(int64_t initial_size_in_dw, struct r600_screen *rscreen); ///Creates a new pool
|
||||
void compute_memory_pool_delete(struct compute_memory_pool* pool); ///Frees all stuff in the pool and the pool struct itself too
|
||||
|
||||
int64_t compute_memory_prealloc_chunk(struct compute_memory_pool* pool, int64_t size_in_dw); ///searches for an empty space in the pool, return with the pointer to the allocatable space in the pool, returns -1 on failure
|
||||
|
||||
struct compute_memory_item* compute_memory_postalloc_chunk(struct compute_memory_pool* pool, int64_t start_in_dw); ///search for the chunk where we can link our new chunk after it
|
||||
|
||||
/**
|
||||
* reallocates pool, conserves data
|
||||
*/
|
||||
void compute_memory_grow_pool(struct compute_memory_pool* pool, struct pipe_context * pipe,
|
||||
int new_size_in_dw);
|
||||
|
||||
/**
|
||||
* Copy pool from device to host, or host to device
|
||||
*/
|
||||
void compute_memory_shadow(struct compute_memory_pool* pool,
|
||||
struct pipe_context * pipe, int device_to_host);
|
||||
|
||||
/**
|
||||
* Allocates pending allocations in the pool
|
||||
*/
|
||||
void compute_memory_finalize_pending(struct compute_memory_pool* pool,
|
||||
struct pipe_context * pipe);
|
||||
void compute_memory_defrag(struct compute_memory_pool* pool); ///Defragment the memory pool, always heavy memory usage
|
||||
void compute_memory_free(struct compute_memory_pool* pool, int64_t id);
|
||||
struct compute_memory_item* compute_memory_alloc(struct compute_memory_pool* pool, int64_t size_in_dw); ///Creates pending allocations
|
||||
|
||||
/**
|
||||
* Transfer data host<->device, offset and size is in bytes
|
||||
*/
|
||||
void compute_memory_transfer(struct compute_memory_pool* pool,
|
||||
struct pipe_context * pipe, int device_to_host,
|
||||
struct compute_memory_item* chunk, void* data,
|
||||
int offset_in_chunk, int size);
|
||||
|
||||
void compute_memory_transfer_direct(struct compute_memory_pool* pool, int chunk_to_data, struct compute_memory_item* chunk, struct r600_resource* data, int offset_in_chunk, int offset_in_data, int size); ///Transfer data between chunk<->data, it is for VRAM<->GART transfers
|
||||
|
||||
#endif
|
||||
38
src/gallium/drivers/r600/compute_resource.def
Normal file
38
src/gallium/drivers/r600/compute_resource.def
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Adam Rak <adam.rak@streamnovation.com>
|
||||
*/
|
||||
|
||||
|
||||
/* X-macro table of the evergreen compute resource types.
 * Each includer defines DECL_COMPUTE_RESOURCE(name, count) before
 * including this file. NOTE(review): the second argument is presumably the
 * number of slots reserved for that resource type (summed by
 * get_compute_resource_num()) — confirm against the includers. */
DECL_COMPUTE_RESOURCE(CONFIG, 1)
DECL_COMPUTE_RESOURCE(CONST_MEM, 16)
DECL_COMPUTE_RESOURCE(RAT, 12)
DECL_COMPUTE_RESOURCE(VERT, 16)
DECL_COMPUTE_RESOURCE(TEX, 16)
DECL_COMPUTE_RESOURCE(SAMPLER, 18)
DECL_COMPUTE_RESOURCE(LOOP, 32)
DECL_COMPUTE_RESOURCE(LDS, 1)
DECL_COMPUTE_RESOURCE(GDS, 1)
DECL_COMPUTE_RESOURCE(EXPORT, 1)
DECL_COMPUTE_RESOURCE(SHADER, 1)
DECL_COMPUTE_RESOURCE(TMPRING, 4)
DECL_COMPUTE_RESOURCE(DISPATCH, 1)
||||
814
src/gallium/drivers/r600/evergreen_compute.c
Normal file
814
src/gallium/drivers/r600/evergreen_compute.c
Normal file
|
|
@ -0,0 +1,814 @@
|
|||
/*
|
||||
* Copyright 2011 Adam Rak <adam.rak@streamnovation.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Adam Rak <adam.rak@streamnovation.com>
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_context.h"
|
||||
#include "util/u_blitter.h"
|
||||
#include "util/u_double_list.h"
|
||||
#include "util/u_transfer.h"
|
||||
#include "util/u_surface.h"
|
||||
#include "util/u_pack_color.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_framebuffer.h"
|
||||
#include "pipebuffer/pb_buffer.h"
|
||||
#include "r600.h"
|
||||
#include "evergreend.h"
|
||||
#include "r600_resource.h"
|
||||
#include "r600_shader.h"
|
||||
#include "r600_pipe.h"
|
||||
#include "r600_formats.h"
|
||||
#include "evergreen_compute.h"
|
||||
#include "r600_hw_context_priv.h"
|
||||
#include "evergreen_compute_internal.h"
|
||||
#include "compute_memory_pool.h"
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "llvm_wrapper.h"
|
||||
#endif
|
||||
|
||||
/**
|
||||
RAT0 is for global binding write
|
||||
VTX1 is for global binding read
|
||||
|
||||
for writing images RAT1...
|
||||
for reading images TEX2...
|
||||
TEX2-RAT1 is paired
|
||||
|
||||
TEX2... consumes the same fetch resources, that VTX2... would consume
|
||||
|
||||
CONST0 and VTX0 is for parameters
|
||||
CONST0 is binding smaller input parameter buffer, and for constant indexing,
|
||||
also constant cached
|
||||
VTX0 is for indirect/non-constant indexing, or if the input is bigger than
|
||||
the constant cache can handle
|
||||
|
||||
RAT-s are limited to 12, so we can only bind at most 11 texture for writing
|
||||
because we reserve RAT0 for global bindings. With byteaddressing enabled,
|
||||
we should reserve another one too => at most 10 image bindings for writing.
|
||||
|
||||
from Nvidia OpenCL:
|
||||
CL_DEVICE_MAX_READ_IMAGE_ARGS: 128
|
||||
CL_DEVICE_MAX_WRITE_IMAGE_ARGS: 8
|
||||
|
||||
so 10 for writing is enough. 176 is the max for reading according to the docs
|
||||
|
||||
writable images should be listed first < 10, so their id corresponds to RAT(id+1)
|
||||
writable images will consume TEX slots, VTX slots too because of linear indexing
|
||||
|
||||
*/
|
||||
|
||||
/* Resource vtable for global (compute) buffers: pipe_resource operations on
 * them are routed to the r600_compute_global_* implementations. */
const struct u_resource_vtbl r600_global_buffer_vtbl =
{
	u_default_resource_get_handle, /* get_handle */
	r600_compute_global_buffer_destroy, /* resource_destroy */
	r600_compute_global_get_transfer, /* get_transfer */
	r600_compute_global_transfer_destroy, /* transfer_destroy */
	r600_compute_global_transfer_map, /* transfer_map */
	r600_compute_global_transfer_flush_region,/* transfer_flush_region */
	r600_compute_global_transfer_unmap, /* transfer_unmap */
	r600_compute_global_transfer_inline_write /* transfer_inline_write */
};
|
||||
|
||||
|
||||
void *evergreen_create_compute_state(
|
||||
struct pipe_context *ctx_,
|
||||
const const struct pipe_compute_state *cso)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
const struct pipe_llvm_program_header * header;
|
||||
const unsigned char * code;
|
||||
|
||||
header = cso->prog;
|
||||
code = cso->prog + sizeof(struct pipe_llvm_program_header);
|
||||
#endif
|
||||
|
||||
if (!ctx->screen->screen.get_param(&ctx->screen->screen,
|
||||
PIPE_CAP_COMPUTE)) {
|
||||
fprintf(stderr, "Compute is not supported\n");
|
||||
return NULL;
|
||||
}
|
||||
struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute);
|
||||
|
||||
shader->ctx = (struct r600_context*)ctx;
|
||||
shader->resources = (struct evergreen_compute_resource*)
|
||||
CALLOC(sizeof(struct evergreen_compute_resource),
|
||||
get_compute_resource_num());
|
||||
shader->local_size = cso->req_local_mem; ///TODO: assert it
|
||||
shader->private_size = cso->req_private_mem;
|
||||
shader->input_size = cso->req_input_mem;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
shader->mod = llvm_parse_bitcode(code, header->num_bytes);
|
||||
|
||||
r600_compute_shader_create(ctx_, shader->mod, &shader->bc);
|
||||
#endif
|
||||
return shader;
|
||||
}
|
||||
|
||||
void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
|
||||
{
|
||||
struct r600_pipe_compute *shader = (struct r600_pipe_compute *)state;
|
||||
|
||||
free(shader->resources);
|
||||
free(shader);
|
||||
}
|
||||
|
||||
/**
 * Binds a compute shader: uploads its bytecode into a fresh VRAM bo and
 * programs the LS shader registers (GPR count, stack size, program start)
 * plus the loop counter constant.
 */
static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;

	ctx->cs_shader = (struct r600_pipe_compute *)state;

	/* The bytecode is only uploaded once per shader. */
	assert(!ctx->cs_shader->shader_code_bo);

	ctx->cs_shader->shader_code_bo =
		r600_compute_buffer_alloc_vram(ctx->screen,
				ctx->cs_shader->bc.ndw * 4);

	/* Copy the shader bytecode (ndw dwords) into the bo. */
	void *p = ctx->ws->buffer_map(ctx->cs_shader->shader_code_bo->cs_buf,
				ctx->cs, PIPE_TRANSFER_WRITE);

	memcpy(p, ctx->cs_shader->bc.bytecode, ctx->cs_shader->bc.ndw * 4);

	ctx->ws->buffer_unmap(ctx->cs_shader->shader_code_bo->cs_buf);

	evergreen_compute_init_config(ctx);

	struct evergreen_compute_resource* res = get_empty_res(ctx->cs_shader,
					COMPUTE_RESOURCE_SHADER, 0);

	/* Number of GPRs available to the LS (compute) stage. */
	evergreen_reg_set(res, R_008C0C_SQ_GPR_RESOURCE_MGMT_3,
			S_008C0C_NUM_LS_GPRS(ctx->cs_shader->bc.ngpr));

	///maybe we can use it later
	evergreen_reg_set(res, R_0286C8_SPI_THREAD_GROUPING, 0);
	///maybe we can use it later
	evergreen_reg_set(res, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0);

	evergreen_reg_set(res, R_0288D4_SQ_PGM_RESOURCES_LS,
		S_0288D4_NUM_GPRS(ctx->cs_shader->bc.ngpr)
		| S_0288D4_STACK_SIZE(ctx->cs_shader->bc.nstack));
	evergreen_reg_set(res, R_0288D8_SQ_PGM_RESOURCES_LS_2, 0);

	/* Program start offset 0: the bo below is bound as the program base. */
	evergreen_reg_set(res, R_0288D0_SQ_PGM_START_LS, 0);
	res->bo = ctx->cs_shader->shader_code_bo;
	res->usage = RADEON_USAGE_READ;
	res->coher_bo_size = ctx->cs_shader->bc.ndw*4;
	res->flags = COMPUTE_RES_SH_FLUSH;

	/* We can't always determine the
	 * number of iterations in a loop before it's executed,
	 * so we just need to set up the loop counter to give us the maximum
	 * number of iterations possible. Currently, loops in shader code
	 * ignore the loop counter and use a break instruction to exit the
	 * loop at the correct time.
	 */
	evergreen_set_loop_const(ctx->cs_shader,
		0, /* index */
		0xFFF, /* Maximum value of the loop counter (i.e. when the loop
			* counter reaches this value, the program will break
			* out of the loop. */
		0x0, /* Starting value of the loop counter. */
		0x1); /* Amount to increment the loop counter each iteration. */
}
|
||||
|
||||
/* The kernel parameters are stored in a vtx buffer (ID=0); besides the
 * explicit kernel parameters there are implicit parameters that need to be
 * stored in the vertex buffer as well. Here is how these parameters are
 * organized in the buffer:
 *
 * DWORDS 0-2: Number of work groups in each dimension (x,y,z)
 * DWORDS 3-5: Number of global work items in each dimension (x,y,z)
 * DWORDS 6-8: Number of work items within each work group in each dimension
 *             (x,y,z)
 * DWORDS 9+ : Kernel parameters
 */
void evergreen_compute_upload_input(
	struct pipe_context *ctx_,
	const uint *block_layout,
	const uint *grid_layout,
	const void *input)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;
	int i;
	/* 9 implicit dwords (see layout above) = 36 bytes precede the kernel
	 * parameters. */
	unsigned kernel_parameters_offset_bytes = 36;
	uint32_t * num_work_groups_start;
	uint32_t * global_size_start;
	uint32_t * local_size_start;
	uint32_t * kernel_parameters_start;

	if (ctx->cs_shader->input_size == 0) {
		return;
	}

	/* The parameter buffer is allocated lazily on first use and reused. */
	if (!ctx->cs_shader->kernel_param) {
		unsigned buffer_size = ctx->cs_shader->input_size;

		/* Add space for the grid dimensions */
		/* NOTE(review): kernel_parameters_offset_bytes is already a
		 * byte count, so multiplying by sizeof(uint) over-allocates
		 * (144 bytes instead of 36). Harmless but worth confirming
		 * before changing. */
		buffer_size += kernel_parameters_offset_bytes * sizeof(uint);
		ctx->cs_shader->kernel_param =
				r600_compute_buffer_alloc_vram(ctx->screen,
						buffer_size);
	}

	num_work_groups_start = ctx->ws->buffer_map(
			ctx->cs_shader->kernel_param->cs_buf,
			ctx->cs, PIPE_TRANSFER_WRITE);
	/* Each section is 3 uints; the arithmetic below advances the
	 * uint32_t pointers by 3 elements per section. */
	global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4));
	local_size_start = global_size_start + (3 * (sizeof(uint)) / 4);
	kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4);

	/* Copy the number of work groups in each dimension (grid layout) */
	memcpy(num_work_groups_start, grid_layout, 3 * sizeof(uint));

	/* Copy the global size (work groups * work items per group) */
	for (i = 0; i < 3; i++) {
		global_size_start[i] = grid_layout[i] * block_layout[i];
	}

	/* Copy the local dimensions (work items per group) */
	memcpy(local_size_start, block_layout, 3 * sizeof(uint));

	/* Copy the kernel inputs */
	memcpy(kernel_parameters_start, input, ctx->cs_shader->input_size);

	/* Debug dump of the whole uploaded buffer, dword by dword. */
	for (i = 0; i < (kernel_parameters_offset_bytes / 4) +
			(ctx->cs_shader->input_size / 4); i++) {
		COMPUTE_DBG("input %i : %i\n", i,
			((unsigned*)num_work_groups_start)[i]);
	}

	ctx->ws->buffer_unmap(ctx->cs_shader->kernel_param->cs_buf);

	///ID=0 is reserved for the parameters
	evergreen_set_vtx_resource(ctx->cs_shader,
		ctx->cs_shader->kernel_param, 0, 0, 0);
	///ID=0 is reserved for parameters
	evergreen_set_const_cache(ctx->cs_shader, 0,
		ctx->cs_shader->kernel_param, ctx->cs_shader->input_size, 0);
}
|
||||
|
||||
/* Record the register writes and the DISPATCH_DIRECT packet for one
 * compute dispatch into the COMPUTE_RESOURCE_DISPATCH slot.
 *
 * block_layout: work items per work group in each dimension (x,y,z)
 * grid_layout:  number of work groups in each dimension (x,y,z)
 */
void evergreen_direct_dispatch(
		struct pipe_context *ctx_,
		const uint *block_layout, const uint *grid_layout)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;

	int i;

	struct evergreen_compute_resource* res = get_empty_res(ctx->cs_shader,
		COMPUTE_RESOURCE_DISPATCH, 0);

	/* Compute dispatches are emitted as point-list primitives. */
	evergreen_reg_set(res, R_008958_VGT_PRIMITIVE_TYPE, V_008958_DI_PT_POINTLIST);

	/* The grid always starts at the origin. */
	evergreen_reg_set(res, R_00899C_VGT_COMPUTE_START_X, 0);
	evergreen_reg_set(res, R_0089A0_VGT_COMPUTE_START_Y, 0);
	evergreen_reg_set(res, R_0089A4_VGT_COMPUTE_START_Z, 0);

	evergreen_reg_set(res, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, block_layout[0]);
	evergreen_reg_set(res, R_0286F0_SPI_COMPUTE_NUM_THREAD_Y, block_layout[1]);
	evergreen_reg_set(res, R_0286F4_SPI_COMPUTE_NUM_THREAD_Z, block_layout[2]);

	int group_size = 1;

	int grid_size = 1;

	/* Total number of threads in one work group. */
	for (i = 0; i < 3; i++) {
		group_size *= block_layout[i];
	}

	/* Total number of work groups in the grid (grid_size is currently
	 * only computed, not emitted; the per-dimension counts go into the
	 * DISPATCH_DIRECT packet below). */
	for (i = 0; i < 3; i++) {
		grid_size *= grid_layout[i];
	}

	evergreen_reg_set(res, R_008970_VGT_NUM_INDICES, group_size);
	evergreen_reg_set(res, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE, group_size);

	/* DISPATCH_DIRECT: dim_x, dim_y, dim_z, dispatch_initiator */
	evergreen_emit_raw_value(res, PKT3C(PKT3_DISPATCH_DIRECT, 3, 0));
	evergreen_emit_raw_value(res, grid_layout[0]);
	evergreen_emit_raw_value(res, grid_layout[1]);
	evergreen_emit_raw_value(res, grid_layout[2]);
	///VGT_DISPATCH_INITIATOR = COMPUTE_SHADER_EN
	evergreen_emit_raw_value(res, 1);
}
|
||||
|
||||
static void compute_emit_cs(struct r600_context *ctx)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = ctx->cs;
|
||||
int i;
|
||||
|
||||
r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
|
||||
|
||||
struct r600_resource *onebo = NULL;
|
||||
|
||||
for (i = 0; i < get_compute_resource_num(); i++) {
|
||||
if (ctx->cs_shader->resources[i].enabled) {
|
||||
int j;
|
||||
COMPUTE_DBG("resnum: %i, cdw: %i\n", i, cs->cdw);
|
||||
|
||||
for (j = 0; j < ctx->cs_shader->resources[i].cs_end; j++) {
|
||||
if (ctx->cs_shader->resources[i].do_reloc[j]) {
|
||||
assert(ctx->cs_shader->resources[i].bo);
|
||||
evergreen_emit_ctx_reloc(ctx,
|
||||
ctx->cs_shader->resources[i].bo,
|
||||
ctx->cs_shader->resources[i].usage);
|
||||
}
|
||||
|
||||
cs->buf[cs->cdw++] = ctx->cs_shader->resources[i].cs[j];
|
||||
}
|
||||
|
||||
if (ctx->cs_shader->resources[i].bo) {
|
||||
onebo = ctx->cs_shader->resources[i].bo;
|
||||
evergreen_emit_ctx_reloc(ctx,
|
||||
ctx->cs_shader->resources[i].bo,
|
||||
ctx->cs_shader->resources[i].usage);
|
||||
|
||||
///special case for textures
|
||||
if (ctx->cs_shader->resources[i].do_reloc
|
||||
[ctx->cs_shader->resources[i].cs_end] == 2) {
|
||||
evergreen_emit_ctx_reloc(ctx,
|
||||
ctx->cs_shader->resources[i].bo,
|
||||
ctx->cs_shader->resources[i].usage);
|
||||
}
|
||||
|
||||
evergreen_set_buffer_sync(ctx, ctx->cs_shader->resources[i].bo,
|
||||
ctx->cs_shader->resources[i].coher_bo_size,
|
||||
ctx->cs_shader->resources[i].flags,
|
||||
ctx->cs_shader->resources[i].usage);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
COMPUTE_DBG("cdw: %i\n", cs->cdw);
|
||||
for (i = 0; i < cs->cdw; i++) {
|
||||
COMPUTE_DBG("%4i : 0x%08X\n", i, ctx->cs->buf[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
ctx->ws->cs_flush(ctx->cs, RADEON_FLUSH_ASYNC);
|
||||
|
||||
ctx->pm4_dirty_cdwords = 0;
|
||||
ctx->flags = 0;
|
||||
|
||||
COMPUTE_DBG("shader started\n");
|
||||
|
||||
ctx->ws->buffer_wait(onebo->buf, 0);
|
||||
|
||||
COMPUTE_DBG("...\n");
|
||||
|
||||
r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
|
||||
|
||||
ctx->streamout_start = TRUE;
|
||||
ctx->streamout_append_bitmask = ~0;
|
||||
|
||||
}
|
||||
|
||||
/* pipe_context::launch_grid hook: size the LDS, upload the kernel
 * arguments, record the dispatch packets and submit the command stream.
 *
 * pc is the program-counter offset of the kernel within the shader
 * binary (currently only logged here).
 */
static void evergreen_launch_grid(
		struct pipe_context *ctx_,
		const uint *block_layout, const uint *grid_layout,
		uint32_t pc, const void *input)
{
	COMPUTE_DBG("PC: %i\n", pc);

	struct r600_context *ctx = (struct r600_context *)ctx_;
	unsigned num_waves;
	unsigned num_pipes = ctx->screen->info.r600_max_pipes;
	/* One wavefront covers 16 threads per pipe. */
	unsigned wave_divisor = (16 * num_pipes);

	/* num_waves = ceil((tg_size.x * tg_size.y, tg_size.z) / (16 * num_pipes)) */
	num_waves = (block_layout[0] * block_layout[1] * block_layout[2] +
			wave_divisor - 1) / wave_divisor;

	COMPUTE_DBG("Using %u pipes, there are %u wavefronts per thread block\n",
							num_pipes, num_waves);

	evergreen_set_lds(ctx->cs_shader, 0, 0, num_waves);
	evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
	evergreen_direct_dispatch(ctx_, block_layout, grid_layout);
	compute_emit_cs(ctx);
}
|
||||
|
||||
static void evergreen_set_compute_resources(struct pipe_context * ctx_,
|
||||
unsigned start, unsigned count,
|
||||
struct pipe_surface ** surfaces)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
struct r600_surface **resources = (struct r600_surface **)surfaces;
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (resources[i]) {
|
||||
struct r600_resource_global *buffer =
|
||||
(struct r600_resource_global*)resources[i]->base.texture;
|
||||
if (resources[i]->base.writable) {
|
||||
assert(i+1 < 12);
|
||||
struct r600_resource_global *buffer =
|
||||
(struct r600_resource_global*)
|
||||
resources[i]->base.texture;
|
||||
|
||||
evergreen_set_rat(ctx->cs_shader, i+1,
|
||||
(struct r600_resource *)resources[i]->base.texture,
|
||||
buffer->chunk->start_in_dw*4,
|
||||
resources[i]->base.texture->width0);
|
||||
}
|
||||
|
||||
evergreen_set_vtx_resource(ctx->cs_shader,
|
||||
(struct r600_resource *)resources[i]->base.texture, i+2,
|
||||
buffer->chunk->start_in_dw*4, resources[i]->base.writable);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
|
||||
unsigned start_slot, unsigned count,
|
||||
struct pipe_sampler_view **views)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
struct r600_pipe_sampler_view **resource =
|
||||
(struct r600_pipe_sampler_view **)views;
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (resource[i]) {
|
||||
assert(i+1 < 12);
|
||||
///FETCH0 = VTX0 (param buffer),
|
||||
//FETCH1 = VTX1 (global buffer pool), FETCH2... = TEX
|
||||
evergreen_set_tex_resource(ctx->cs_shader, resource[i], i+2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void evergreen_bind_compute_sampler_states(
|
||||
struct pipe_context *ctx_,
|
||||
unsigned start_slot,
|
||||
unsigned num_samplers,
|
||||
void **samplers_)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
struct compute_sampler_state ** samplers =
|
||||
(struct compute_sampler_state **)samplers_;
|
||||
|
||||
for (int i = 0; i < num_samplers; i++) {
|
||||
if (samplers[i]) {
|
||||
evergreen_set_sampler_resource(ctx->cs_shader, samplers[i], i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void evergreen_set_global_binding(
|
||||
struct pipe_context *ctx_, unsigned first, unsigned n,
|
||||
struct pipe_resource **resources,
|
||||
uint32_t **handles)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
struct compute_memory_pool *pool = ctx->screen->global_pool;
|
||||
struct r600_resource_global **buffers =
|
||||
(struct r600_resource_global **)resources;
|
||||
|
||||
if (!resources) {
|
||||
/* XXX: Unset */
|
||||
return;
|
||||
}
|
||||
|
||||
compute_memory_finalize_pending(pool, ctx_);
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
assert(resources[i]->target == PIPE_BUFFER);
|
||||
assert(resources[i]->bind & PIPE_BIND_GLOBAL);
|
||||
|
||||
*(handles[i]) = buffers[i]->chunk->start_in_dw * 4;
|
||||
}
|
||||
|
||||
evergreen_set_rat(ctx->cs_shader, 0, pool->bo, 0, pool->size_in_dw * 4);
|
||||
evergreen_set_vtx_resource(ctx->cs_shader, pool->bo, 1, 0, 1);
|
||||
}
|
||||
|
||||
|
||||
void evergreen_compute_init_config(struct r600_context *ctx)
|
||||
{
|
||||
struct evergreen_compute_resource* res =
|
||||
get_empty_res(ctx->cs_shader, COMPUTE_RESOURCE_CONFIG, 0);
|
||||
|
||||
int num_threads;
|
||||
int num_stack_entries;
|
||||
int num_temp_gprs;
|
||||
|
||||
enum radeon_family family;
|
||||
unsigned tmp;
|
||||
|
||||
family = ctx->family;
|
||||
|
||||
switch (family) {
|
||||
case CHIP_CEDAR:
|
||||
default:
|
||||
num_temp_gprs = 4;
|
||||
num_threads = 128;
|
||||
num_stack_entries = 256;
|
||||
break;
|
||||
case CHIP_REDWOOD:
|
||||
num_temp_gprs = 4;
|
||||
num_threads = 128;
|
||||
num_stack_entries = 256;
|
||||
break;
|
||||
case CHIP_JUNIPER:
|
||||
num_temp_gprs = 4;
|
||||
num_threads = 128;
|
||||
num_stack_entries = 512;
|
||||
break;
|
||||
case CHIP_CYPRESS:
|
||||
case CHIP_HEMLOCK:
|
||||
num_temp_gprs = 4;
|
||||
num_threads = 128;
|
||||
num_stack_entries = 512;
|
||||
break;
|
||||
case CHIP_PALM:
|
||||
num_temp_gprs = 4;
|
||||
num_threads = 128;
|
||||
num_stack_entries = 256;
|
||||
break;
|
||||
case CHIP_SUMO:
|
||||
num_temp_gprs = 4;
|
||||
num_threads = 128;
|
||||
num_stack_entries = 256;
|
||||
break;
|
||||
case CHIP_SUMO2:
|
||||
num_temp_gprs = 4;
|
||||
num_threads = 128;
|
||||
num_stack_entries = 512;
|
||||
break;
|
||||
case CHIP_BARTS:
|
||||
num_temp_gprs = 4;
|
||||
num_threads = 128;
|
||||
num_stack_entries = 512;
|
||||
break;
|
||||
case CHIP_TURKS:
|
||||
num_temp_gprs = 4;
|
||||
num_threads = 128;
|
||||
num_stack_entries = 256;
|
||||
break;
|
||||
case CHIP_CAICOS:
|
||||
num_temp_gprs = 4;
|
||||
num_threads = 128;
|
||||
num_stack_entries = 256;
|
||||
break;
|
||||
}
|
||||
|
||||
tmp = 0x00000000;
|
||||
switch (family) {
|
||||
case CHIP_CEDAR:
|
||||
case CHIP_PALM:
|
||||
case CHIP_SUMO:
|
||||
case CHIP_SUMO2:
|
||||
case CHIP_CAICOS:
|
||||
break;
|
||||
default:
|
||||
tmp |= S_008C00_VC_ENABLE(1);
|
||||
break;
|
||||
}
|
||||
tmp |= S_008C00_EXPORT_SRC_C(1);
|
||||
tmp |= S_008C00_CS_PRIO(0);
|
||||
tmp |= S_008C00_LS_PRIO(0);
|
||||
tmp |= S_008C00_HS_PRIO(0);
|
||||
tmp |= S_008C00_PS_PRIO(0);
|
||||
tmp |= S_008C00_VS_PRIO(0);
|
||||
tmp |= S_008C00_GS_PRIO(0);
|
||||
tmp |= S_008C00_ES_PRIO(0);
|
||||
|
||||
evergreen_reg_set(res, R_008C00_SQ_CONFIG, tmp);
|
||||
|
||||
evergreen_reg_set(res, R_008C04_SQ_GPR_RESOURCE_MGMT_1,
|
||||
S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
|
||||
evergreen_reg_set(res, R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0);
|
||||
evergreen_reg_set(res, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0);
|
||||
evergreen_reg_set(res, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0);
|
||||
evergreen_reg_set(res, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
|
||||
/* workaround for hw issues with dyn gpr - must set all limits to 240
|
||||
* instead of 0, 0x1e == 240/8 */
|
||||
evergreen_reg_set(res, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
|
||||
S_028838_PS_GPRS(0x1e) |
|
||||
S_028838_VS_GPRS(0x1e) |
|
||||
S_028838_GS_GPRS(0x1e) |
|
||||
S_028838_ES_GPRS(0x1e) |
|
||||
S_028838_HS_GPRS(0x1e) |
|
||||
S_028838_LS_GPRS(0x1e));
|
||||
|
||||
|
||||
evergreen_reg_set(res, R_008E20_SQ_STATIC_THREAD_MGMT1, 0xFFFFFFFF);
|
||||
evergreen_reg_set(res, R_008E24_SQ_STATIC_THREAD_MGMT2, 0xFFFFFFFF);
|
||||
evergreen_reg_set(res, R_008E28_SQ_STATIC_THREAD_MGMT3, 0xFFFFFFFF);
|
||||
evergreen_reg_set(res, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 0);
|
||||
tmp = S_008C1C_NUM_LS_THREADS(num_threads);
|
||||
evergreen_reg_set(res, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp);
|
||||
evergreen_reg_set(res, R_008C20_SQ_STACK_RESOURCE_MGMT_1, 0);
|
||||
evergreen_reg_set(res, R_008C24_SQ_STACK_RESOURCE_MGMT_2, 0);
|
||||
tmp = S_008C28_NUM_LS_STACK_ENTRIES(num_stack_entries);
|
||||
evergreen_reg_set(res, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp);
|
||||
evergreen_reg_set(res, R_0286CC_SPI_PS_IN_CONTROL_0, S_0286CC_LINEAR_GRADIENT_ENA(1));
|
||||
evergreen_reg_set(res, R_0286D0_SPI_PS_IN_CONTROL_1, 0);
|
||||
evergreen_reg_set(res, R_0286E4_SPI_PS_IN_CONTROL_2, 0);
|
||||
evergreen_reg_set(res, R_0286D8_SPI_INPUT_Z, 0);
|
||||
evergreen_reg_set(res, R_0286E0_SPI_BARYC_CNTL, 1 << 20);
|
||||
tmp = S_0286E8_TID_IN_GROUP_ENA | S_0286E8_TGID_ENA | S_0286E8_DISABLE_INDEX_PACK;
|
||||
evergreen_reg_set(res, R_0286E8_SPI_COMPUTE_INPUT_CNTL, tmp);
|
||||
tmp = S_028A40_COMPUTE_MODE(1) | S_028A40_PARTIAL_THD_AT_EOI(1);
|
||||
evergreen_reg_set(res, R_028A40_VGT_GS_MODE, tmp);
|
||||
evergreen_reg_set(res, R_028B54_VGT_SHADER_STAGES_EN, 2/*CS_ON*/);
|
||||
evergreen_reg_set(res, R_028800_DB_DEPTH_CONTROL, 0);
|
||||
evergreen_reg_set(res, R_02880C_DB_SHADER_CONTROL, 0);
|
||||
evergreen_reg_set(res, R_028000_DB_RENDER_CONTROL, S_028000_COLOR_DISABLE(1));
|
||||
evergreen_reg_set(res, R_02800C_DB_RENDER_OVERRIDE, 0);
|
||||
evergreen_reg_set(res, R_0286E8_SPI_COMPUTE_INPUT_CNTL,
|
||||
S_0286E8_TID_IN_GROUP_ENA
|
||||
| S_0286E8_TGID_ENA
|
||||
| S_0286E8_DISABLE_INDEX_PACK)
|
||||
;
|
||||
}
|
||||
|
||||
/* Install the compute-related pipe_context entry points on an evergreen
 * r600 context.  Called once at context creation.
 */
void evergreen_init_compute_state_functions(struct r600_context *ctx)
{
	ctx->context.create_compute_state = evergreen_create_compute_state;
	ctx->context.delete_compute_state = evergreen_delete_compute_state;
	ctx->context.bind_compute_state = evergreen_bind_compute_state;
//	 ctx->context.create_sampler_view = evergreen_compute_create_sampler_view;
	ctx->context.set_compute_resources = evergreen_set_compute_resources;
	ctx->context.set_compute_sampler_views = evergreen_set_cs_sampler_view;
	ctx->context.bind_compute_sampler_states = evergreen_bind_compute_sampler_states;
	ctx->context.set_global_binding = evergreen_set_global_binding;
	ctx->context.launch_grid = evergreen_launch_grid;
}
|
||||
|
||||
|
||||
struct pipe_resource *r600_compute_global_buffer_create(
|
||||
struct pipe_screen *screen,
|
||||
const struct pipe_resource *templ)
|
||||
{
|
||||
assert(templ->target == PIPE_BUFFER);
|
||||
assert(templ->bind & PIPE_BIND_GLOBAL);
|
||||
assert(templ->array_size == 1 || templ->array_size == 0);
|
||||
assert(templ->depth0 == 1 || templ->depth0 == 0);
|
||||
assert(templ->height0 == 1 || templ->height0 == 0);
|
||||
|
||||
struct r600_resource_global* result = (struct r600_resource_global*)
|
||||
CALLOC(sizeof(struct r600_resource_global), 1);
|
||||
struct r600_screen* rscreen = (struct r600_screen*)screen;
|
||||
|
||||
result->base.b.vtbl = &r600_global_buffer_vtbl;
|
||||
result->base.b.b.screen = screen;
|
||||
result->base.b.b = *templ;
|
||||
pipe_reference_init(&result->base.b.b.reference, 1);
|
||||
|
||||
int size_in_dw = (templ->width0+3) / 4;
|
||||
|
||||
result->chunk = compute_memory_alloc(rscreen->global_pool, size_in_dw);
|
||||
|
||||
if (result->chunk == NULL)
|
||||
{
|
||||
free(result);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return &result->base.b.b;
|
||||
}
|
||||
|
||||
void r600_compute_global_buffer_destroy(
|
||||
struct pipe_screen *screen,
|
||||
struct pipe_resource *res)
|
||||
{
|
||||
assert(res->target == PIPE_BUFFER);
|
||||
assert(res->bind & PIPE_BIND_GLOBAL);
|
||||
|
||||
struct r600_resource_global* buffer = (struct r600_resource_global*)res;
|
||||
struct r600_screen* rscreen = (struct r600_screen*)screen;
|
||||
|
||||
compute_memory_free(rscreen->global_pool, buffer->chunk->id);
|
||||
|
||||
buffer->chunk = NULL;
|
||||
free(res);
|
||||
}
|
||||
|
||||
/* Map a global buffer for CPU access.
 *
 * The whole pool BO is mapped and a pointer into the buffer's chunk is
 * returned (chunk offset in dwords plus the transfer's byte offset).
 * Returns NULL if the winsys map fails.
 */
void* r600_compute_global_transfer_map(
	struct pipe_context *ctx_,
	struct pipe_transfer* transfer)
{
	assert(transfer->resource->target == PIPE_BUFFER);
	assert(transfer->resource->bind & PIPE_BIND_GLOBAL);
	assert(transfer->box.x >= 0);
	assert(transfer->box.y == 0);
	assert(transfer->box.z == 0);

	struct r600_context *ctx = (struct r600_context *)ctx_;
	struct r600_resource_global* buffer =
		(struct r600_resource_global*)transfer->resource;

	uint32_t* map;
	///TODO: do it better, mapping is not possible if the pool is too big

	if (!(map = ctx->ws->buffer_map(buffer->chunk->pool->bo->cs_buf,
						ctx->cs, transfer->usage))) {
		return NULL;
	}

	COMPUTE_DBG("buffer start: %lli\n", buffer->chunk->start_in_dw);
	/* map is uint32_t*, so "+ start_in_dw" advances in dwords; box.x is
	 * a byte offset, hence the char* cast before adding it. */
	return ((char*)(map + buffer->chunk->start_in_dw)) + transfer->box.x;
}
|
||||
|
||||
void r600_compute_global_transfer_unmap(
|
||||
struct pipe_context *ctx_,
|
||||
struct pipe_transfer* transfer)
|
||||
{
|
||||
assert(transfer->resource->target == PIPE_BUFFER);
|
||||
assert(transfer->resource->bind & PIPE_BIND_GLOBAL);
|
||||
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
struct r600_resource_global* buffer =
|
||||
(struct r600_resource_global*)transfer->resource;
|
||||
|
||||
ctx->ws->buffer_unmap(buffer->chunk->pool->bo->cs_buf);
|
||||
}
|
||||
|
||||
struct pipe_transfer * r600_compute_global_get_transfer(
|
||||
struct pipe_context *ctx_,
|
||||
struct pipe_resource *resource,
|
||||
unsigned level,
|
||||
unsigned usage,
|
||||
const struct pipe_box *box)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
struct compute_memory_pool *pool = ctx->screen->global_pool;
|
||||
|
||||
compute_memory_finalize_pending(pool, ctx_);
|
||||
|
||||
assert(resource->target == PIPE_BUFFER);
|
||||
struct r600_context *rctx = (struct r600_context*)ctx_;
|
||||
struct pipe_transfer *transfer = util_slab_alloc(&rctx->pool_transfers);
|
||||
|
||||
transfer->resource = resource;
|
||||
transfer->level = level;
|
||||
transfer->usage = usage;
|
||||
transfer->box = *box;
|
||||
transfer->stride = 0;
|
||||
transfer->layer_stride = 0;
|
||||
transfer->data = NULL;
|
||||
|
||||
/* Note strides are zero, this is ok for buffers, but not for
|
||||
* textures 2d & higher at least.
|
||||
*/
|
||||
return transfer;
|
||||
}
|
||||
|
||||
void r600_compute_global_transfer_destroy(
|
||||
struct pipe_context *ctx_,
|
||||
struct pipe_transfer *transfer)
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context*)ctx_;
|
||||
util_slab_free(&rctx->pool_transfers, transfer);
|
||||
}
|
||||
|
||||
/* pipe_context::transfer_flush_region hook for global buffers.
 * Not implemented yet; aborts if ever reached.
 */
void r600_compute_global_transfer_flush_region(
	struct pipe_context *ctx_,
	struct pipe_transfer *transfer,
	const struct pipe_box *box)
{
	assert(0 && "TODO");
}
|
||||
|
||||
/* pipe_context::transfer_inline_write hook for global buffers.
 * Not implemented yet; aborts if ever reached.
 */
void r600_compute_global_transfer_inline_write(
	struct pipe_context *pipe,
	struct pipe_resource *resource,
	unsigned level,
	unsigned usage,
	const struct pipe_box *box,
	const void *data,
	unsigned stride,
	unsigned layer_stride)
{
	assert(0 && "TODO");
}
|
||||
69
src/gallium/drivers/r600/evergreen_compute.h
Normal file
69
src/gallium/drivers/r600/evergreen_compute.h
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Copyright 2011 Adam Rak <adam.rak@streamnovation.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Adam Rak <adam.rak@streamnovation.com>
|
||||
*/
|
||||
|
||||
#ifndef EVERGREEN_COMPUTE_H
|
||||
#define EVERGREEN_COMPUTE_H
|
||||
#include "r600.h"
|
||||
#include "r600_pipe.h"
|
||||
|
||||
struct evergreen_compute_resource;
|
||||
|
||||
/* Duplicate "const const" qualifier removed from the prototype. */
void *evergreen_create_compute_state(struct pipe_context *ctx, const struct pipe_compute_state *cso);
|
||||
void evergreen_delete_compute_state(struct pipe_context *ctx, void *state);
|
||||
void evergreen_direct_dispatch( struct pipe_context *context, const uint *block_layout, const uint *grid_layout);
|
||||
void evergreen_compute_upload_input(struct pipe_context *context, const uint *block_layout, const uint *grid_layout, const void *input);
|
||||
void evergreen_compute_init_config(struct r600_context *rctx);
|
||||
void evergreen_init_compute_state_functions(struct r600_context *rctx);
|
||||
|
||||
struct pipe_resource *r600_compute_global_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ);
|
||||
void r600_compute_global_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *res);
|
||||
void* r600_compute_global_transfer_map(struct pipe_context *ctx, struct pipe_transfer* transfer);
|
||||
void r600_compute_global_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer);
|
||||
struct pipe_transfer * r600_compute_global_get_transfer(struct pipe_context *, struct pipe_resource *, unsigned level,
|
||||
unsigned usage, const struct pipe_box *);
|
||||
void r600_compute_global_transfer_destroy(struct pipe_context *, struct pipe_transfer *);
|
||||
void r600_compute_global_transfer_flush_region( struct pipe_context *, struct pipe_transfer *, const struct pipe_box *);
|
||||
void r600_compute_global_transfer_inline_write( struct pipe_context *, struct pipe_resource *, unsigned level,
|
||||
unsigned usage, const struct pipe_box *, const void *data, unsigned stride, unsigned layer_stride);
|
||||
|
||||
|
||||
static inline void COMPUTE_DBG(const char *fmt, ...)
|
||||
{
|
||||
static bool check_debug = false, debug = false;
|
||||
|
||||
if (!check_debug) {
|
||||
debug = debug_get_bool_option("R600_COMPUTE_DEBUG", FALSE);
|
||||
}
|
||||
|
||||
if (debug) {
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
_debug_vprintf(fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
830
src/gallium/drivers/r600/evergreen_compute_internal.c
Normal file
830
src/gallium/drivers/r600/evergreen_compute_internal.c
Normal file
|
|
@ -0,0 +1,830 @@
|
|||
/*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Adam Rak <adam.rak@streamnovation.com>
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_context.h"
|
||||
#include "util/u_blitter.h"
|
||||
#include "util/u_double_list.h"
|
||||
#include "util/u_transfer.h"
|
||||
#include "util/u_surface.h"
|
||||
#include "util/u_pack_color.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_framebuffer.h"
|
||||
#include "r600.h"
|
||||
#include "r600_resource.h"
|
||||
#include "r600_shader.h"
|
||||
#include "r600_pipe.h"
|
||||
#include "r600_formats.h"
|
||||
#include "evergreend.h"
|
||||
#include "evergreen_compute_internal.h"
|
||||
#include "r600_hw_context_priv.h"
|
||||
|
||||
/* Total number of compute resource slots, computed by summing the slot
 * counts declared in compute_resource.def via the X-macro below.
 */
int get_compute_resource_num(void)
{
	int num = 0;
#define DECL_COMPUTE_RESOURCE(name, n) num += n;
#include "compute_resource.def"
#undef DECL_COMPUTE_RESOURCE
	return num;
}
|
||||
|
||||
/* Append one raw dword to a compute resource's command buffer. */
void evergreen_emit_raw_value(
	struct evergreen_compute_resource* res,
	unsigned value)
{
	res->cs[res->cs_end++] = value;
}
|
||||
|
||||
/* Append one raw dword directly to the context's command stream. */
void evergreen_emit_ctx_value(struct r600_context *ctx, unsigned value)
{
	ctx->cs->buf[ctx->cs->cdw++] = value;
}
|
||||
|
||||
void evergreen_mult_reg_set_(
|
||||
struct evergreen_compute_resource* res,
|
||||
int index,
|
||||
u32* array,
|
||||
int size)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
evergreen_emit_raw_reg_set(res, index, size / 4);
|
||||
|
||||
for (i = 0; i < size; i+=4) {
|
||||
res->cs[res->cs_end++] = array[i / 4];
|
||||
}
|
||||
}
|
||||
|
||||
/* Emit a single-register write: header packet plus one value dword. */
void evergreen_reg_set(
	struct evergreen_compute_resource* res,
	unsigned index,
	unsigned value)
{
	evergreen_emit_raw_reg_set(res, index, 1);
	res->cs[res->cs_end++] = value;
}
|
||||
|
||||
/* Look up and reset the resource slot for (res_code, offset_index).
 *
 * The slot table layout is generated from compute_resource.def: the
 * X-macro walk below recovers each resource kind's base index and slot
 * count.  The returned slot is marked enabled with an empty command
 * buffer and cleared relocation flags.
 */
struct evergreen_compute_resource* get_empty_res(
	struct r600_pipe_compute* pipe,
	enum evergreen_compute_resources res_code,
	int offset_index)
{
	int code_index = -1;
	int code_size = -1;

	{
		int i = 0;
#define DECL_COMPUTE_RESOURCE(name, n) if (COMPUTE_RESOURCE_ ## name == res_code) {code_index = i; code_size = n;} i += n;
#include "compute_resource.def"
#undef DECL_COMPUTE_RESOURCE
	}

	assert(code_index != -1 && "internal error: resouce index not found");
	assert(offset_index < code_size && "internal error: overindexing resource");

	int index = code_index + offset_index;

	struct evergreen_compute_resource* res = &pipe->resources[index];

	/* Reset the slot to a clean, enabled state. */
	res->enabled = true;
	res->bo = NULL;
	res->cs_end = 0;
	bzero(&res->do_reloc, sizeof(res->do_reloc));

	return res;
}
|
||||
|
||||
/* Emit the two-dword header for a register write of 'num' dwords at
 * register offset 'index' into a resource's command buffer.
 *
 * The register range determines which SET_* packet type to use.  For a
 * register outside every known range a PKT0 is emitted instead; PKT0
 * headers are a single dword, so cs_end is pre-decremented to cancel one
 * of the two dwords added at the bottom.
 */
void evergreen_emit_raw_reg_set(
	struct evergreen_compute_resource* res,
	unsigned index,
	int num)
{
	res->enabled = 1;
	int cs_end = res->cs_end;

	if (index >= EVERGREEN_CONFIG_REG_OFFSET
			&& index < EVERGREEN_CONFIG_REG_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
	} else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
			&& index < EVERGREEN_CONTEXT_REG_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
	} else if (index >= EVERGREEN_RESOURCE_OFFSET
			&& index < EVERGREEN_RESOURCE_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_RESOURCE, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
	} else if (index >= EVERGREEN_SAMPLER_OFFSET
			&& index < EVERGREEN_SAMPLER_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_SAMPLER, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
	} else if (index >= EVERGREEN_CTL_CONST_OFFSET
			&& index < EVERGREEN_CTL_CONST_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
	} else if (index >= EVERGREEN_LOOP_CONST_OFFSET
			&& index < EVERGREEN_LOOP_CONST_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
	} else if (index >= EVERGREEN_BOOL_CONST_OFFSET
			&& index < EVERGREEN_BOOL_CONST_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
	} else {
		/* One-dword PKT0 header: decrement so the += 2 below nets
		 * a single-dword advance. */
		res->cs[cs_end] = PKT0(index, num-1);
		res->cs_end--;
	}

	res->cs_end += 2;
}
|
||||
|
||||
/* Request that a relocation be emitted before the NEXT command word
 * appended at the current cs_end position (consumed by compute_emit_cs).
 */
void evergreen_emit_force_reloc(struct evergreen_compute_resource* res)
{
	res->do_reloc[res->cs_end] += 1;
}
|
||||
|
||||
/* Emit a register-write header of 'num' dwords for register 'index'
 * directly into the context's command stream.  Mirrors
 * evergreen_emit_raw_reg_set, but writes to ctx->cs instead of a
 * resource buffer; the fallback PKT0 header is one dword, the SET_*
 * packet headers are two.
 */
void evergreen_emit_ctx_reg_set(
		struct r600_context *ctx,
		unsigned index,
		int num)
{

	if (index >= EVERGREEN_CONFIG_REG_OFFSET
			&& index < EVERGREEN_CONFIG_REG_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
	} else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
			&& index < EVERGREEN_CONTEXT_REG_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
	} else if (index >= EVERGREEN_RESOURCE_OFFSET
			&& index < EVERGREEN_RESOURCE_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_RESOURCE, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
	} else if (index >= EVERGREEN_SAMPLER_OFFSET
			&& index < EVERGREEN_SAMPLER_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_SAMPLER, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
	} else if (index >= EVERGREEN_CTL_CONST_OFFSET
			&& index < EVERGREEN_CTL_CONST_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
	} else if (index >= EVERGREEN_LOOP_CONST_OFFSET
			&& index < EVERGREEN_LOOP_CONST_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
	} else if (index >= EVERGREEN_BOOL_CONST_OFFSET
			&& index < EVERGREEN_BOOL_CONST_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
	} else {
		ctx->cs->buf[ctx->cs->cdw++] = PKT0(index, num-1);
	}
}
|
||||
|
||||
/* Append a NOP packet carrying a relocation for `bo` to the context's
 * command stream, registering the buffer with the winsys under the
 * given usage (read/write) so the kernel can patch and order accesses. */
void evergreen_emit_ctx_reloc(
	struct r600_context *ctx,
	struct r600_resource *bo,
	enum radeon_bo_usage usage)
{
	assert(bo);

	ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	/* r600_context_bo_reloc returns the index of the relocation entry,
	 * which is what the NOP payload dword must contain. */
	u32 rr = r600_context_bo_reloc(ctx, bo, usage);
	ctx->cs->buf[ctx->cs->cdw++] = rr;
}
|
||||
|
||||
void evergreen_set_buffer_sync(
|
||||
struct r600_context *ctx,
|
||||
struct r600_resource* bo,
|
||||
int size,
|
||||
int flags,
|
||||
enum radeon_bo_usage usage)
|
||||
{
|
||||
assert(bo);
|
||||
int32_t cp_coher_size = 0;
|
||||
|
||||
if (size == 0xffffffff || size == 0) {
|
||||
cp_coher_size = 0xffffffff;
|
||||
}
|
||||
else {
|
||||
cp_coher_size = ((size + 255) >> 8);
|
||||
}
|
||||
|
||||
uint32_t sync_flags = 0;
|
||||
|
||||
if ((flags & COMPUTE_RES_TC_FLUSH) == COMPUTE_RES_TC_FLUSH) {
|
||||
sync_flags |= S_0085F0_TC_ACTION_ENA(1);
|
||||
}
|
||||
|
||||
if ((flags & COMPUTE_RES_VC_FLUSH) == COMPUTE_RES_VC_FLUSH) {
|
||||
sync_flags |= S_0085F0_VC_ACTION_ENA(1);
|
||||
}
|
||||
|
||||
if ((flags & COMPUTE_RES_SH_FLUSH) == COMPUTE_RES_SH_FLUSH) {
|
||||
sync_flags |= S_0085F0_SH_ACTION_ENA(1);
|
||||
}
|
||||
|
||||
if ((flags & COMPUTE_RES_CB_FLUSH(0)) == COMPUTE_RES_CB_FLUSH(0)) {
|
||||
sync_flags |= S_0085F0_CB_ACTION_ENA(1);
|
||||
|
||||
switch((flags >> 8) & 0xF) {
|
||||
case 0:
|
||||
sync_flags |= S_0085F0_CB0_DEST_BASE_ENA(1);
|
||||
break;
|
||||
case 1:
|
||||
sync_flags |= S_0085F0_CB1_DEST_BASE_ENA(1);
|
||||
break;
|
||||
case 2:
|
||||
sync_flags |= S_0085F0_CB2_DEST_BASE_ENA(1);
|
||||
break;
|
||||
case 3:
|
||||
sync_flags |= S_0085F0_CB3_DEST_BASE_ENA(1);
|
||||
break;
|
||||
case 4:
|
||||
sync_flags |= S_0085F0_CB4_DEST_BASE_ENA(1);
|
||||
break;
|
||||
case 5:
|
||||
sync_flags |= S_0085F0_CB5_DEST_BASE_ENA(1);
|
||||
break;
|
||||
case 6:
|
||||
sync_flags |= S_0085F0_CB6_DEST_BASE_ENA(1);
|
||||
break;
|
||||
case 7:
|
||||
sync_flags |= S_0085F0_CB7_DEST_BASE_ENA(1);
|
||||
break;
|
||||
case 8:
|
||||
sync_flags |= S_0085F0_CB8_DEST_BASE_ENA(1);
|
||||
break;
|
||||
case 9:
|
||||
sync_flags |= S_0085F0_CB9_DEST_BASE_ENA(1);
|
||||
break;
|
||||
case 10:
|
||||
sync_flags |= S_0085F0_CB10_DEST_BASE_ENA(1);
|
||||
break;
|
||||
case 11:
|
||||
sync_flags |= S_0085F0_CB11_DEST_BASE_ENA(1);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
int32_t poll_interval = 10;
|
||||
|
||||
ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
|
||||
ctx->cs->buf[ctx->cs->cdw++] = sync_flags;
|
||||
ctx->cs->buf[ctx->cs->cdw++] = cp_coher_size;
|
||||
ctx->cs->buf[ctx->cs->cdw++] = 0;
|
||||
ctx->cs->buf[ctx->cs->cdw++] = poll_interval;
|
||||
|
||||
if (cp_coher_size != 0xffffffff) {
|
||||
evergreen_emit_ctx_reloc(ctx, bo, usage);
|
||||
}
|
||||
}
|
||||
|
||||
int evergreen_compute_get_gpu_format(
|
||||
struct number_type_and_format* fmt,
|
||||
struct r600_resource *bo)
|
||||
{
|
||||
switch (bo->b.b.format)
|
||||
{
|
||||
case PIPE_FORMAT_R8_UNORM:
|
||||
case PIPE_FORMAT_R32_UNORM:
|
||||
fmt->format = V_028C70_COLOR_32;
|
||||
fmt->number_type = V_028C70_NUMBER_UNORM;
|
||||
fmt->num_format_all = 0;
|
||||
break;
|
||||
case PIPE_FORMAT_R32_FLOAT:
|
||||
fmt->format = V_028C70_COLOR_32_FLOAT;
|
||||
fmt->number_type = V_028C70_NUMBER_FLOAT;
|
||||
fmt->num_format_all = 0;
|
||||
break;
|
||||
case PIPE_FORMAT_R32G32B32A32_FLOAT:
|
||||
fmt->format = V_028C70_COLOR_32_32_32_32_FLOAT;
|
||||
fmt->number_type = V_028C70_NUMBER_FLOAT;
|
||||
fmt->num_format_all = 0;
|
||||
break;
|
||||
|
||||
///TODO: other formats...
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Bind buffer `bo` as RAT (random access target) `id` for the compute
 * shader, covering `size` bytes starting at byte offset `start`.
 * RATs are programmed through the CB_COLORn register block; RATs 0-7
 * use the full-size register groups, 8-11 the shorter ones (hence the
 * two different per-id offsets below). */
void evergreen_set_rat(
	struct r600_pipe_compute *pipe,
	int id,
	struct r600_resource* bo,
	int start,
	int size)
{
	/* Hardware limits: 12 RATs, dword-aligned size, 256-byte-aligned base. */
	assert(id < 12);
	assert((size & 3) == 0);
	assert((start & 0xFF) == 0);

	int offset;
	COMPUTE_DBG("bind rat: %i \n", id);

	/* Register-group stride: 0x3c bytes for CB0-7, 0x1c for CB8-11. */
	if (id < 8) {
		offset = id*0x3c;
	}
	else {
		offset = 8*0x3c + (id-8)*0x1c;
	}

	int linear = 0;

	/* Plain buffers (no height/depth) are set up as linear RATs; only
	 * that path is implemented below. */
	if (bo->b.b.height0 <= 1 && bo->b.b.depth0 <= 1
			&& bo->b.b.target == PIPE_BUFFER) {
		linear = 1;
	}

	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_RAT, id);

	evergreen_emit_force_reloc(res);

	/* NOTE(review): only CB_COLOR0_BASE below adds `offset`; the other
	 * writes always target the CB_COLOR0 registers -- confirm whether
	 * that is intended for id > 0. */
	evergreen_reg_set(res, R_028C64_CB_COLOR0_PITCH, 0); ///TODO: for 2D?
	evergreen_reg_set(res, R_028C68_CB_COLOR0_SLICE, 0);

	struct number_type_and_format fmt;

	///default config
	if (bo->b.b.format == PIPE_FORMAT_NONE) {
		fmt.format = V_028C70_COLOR_32;
		fmt.number_type = V_028C70_NUMBER_FLOAT;
	} else {
		evergreen_compute_get_gpu_format(&fmt, bo);
	}

	if (linear) {
		evergreen_reg_set(res,
			R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1)
			| S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED)
			| S_028C70_FORMAT(fmt.format)
			| S_028C70_NUMBER_TYPE(fmt.number_type)
		);
		evergreen_emit_force_reloc(res);
	} else {
		/* Tiled (2D) RATs are not implemented yet. */
		assert(0 && "TODO");
		///TODO
//		evergreen_reg_set(res, R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1) | S_028C70_ARRAY_MODE(????));
//		evergreen_emit_force_reloc(res);
	}

	evergreen_reg_set(res, R_028C74_CB_COLOR0_ATTRIB, S_028C74_NON_DISP_TILING_ORDER(1));
	evergreen_emit_force_reloc(res);

	if (linear) {
		/* XXX: Why are we using size instead of bo->b.b.b.width0 ? */
		evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM, size);
	} else {
		evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM,
			S_028C78_WIDTH_MAX(bo->b.b.width0)
			| S_028C78_HEIGHT_MAX(bo->b.b.height0));
	}

	/* CMASK/FMASK registers only exist for CB0-7. */
	if (id < 8) {
		evergreen_reg_set(res, R_028C7C_CB_COLOR0_CMASK, 0);
		evergreen_emit_force_reloc(res);
		evergreen_reg_set(res, R_028C84_CB_COLOR0_FMASK, 0);
		evergreen_emit_force_reloc(res);
	}

	/* BASE is in 256-byte units. */
	evergreen_reg_set(res, R_028C60_CB_COLOR0_BASE + offset, start >> 8);

	res->bo = bo;
	res->usage = RADEON_USAGE_READWRITE;
	res->coher_bo_size = size;
	res->flags = COMPUTE_RES_CB_FLUSH(id);
}
|
||||
|
||||
/* Program the LDS (local data share) allocation for the compute (LS)
 * stage: `num_lds` LDS units in the resource-management register and the
 * per-allocation size/wave count packed into SQ_LDS_ALLOC. */
void evergreen_set_lds(
	struct r600_pipe_compute *pipe,
	int num_lds,
	int size,
	int num_waves)
{
	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_LDS, 0);

	evergreen_reg_set(res, R_008E2C_SQ_LDS_RESOURCE_MGMT,
		S_008E2C_NUM_LS_LDS(num_lds));
	/* size occupies the low bits, the wave count the field at bit 14. */
	evergreen_reg_set(res, CM_R_0288E8_SQ_LDS_ALLOC, size | num_waves << 14);
}
|
||||
|
||||
/* Configure a GDS (global data share) window of `size` bytes starting at
 * GDS address `addr`, with ordered-wave mode enabled. */
void evergreen_set_gds(
	struct r600_pipe_compute *pipe,
	uint32_t addr,
	uint32_t size)
{
	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_GDS, 0);

	evergreen_reg_set(res, R_028728_GDS_ORDERED_WAVE_PER_SE, 1);
	evergreen_reg_set(res, R_028720_GDS_ADDR_BASE, addr);
	evergreen_reg_set(res, R_028724_GDS_ADDR_SIZE, size);
}
|
||||
|
||||
/* Point the SX memory-export window at `size` bytes of `bo` starting at
 * `offset`.  A zero `size` disables memory exports and leaves the
 * resource's buffer binding untouched. */
void evergreen_set_export(
	struct r600_pipe_compute *pipe,
	struct r600_resource* bo,
	int offset, int size)
{
#define SX_MEMORY_EXPORT_BASE 0x9010
#define SX_MEMORY_EXPORT_SIZE 0x9014

	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_EXPORT, 0);

	/* SIZE is always written; BASE only when the export is enabled. */
	evergreen_reg_set(res, SX_MEMORY_EXPORT_SIZE, size);

	if (size) {
		evergreen_reg_set(res, SX_MEMORY_EXPORT_BASE, offset);
		res->bo = bo;
		res->usage = RADEON_USAGE_WRITE;
		res->coher_bo_size = size;
		res->flags = 0;
	}
}
|
||||
|
||||
/* Program loop constant `id` for the compute stage with the given trip
 * count, initial counter value, and per-iteration increment. */
void evergreen_set_loop_const(
	struct r600_pipe_compute *pipe,
	int id, int count, int init, int inc) {

	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_LOOP, id);

	/* Field widths: count is 12 bits, init and inc are 8 bits each. */
	assert(id < 32);
	assert(count <= 0xFFF);
	assert(init <= 0xFF);
	assert(inc <= 0xFF);

	/* Compute shaders use LOOP_CONST registers SQ_LOOP_CONST_160 to
	 * SQ_LOOP_CONST_191 */
	evergreen_reg_set(res, R_03A200_SQ_LOOP_CONST_0 + (160 * 4) + (id * 4),
		count | init << 12 | inc << 24);
}
|
||||
|
||||
/* Configure the LS temporary (scratch) ring for shader engine `se`:
 * GRBM_GFX_INDEX is first narrowed to that SE, the ring base/size are
 * programmed, and broadcast mode is restored afterwards so later
 * register writes reach every SE again. */
void evergreen_set_tmp_ring(
	struct r600_pipe_compute *pipe,
	struct r600_resource* bo,
	int offset, int size, int se)
{
#define SQ_LSTMP_RING_BASE 0x00008e10
#define SQ_LSTMP_RING_SIZE 0x00008e14
#define GRBM_GFX_INDEX 0x802C
#define INSTANCE_INDEX(x) ((x) << 0)
#define SE_INDEX(x) ((x) << 16)
#define INSTANCE_BROADCAST_WRITES (1 << 30)
#define SE_BROADCAST_WRITES (1 << 31)

	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_TMPRING, se);

	/* Address the following writes to this SE only. */
	evergreen_reg_set(res,
		GRBM_GFX_INDEX,INSTANCE_INDEX(0)
		| SE_INDEX(se)
		| INSTANCE_BROADCAST_WRITES);
	evergreen_reg_set(res, SQ_LSTMP_RING_SIZE, size);

	if (size) {
		assert(bo);

		evergreen_reg_set(res, SQ_LSTMP_RING_BASE, offset);
		res->bo = bo;
		res->usage = RADEON_USAGE_WRITE;
		res->coher_bo_size = 0;
		res->flags = 0;
	}

	if (size) {
		evergreen_emit_force_reloc(res);
	}

	/* Restore broadcast so subsequent register writes hit all SEs. */
	evergreen_reg_set(res,
		GRBM_GFX_INDEX,INSTANCE_INDEX(0)
		| SE_INDEX(0)
		| INSTANCE_BROADCAST_WRITES
		| SE_BROADCAST_WRITES);
}
|
||||
|
||||
static uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
|
||||
{
|
||||
if (R600_BIG_ENDIAN) {
|
||||
switch(colorformat) {
|
||||
case V_028C70_COLOR_4_4:
|
||||
return ENDIAN_NONE;
|
||||
|
||||
/* 8-bit buffers. */
|
||||
case V_028C70_COLOR_8:
|
||||
return ENDIAN_NONE;
|
||||
|
||||
/* 16-bit buffers. */
|
||||
case V_028C70_COLOR_5_6_5:
|
||||
case V_028C70_COLOR_1_5_5_5:
|
||||
case V_028C70_COLOR_4_4_4_4:
|
||||
case V_028C70_COLOR_16:
|
||||
case V_028C70_COLOR_8_8:
|
||||
return ENDIAN_8IN16;
|
||||
|
||||
/* 32-bit buffers. */
|
||||
case V_028C70_COLOR_8_8_8_8:
|
||||
case V_028C70_COLOR_2_10_10_10:
|
||||
case V_028C70_COLOR_8_24:
|
||||
case V_028C70_COLOR_24_8:
|
||||
case V_028C70_COLOR_32_FLOAT:
|
||||
case V_028C70_COLOR_16_16_FLOAT:
|
||||
case V_028C70_COLOR_16_16:
|
||||
return ENDIAN_8IN32;
|
||||
|
||||
/* 64-bit buffers. */
|
||||
case V_028C70_COLOR_16_16_16_16:
|
||||
case V_028C70_COLOR_16_16_16_16_FLOAT:
|
||||
return ENDIAN_8IN16;
|
||||
|
||||
case V_028C70_COLOR_32_32_FLOAT:
|
||||
case V_028C70_COLOR_32_32:
|
||||
case V_028C70_COLOR_X24_8_32_FLOAT:
|
||||
return ENDIAN_8IN32;
|
||||
|
||||
/* 96-bit buffers. */
|
||||
case V_028C70_COLOR_32_32_32_FLOAT:
|
||||
/* 128-bit buffers. */
|
||||
case V_028C70_COLOR_32_32_32_32_FLOAT:
|
||||
case V_028C70_COLOR_32_32_32_32:
|
||||
return ENDIAN_8IN32;
|
||||
default:
|
||||
return ENDIAN_NONE; /* Unsupported. */
|
||||
}
|
||||
} else {
|
||||
return ENDIAN_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned r600_tex_dim(unsigned dim)
|
||||
{
|
||||
switch (dim) {
|
||||
default:
|
||||
case PIPE_TEXTURE_1D:
|
||||
return V_030000_SQ_TEX_DIM_1D;
|
||||
case PIPE_TEXTURE_1D_ARRAY:
|
||||
return V_030000_SQ_TEX_DIM_1D_ARRAY;
|
||||
case PIPE_TEXTURE_2D:
|
||||
case PIPE_TEXTURE_RECT:
|
||||
return V_030000_SQ_TEX_DIM_2D;
|
||||
case PIPE_TEXTURE_2D_ARRAY:
|
||||
return V_030000_SQ_TEX_DIM_2D_ARRAY;
|
||||
case PIPE_TEXTURE_3D:
|
||||
return V_030000_SQ_TEX_DIM_3D;
|
||||
case PIPE_TEXTURE_CUBE:
|
||||
return V_030000_SQ_TEX_DIM_CUBEMAP;
|
||||
}
|
||||
}
|
||||
|
||||
void evergreen_set_vtx_resource(
|
||||
struct r600_pipe_compute *pipe,
|
||||
struct r600_resource* bo,
|
||||
int id, uint64_t offset, int writable)
|
||||
{
|
||||
assert(id < 16);
|
||||
uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
|
||||
struct number_type_and_format fmt;
|
||||
|
||||
fmt.format = 0;
|
||||
|
||||
assert(bo->b.b.height0 <= 1);
|
||||
assert(bo->b.b.depth0 <= 1);
|
||||
|
||||
int e = evergreen_compute_get_gpu_format(&fmt, bo);
|
||||
|
||||
assert(e && "unknown format");
|
||||
|
||||
struct evergreen_compute_resource* res =
|
||||
get_empty_res(pipe, COMPUTE_RESOURCE_VERT, id);
|
||||
|
||||
unsigned size = bo->b.b.width0;
|
||||
unsigned stride = 1;
|
||||
|
||||
// size = (size * util_format_get_blockwidth(bo->b.b.b.format) *
|
||||
// util_format_get_blocksize(bo->b.b.b.format));
|
||||
|
||||
COMPUTE_DBG("id: %i vtx size: %i byte, width0: %i elem\n",
|
||||
id, size, bo->b.b.width0);
|
||||
|
||||
sq_vtx_constant_word2 =
|
||||
S_030008_BASE_ADDRESS_HI(offset >> 32) |
|
||||
S_030008_STRIDE(stride) |
|
||||
S_030008_DATA_FORMAT(fmt.format) |
|
||||
S_030008_NUM_FORMAT_ALL(fmt.num_format_all) |
|
||||
S_030008_ENDIAN_SWAP(0);
|
||||
|
||||
COMPUTE_DBG("%08X %i %i %i %i\n", sq_vtx_constant_word2, offset,
|
||||
stride, fmt.format, fmt.num_format_all);
|
||||
|
||||
sq_vtx_constant_word3 =
|
||||
S_03000C_DST_SEL_X(0) |
|
||||
S_03000C_DST_SEL_Y(1) |
|
||||
S_03000C_DST_SEL_Z(2) |
|
||||
S_03000C_DST_SEL_W(3);
|
||||
|
||||
sq_vtx_constant_word4 = 0;
|
||||
|
||||
evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
|
||||
evergreen_emit_raw_value(res, (id+816)*32 >> 2);
|
||||
evergreen_emit_raw_value(res, (unsigned)((offset) & 0xffffffff));
|
||||
evergreen_emit_raw_value(res, size - 1);
|
||||
evergreen_emit_raw_value(res, sq_vtx_constant_word2);
|
||||
evergreen_emit_raw_value(res, sq_vtx_constant_word3);
|
||||
evergreen_emit_raw_value(res, sq_vtx_constant_word4);
|
||||
evergreen_emit_raw_value(res, 0);
|
||||
evergreen_emit_raw_value(res, 0);
|
||||
evergreen_emit_raw_value(res, S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
|
||||
|
||||
res->bo = bo;
|
||||
|
||||
if (writable) {
|
||||
res->usage = RADEON_USAGE_READWRITE;
|
||||
}
|
||||
else {
|
||||
res->usage = RADEON_USAGE_READ;
|
||||
}
|
||||
|
||||
res->coher_bo_size = size;
|
||||
res->flags = COMPUTE_RES_TC_FLUSH | COMPUTE_RES_VC_FLUSH;
|
||||
}
|
||||
|
||||
/* Bind sampler view `view` as texture resource `id` for the compute
 * shader by emitting a 10-dword SET_RESOURCE packet describing the
 * texture's layout, format, and swap mode. */
void evergreen_set_tex_resource(
	struct r600_pipe_compute *pipe,
	struct r600_pipe_sampler_view* view,
	int id)
{
	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_TEX, id);
	struct r600_resource_texture *tmp =
		(struct r600_resource_texture*)view->base.texture;

	unsigned format, endian;
	uint32_t word4 = 0, yuv_format = 0, pitch = 0;
	unsigned char swizzle[4], array_mode = 0, tile_type = 0;
	unsigned height, depth;

	/* Identity swizzle: X, Y, Z, W. */
	swizzle[0] = 0;
	swizzle[1] = 1;
	swizzle[2] = 2;
	swizzle[3] = 3;

	format = r600_translate_texformat((struct pipe_screen *)pipe->ctx->screen,
			view->base.format, swizzle, &word4, &yuv_format);

	/* Unsupported formats translate to ~0; fall back to format 0. */
	if (format == ~0) {
		format = 0;
	}

	endian = r600_colorformat_endian_swap(format);

	height = view->base.texture->height0;
	depth = view->base.texture->depth0;

	/* Hardware pitch is in texels and must be a multiple of 8. */
	pitch = align(tmp->pitch_in_blocks[0] *
		util_format_get_blockwidth(tmp->real_format), 8);
	array_mode = tmp->array_mode[0];
	tile_type = tmp->tile_type;

	/* Array textures are not supported by this path. */
	assert(view->base.texture->target != PIPE_TEXTURE_1D_ARRAY);
	assert(view->base.texture->target != PIPE_TEXTURE_2D_ARRAY);

	evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
	evergreen_emit_raw_value(res, (id+816)*32 >> 2); ///TODO: check this line
	evergreen_emit_raw_value(res,
		(S_030000_DIM(r600_tex_dim(view->base.texture->target)) |
		S_030000_PITCH((pitch / 8) - 1) |
		S_030000_NON_DISP_TILING_ORDER(tile_type) |
		S_030000_TEX_WIDTH(view->base.texture->width0 - 1)));
	evergreen_emit_raw_value(res, (S_030004_TEX_HEIGHT(height - 1) |
		S_030004_TEX_DEPTH(depth - 1) |
		S_030004_ARRAY_MODE(array_mode)));
	/* Base address (level 0) in 256-byte units, written twice (base and
	 * mip base both point at level 0 -- only one level is exposed). */
	evergreen_emit_raw_value(res, tmp->offset[0] >> 8);
	evergreen_emit_raw_value(res, tmp->offset[0] >> 8);
	evergreen_emit_raw_value(res, (word4 |
		S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
		S_030010_ENDIAN_SWAP(endian) |
		S_030010_BASE_LEVEL(0)));
	evergreen_emit_raw_value(res, (S_030014_LAST_LEVEL(0) |
		S_030014_BASE_ARRAY(0) |
		S_030014_LAST_ARRAY(0)));
	evergreen_emit_raw_value(res, (S_030018_MAX_ANISO(4 /* max 16 samples */)));
	evergreen_emit_raw_value(res,
		S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE)
		| S_03001C_DATA_FORMAT(format));

	res->bo = (struct r600_resource*)view->base.texture;

	res->usage = RADEON_USAGE_READ;

	/* Conservative bound on the bytes the TC may touch for this view. */
	res->coher_bo_size = tmp->offset[0] + util_format_get_blockwidth(tmp->real_format)*view->base.texture->width0*height*depth;
	res->flags = COMPUTE_RES_TC_FLUSH;

	/* Two relocations: one per base-address dword emitted above. */
	evergreen_emit_force_reloc(res);
	evergreen_emit_force_reloc(res);
}
|
||||
|
||||
/* Program sampler slot `id` for the compute shader from a gallium
 * sampler state: wrap modes, filters, LOD clamps and bias.  Samplers for
 * the compute stage start at hardware slot 90 (see the *3 dword stride). */
void evergreen_set_sampler_resource(
	struct r600_pipe_compute *pipe,
	struct compute_sampler_state *sampler,
	int id)
{
	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_SAMPLER, id);

	/* Anisotropic filtering is requested by offsetting the filter field. */
	unsigned aniso_flag_offset = sampler->state.max_anisotropy > 1 ? 2 : 0;

	evergreen_emit_raw_value(res, PKT3C(PKT3_SET_SAMPLER, 3, 0));
	evergreen_emit_raw_value(res, (id + 90)*3);
	evergreen_emit_raw_value(res,
		S_03C000_CLAMP_X(r600_tex_wrap(sampler->state.wrap_s)) |
		S_03C000_CLAMP_Y(r600_tex_wrap(sampler->state.wrap_t)) |
		S_03C000_CLAMP_Z(r600_tex_wrap(sampler->state.wrap_r)) |
		S_03C000_XY_MAG_FILTER(r600_tex_filter(sampler->state.mag_img_filter) | aniso_flag_offset) |
		S_03C000_XY_MIN_FILTER(r600_tex_filter(sampler->state.min_img_filter) | aniso_flag_offset) |
		S_03C000_BORDER_COLOR_TYPE(V_03C000_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK)
	);
	/* LOD clamps in 4.8 fixed point. */
	evergreen_emit_raw_value(res,
		S_03C004_MIN_LOD(S_FIXED(CLAMP(sampler->state.min_lod, 0, 15), 8)) |
		S_03C004_MAX_LOD(S_FIXED(CLAMP(sampler->state.max_lod, 0, 15), 8))
	);
	evergreen_emit_raw_value(res,
		S_03C008_LOD_BIAS(S_FIXED(CLAMP(sampler->state.lod_bias, -16, 16), 8)) |
		(sampler->state.seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
		S_03C008_TYPE(1)
	);
}
|
||||
|
||||
/* Bind constant buffer `cbo` to the LS-stage ALU constant cache slot
 * `cache_id`: `size` is the buffer size (in the units SQ expects, max
 * 0x200) and `offset` the 256-byte-aligned start within the buffer. */
void evergreen_set_const_cache(
	struct r600_pipe_compute *pipe,
	int cache_id,
	struct r600_resource* cbo,
	int size, int offset)
{
#define SQ_ALU_CONST_BUFFER_SIZE_LS_0 0x00028fc0
#define SQ_ALU_CONST_CACHE_LS_0 0x00028f40

	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_CONST_MEM, cache_id);

	assert(size < 0x200);
	assert((offset & 0xFF) == 0);
	assert(cache_id < 16);

	/* CACHE base is in 256-byte units; registers are 4 bytes apart. */
	evergreen_reg_set(res, SQ_ALU_CONST_BUFFER_SIZE_LS_0 + cache_id*4, size);
	evergreen_reg_set(res, SQ_ALU_CONST_CACHE_LS_0 + cache_id*4, offset >> 8);
	res->bo = cbo;
	res->usage = RADEON_USAGE_READ;
	res->coher_bo_size = size;
	res->flags = COMPUTE_RES_SH_FLUSH;
}
|
||||
|
||||
struct r600_resource* r600_compute_buffer_alloc_vram(
|
||||
struct r600_screen *screen,
|
||||
unsigned size)
|
||||
{
|
||||
assert(size);
|
||||
|
||||
struct pipe_resource * buffer = pipe_buffer_create(
|
||||
(struct pipe_screen*) screen,
|
||||
PIPE_BIND_CUSTOM,
|
||||
PIPE_USAGE_IMMUTABLE,
|
||||
size);
|
||||
|
||||
return (struct r600_resource *)buffer;
|
||||
}
|
||||
119
src/gallium/drivers/r600/evergreen_compute_internal.h
Normal file
119
src/gallium/drivers/r600/evergreen_compute_internal.h
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#ifndef EVERGREEN_COMPUTE_INTERNAL_H
#define EVERGREEN_COMPUTE_INTERNAL_H

#include "compute_memory_pool.h"

/* One COMPUTE_RESOURCE_* identifier per entry in compute_resource.def. */
enum evergreen_compute_resources
{
#define DECL_COMPUTE_RESOURCE(name, n) COMPUTE_RESOURCE_ ## name ,
#include "compute_resource.def"
#undef DECL_COMPUTE_RESOURCE
	__COMPUTE_RESOURCE_END__
};

typedef unsigned u32;

/* Cache-flush requests carried in evergreen_compute_resource::flags. */
#define COMPUTE_RES_TC_FLUSH  0xF0001
#define COMPUTE_RES_VC_FLUSH  0xF0002
#define COMPUTE_RES_SH_FLUSH  0xF0004
/* Bits 8-11 carry the CB/RAT id.  Fix: the argument is parenthesized so
 * non-trivial expressions (e.g. CB_FLUSH(a + b)) expand correctly; the
 * original `x << 8` would mis-expand them. */
#define COMPUTE_RES_CB_FLUSH(x)  (0xF0008 | ((x) << 8))
#define COMPUTE_RES_FULL_FLUSH  0xF0010

/* Pre-built command-stream snippet plus buffer binding for one compute
 * resource slot (RAT, texture, sampler, LDS, ...). */
struct evergreen_compute_resource {
	int enabled;            /* nonzero once the slot has been programmed */

	int do_reloc[256];      /* dword indices in cs[] that need a relocation */
	u32 cs[256];            /* staged command-stream dwords */
	int cs_end;             /* number of valid dwords in cs[] */

	struct r600_resource *bo;   /* buffer bound to this slot, if any */
	int coher_bo_size;          /* bytes to cover in the surface sync */
	enum radeon_bo_usage usage; /* read/write usage for the relocation */
	int flags; ///flags for COMPUTE_RES_*_FLUSH
};

/* Gallium sampler state wrapped for the compute path. */
struct compute_sampler_state {
	struct r600_pipe_state base;
	struct pipe_sampler_state state;
};

/* Hardware colour format triple returned by
 * evergreen_compute_get_gpu_format(). */
struct number_type_and_format {
	unsigned format;
	unsigned number_type;
	unsigned num_format_all;
};

/* Per-shader compute state: bytecode, resource slots and the buffers
 * backing kernel parameters and shader code. */
struct r600_pipe_compute {
	struct r600_context *ctx;
	struct r600_bytecode bc;
	struct tgsi_token *tokens;

	struct evergreen_compute_resource *resources;

	unsigned local_size;    /* LDS bytes per work group */
	unsigned private_size;
	unsigned input_size;
#ifdef HAVE_OPENCL
	LLVMModuleRef mod;
#endif
	struct r600_resource *kernel_param;
	struct r600_resource *shader_code_bo;
};

int evergreen_compute_get_gpu_format(struct number_type_and_format* fmt, struct r600_resource *bo); ///get hw format from resource, return 0 on faliure, nonzero on success

void evergreen_emit_raw_reg_set(struct evergreen_compute_resource* res, unsigned index, int num);
void evergreen_emit_ctx_reg_set(struct r600_context *ctx, unsigned index, int num);
void evergreen_emit_raw_value(struct evergreen_compute_resource* res, unsigned value);
void evergreen_emit_ctx_value(struct r600_context *ctx, unsigned value);
void evergreen_mult_reg_set_(struct evergreen_compute_resource* res, int index, u32* array, int size);
void evergreen_emit_ctx_reloc(struct r600_context *ctx, struct r600_resource *bo, enum radeon_bo_usage usage);
void evergreen_reg_set(struct evergreen_compute_resource* res, unsigned index, unsigned value);
void evergreen_emit_force_reloc(struct evergreen_compute_resource* res);

void evergreen_set_buffer_sync(struct r600_context *ctx, struct r600_resource* bo, int size, int flags, enum radeon_bo_usage usage);

struct evergreen_compute_resource* get_empty_res(struct r600_pipe_compute*, enum evergreen_compute_resources res_code, int index);
int get_compute_resource_num(void);

/* Only valid when `array` is an actual array (sizeof gives its byte size). */
#define evergreen_mult_reg_set(res, index, array) evergreen_mult_reg_set_(res, index, array, sizeof(array))

void evergreen_set_rat(struct r600_pipe_compute *pipe, int id, struct r600_resource* bo, int start, int size);
void evergreen_set_lds(struct r600_pipe_compute *pipe, int num_lds, int size, int num_waves);
void evergreen_set_gds(struct r600_pipe_compute *pipe, uint32_t addr, uint32_t size);
void evergreen_set_export(struct r600_pipe_compute *pipe, struct r600_resource* bo, int offset, int size);
void evergreen_set_loop_const(struct r600_pipe_compute *pipe, int id, int count, int init, int inc);
void evergreen_set_tmp_ring(struct r600_pipe_compute *pipe, struct r600_resource* bo, int offset, int size, int se);
void evergreen_set_vtx_resource(struct r600_pipe_compute *pipe, struct r600_resource* bo, int id, uint64_t offset, int writable);
void evergreen_set_tex_resource(struct r600_pipe_compute *pipe, struct r600_pipe_sampler_view* view, int id);
void evergreen_set_sampler_resource(struct r600_pipe_compute *pipe, struct compute_sampler_state *sampler, int id);
void evergreen_set_const_cache(struct r600_pipe_compute *pipe, int cache_id, struct r600_resource* cbo, int size, int offset);

struct r600_resource* r600_compute_buffer_alloc_vram(struct r600_screen *screen, unsigned size);

#endif
|
||||
|
|
@ -28,6 +28,7 @@
|
|||
#include "util/u_memory.h"
|
||||
#include "util/u_framebuffer.h"
|
||||
#include "util/u_dual_blend.h"
|
||||
#include "evergreen_compute.h"
|
||||
|
||||
static uint32_t eg_num_banks(uint32_t nbanks)
|
||||
{
|
||||
|
|
@ -1881,6 +1882,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
|
|||
rctx->context.create_stream_output_target = r600_create_so_target;
|
||||
rctx->context.stream_output_target_destroy = r600_so_target_destroy;
|
||||
rctx->context.set_stream_output_targets = r600_set_so_targets;
|
||||
evergreen_init_compute_state_functions(rctx);
|
||||
}
|
||||
|
||||
static void cayman_init_atom_start_cs(struct r600_context *rctx)
|
||||
|
|
|
|||
|
|
@ -61,6 +61,8 @@
|
|||
#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
|
||||
|
||||
#define PKT3_NOP 0x10
|
||||
#define PKT3_DISPATCH_DIRECT 0x15
|
||||
#define PKT3_DISPATCH_INDIRECT 0x16
|
||||
#define PKT3_INDIRECT_BUFFER_END 0x17
|
||||
#define PKT3_SET_PREDICATION 0x20
|
||||
#define PKT3_REG_RMW 0x21
|
||||
|
|
@ -114,6 +116,11 @@
|
|||
#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
|
||||
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
|
||||
|
||||
#define RADEON_CP_PACKET3_COMPUTE_MODE 0x00000002
|
||||
|
||||
/*Evergreen Compute packet3*/
|
||||
#define PKT3C(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate) | RADEON_CP_PACKET3_COMPUTE_MODE)
|
||||
|
||||
/* Registers */
|
||||
#define R_0084FC_CP_STRMOUT_CNTL 0x000084FC
|
||||
#define S_0084FC_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)
|
||||
|
|
@ -241,6 +248,15 @@
|
|||
#define G_008CF0_ALU_UPDATE_FIFO_HIWATER(x) (((x) >> 24) & 0x1F)
|
||||
#define C_008CF0_ALU_UPDATE_FIFO_HIWATER(x) 0xE0FFFFFF
|
||||
|
||||
#define R_008E20_SQ_STATIC_THREAD_MGMT1 0x8E20
|
||||
#define R_008E24_SQ_STATIC_THREAD_MGMT2 0x8E24
|
||||
#define R_008E28_SQ_STATIC_THREAD_MGMT3 0x8E28
|
||||
|
||||
#define R_00899C_VGT_COMPUTE_START_X 0x0000899C
|
||||
#define R_0089A0_VGT_COMPUTE_START_Y 0x000089A0
|
||||
#define R_0089A4_VGT_COMPUTE_START_Z 0x000089A4
|
||||
#define R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE 0x000089AC
|
||||
|
||||
#define R_009100_SPI_CONFIG_CNTL 0x00009100
|
||||
#define R_00913C_SPI_CONFIG_CNTL_1 0x0000913C
|
||||
#define S_00913C_VTX_DONE_DELAY(x) (((x) & 0xF) << 0)
|
||||
|
|
@ -397,6 +413,11 @@
|
|||
#define G_028410_ALPHA_TEST_BYPASS(x) (((x) >> 8) & 0x1)
|
||||
#define C_028410_ALPHA_TEST_BYPASS 0xFFFFFEFF
|
||||
|
||||
#define R_0286EC_SPI_COMPUTE_NUM_THREAD_X 0x0286EC
|
||||
#define R_0286F0_SPI_COMPUTE_NUM_THREAD_Y 0x0286F0
|
||||
#define R_0286F4_SPI_COMPUTE_NUM_THREAD_Z 0x0286F4
|
||||
#define R_028B74_VGT_DISPATCH_INITIATOR 0x028B74
|
||||
|
||||
#define R_028800_DB_DEPTH_CONTROL 0x028800
|
||||
#define S_028800_STENCIL_ENABLE(x) (((x) & 0x1) << 0)
|
||||
#define G_028800_STENCIL_ENABLE(x) (((x) >> 0) & 0x1)
|
||||
|
|
@ -747,6 +768,8 @@
|
|||
#define S_028A40_CUT_MODE(x) (((x) & 0x3) << 3)
|
||||
#define G_028A40_CUT_MODE(x) (((x) >> 3) & 0x3)
|
||||
#define C_028A40_CUT_MODE 0xFFFFFFE7
|
||||
#define S_028A40_COMPUTE_MODE(x) (x << 14)
|
||||
#define S_028A40_PARTIAL_THD_AT_EOI(x) (x << 17)
|
||||
#define R_028A6C_VGT_GS_OUT_PRIM_TYPE 0x028A6C
|
||||
#define S_028A6C_OUTPRIM_TYPE(x) (((x) & 0x3F) << 0)
|
||||
#define V_028A6C_OUTPRIM_TYPE_POINTLIST 0
|
||||
|
|
@ -1434,6 +1457,50 @@
|
|||
#define G_028848_ALLOW_DOUBLE_DENORM_OUT(x) (((x) >> 7) & 0x1)
|
||||
#define C_028848_ALLOW_DOUBLE_DENORM_OUT 0xFFFFFF7F
|
||||
|
||||
#define R_0288D4_SQ_PGM_RESOURCES_LS 0x0288d4
|
||||
#define S_0288D4_NUM_GPRS(x) (((x) & 0xFF) << 0)
|
||||
#define G_0288D4_NUM_GPRS(x) (((x) >> 0) & 0xFF)
|
||||
#define C_0288D4_NUM_GPRS 0xFFFFFF00
|
||||
#define S_0288D4_STACK_SIZE(x) (((x) & 0xFF) << 8)
|
||||
#define G_0288D4_STACK_SIZE(x) (((x) >> 8) & 0xFF)
|
||||
#define C_0288D4_STACK_SIZE 0xFFFF00FF
|
||||
#define S_0288D4_DX10_CLAMP(x) (((x) & 0x1) << 21)
|
||||
#define G_0288D4_DX10_CLAMP(x) (((x) >> 21) & 0x1)
|
||||
#define C_0288D4_DX10_CLAMP 0xFFDFFFFF
|
||||
#define S_0288D4_PRIME_CACHE_ON_DRAW(x) (((x) & 0x1) << 23)
|
||||
#define G_0288D4_PRIME_CACHE_ON_DRAW(x) (((x) >> 23) & 0x1)
|
||||
#define S_0288D4_UNCACHED_FIRST_INST(x) (((x) & 0x1) << 28)
|
||||
#define G_0288D4_UNCACHED_FIRST_INST(x) (((x) >> 28) & 0x1)
|
||||
#define C_0288D4_UNCACHED_FIRST_INST 0xEFFFFFFF
|
||||
#define S_0288D4_CLAMP_CONSTS(x) (((x) & 0x1) << 31)
|
||||
#define G_0288D4_CLAMP_CONSTS(x) (((x) >> 31) & 0x1)
|
||||
#define C_0288D4_CLAMP_CONSTS 0x7FFFFFFF
|
||||
|
||||
#define R_0288D8_SQ_PGM_RESOURCES_LS_2 0x0288d8
|
||||
|
||||
|
||||
#define R_0288D4_SQ_PGM_RESOURCES_LS 0x0288d4
|
||||
#define S_0288D4_NUM_GPRS(x) (((x) & 0xFF) << 0)
|
||||
#define G_0288D4_NUM_GPRS(x) (((x) >> 0) & 0xFF)
|
||||
#define C_0288D4_NUM_GPRS 0xFFFFFF00
|
||||
#define S_0288D4_STACK_SIZE(x) (((x) & 0xFF) << 8)
|
||||
#define G_0288D4_STACK_SIZE(x) (((x) >> 8) & 0xFF)
|
||||
#define C_0288D4_STACK_SIZE 0xFFFF00FF
|
||||
#define S_0288D4_DX10_CLAMP(x) (((x) & 0x1) << 21)
|
||||
#define G_0288D4_DX10_CLAMP(x) (((x) >> 21) & 0x1)
|
||||
#define C_0288D4_DX10_CLAMP 0xFFDFFFFF
|
||||
#define S_0288D4_PRIME_CACHE_ON_DRAW(x) (((x) & 0x1) << 23)
|
||||
#define G_0288D4_PRIME_CACHE_ON_DRAW(x) (((x) >> 23) & 0x1)
|
||||
#define S_0288D4_UNCACHED_FIRST_INST(x) (((x) & 0x1) << 28)
|
||||
#define G_0288D4_UNCACHED_FIRST_INST(x) (((x) >> 28) & 0x1)
|
||||
#define C_0288D4_UNCACHED_FIRST_INST 0xEFFFFFFF
|
||||
#define S_0288D4_CLAMP_CONSTS(x) (((x) & 0x1) << 31)
|
||||
#define G_0288D4_CLAMP_CONSTS(x) (((x) >> 31) & 0x1)
|
||||
#define C_0288D4_CLAMP_CONSTS 0x7FFFFFFF
|
||||
|
||||
#define R_0288D8_SQ_PGM_RESOURCES_LS_2 0x0288d8
|
||||
|
||||
|
||||
#define R_028644_SPI_PS_INPUT_CNTL_0 0x028644
|
||||
#define S_028644_SEMANTIC(x) (((x) & 0xFF) << 0)
|
||||
#define G_028644_SEMANTIC(x) (((x) >> 0) & 0xFF)
|
||||
|
|
@ -1710,6 +1777,12 @@
|
|||
#define R_0286DC_SPI_FOG_CNTL 0x000286DC
|
||||
#define R_0286E4_SPI_PS_IN_CONTROL_2 0x000286E4
|
||||
#define R_0286E8_SPI_COMPUTE_INPUT_CNTL 0x000286E8
|
||||
#define S_0286E8_TID_IN_GROUP_ENA 1
|
||||
#define S_0286E8_TGID_ENA 2
|
||||
#define S_0286E8_DISABLE_INDEX_PACK 4
|
||||
#define R_028720_GDS_ADDR_BASE 0x00028720
|
||||
#define R_028724_GDS_ADDR_SIZE 0x00028724
|
||||
#define R_028728_GDS_ORDERED_WAVE_PER_SE 0x00028728
|
||||
#define R_028784_CB_BLEND1_CONTROL 0x00028784
|
||||
#define R_028788_CB_BLEND2_CONTROL 0x00028788
|
||||
#define R_02878C_CB_BLEND3_CONTROL 0x0002878C
|
||||
|
|
@ -1736,6 +1809,7 @@
|
|||
#define C_02884C_EXPORT_Z 0xFFFFFFFE
|
||||
#define R_02885C_SQ_PGM_START_VS 0x0002885C
|
||||
#define R_0288A4_SQ_PGM_START_FS 0x000288A4
|
||||
#define R_0288D0_SQ_PGM_START_LS 0x000288d0
|
||||
#define R_0288A8_SQ_PGM_RESOURCES_FS 0x000288A8
|
||||
#define R_0288EC_SQ_LDS_ALLOC_PS 0x000288EC
|
||||
#define R_028900_SQ_ESGS_RING_ITEMSIZE 0x00028900
|
||||
|
|
|
|||
19
src/gallium/drivers/r600/llvm_wrapper.cpp
Normal file
19
src/gallium/drivers/r600/llvm_wrapper.cpp
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
#include <llvm/ADT/OwningPtr.h>
|
||||
#include <llvm/ADT/StringRef.h>
|
||||
#include <llvm/LLVMContext.h>
|
||||
#include <llvm/Support/IRReader.h>
|
||||
#include <llvm/Support/MemoryBuffer.h>
|
||||
#include <llvm/Support/SourceMgr.h>
|
||||
|
||||
#include "llvm_wrapper.h"
|
||||
|
||||
|
||||
extern "C" LLVMModuleRef llvm_parse_bitcode(const unsigned char * bitcode, unsigned bitcode_len)
|
||||
{
|
||||
llvm::OwningPtr<llvm::Module> M;
|
||||
llvm::StringRef str((const char*)bitcode, bitcode_len);
|
||||
llvm::MemoryBuffer* buffer = llvm::MemoryBuffer::getMemBufferCopy(str);
|
||||
llvm::SMDiagnostic Err;
|
||||
M.reset(llvm::ParseIR(buffer, Err, llvm::getGlobalContext()));
|
||||
return wrap(M.take());
|
||||
}
|
||||
16
src/gallium/drivers/r600/llvm_wrapper.h
Normal file
16
src/gallium/drivers/r600/llvm_wrapper.h
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
#ifndef LLVM_WRAPPER_H
|
||||
#define LLVM_WRAPPER_H
|
||||
|
||||
#include <llvm-c/Core.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
LLVMModuleRef llvm_parse_bitcode(const unsigned char * bitcode, unsigned bitcode_len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
#ifndef R600_LLVM_H
|
||||
#define R600_LLVM_H
|
||||
|
||||
#ifdef R600_USE_LLVM
|
||||
#if defined R600_USE_LLVM || defined HAVE_OPENCL
|
||||
|
||||
#include "radeon_llvm.h"
|
||||
#include <llvm-c/Core.h>
|
||||
|
|
@ -24,6 +24,6 @@ unsigned r600_llvm_compile(
|
|||
enum radeon_family family,
|
||||
unsigned dump);
|
||||
|
||||
#endif /* R600_USE_LLVM */
|
||||
#endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */
|
||||
|
||||
#endif /* R600_LLVM_H */
|
||||
|
|
|
|||
|
|
@ -382,6 +382,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
|
||||
case PIPE_CAP_USER_INDEX_BUFFERS:
|
||||
case PIPE_CAP_USER_CONSTANT_BUFFERS:
|
||||
case PIPE_CAP_COMPUTE:
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
|
||||
|
|
@ -409,7 +410,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
|
||||
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
|
||||
case PIPE_CAP_USER_VERTEX_BUFFERS:
|
||||
case PIPE_CAP_COMPUTE:
|
||||
return 0;
|
||||
|
||||
/* Stream output. */
|
||||
|
|
@ -491,6 +491,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
|
|||
{
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
case PIPE_SHADER_VERTEX:
|
||||
case PIPE_SHADER_COMPUTE:
|
||||
break;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
/* XXX: support and enable geometry programs */
|
||||
|
|
@ -538,8 +539,12 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
|
|||
return rscreen->glsl_feature_level >= 130;
|
||||
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
|
||||
return 16;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
if (shader == PIPE_SHADER_COMPUTE) {
|
||||
return PIPE_SHADER_IR_LLVM;
|
||||
} else {
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -569,6 +574,81 @@ static int r600_get_video_param(struct pipe_screen *screen,
|
|||
}
|
||||
}
|
||||
|
||||
static int r600_get_compute_param(struct pipe_screen *screen,
|
||||
enum pipe_compute_cap param,
|
||||
void *ret)
|
||||
{
|
||||
//TODO: select these params by asic
|
||||
switch (param) {
|
||||
case PIPE_COMPUTE_CAP_IR_TARGET:
|
||||
if (ret) {
|
||||
strcpy(ret, "r600--");
|
||||
}
|
||||
return 7 * sizeof(char);
|
||||
|
||||
case PIPE_COMPUTE_CAP_GRID_DIMENSION:
|
||||
if (ret) {
|
||||
uint64_t * grid_dimension = ret;
|
||||
grid_dimension[0] = 3;
|
||||
}
|
||||
return 1 * sizeof(uint64_t);
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
|
||||
if (ret) {
|
||||
uint64_t * grid_size = ret;
|
||||
grid_size[0] = 65535;
|
||||
grid_size[1] = 65535;
|
||||
grid_size[2] = 1;
|
||||
}
|
||||
return 3 * sizeof(uint64_t) ;
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
|
||||
if (ret) {
|
||||
uint64_t * block_size = ret;
|
||||
block_size[0] = 256;
|
||||
block_size[1] = 256;
|
||||
block_size[2] = 256;
|
||||
}
|
||||
return 3 * sizeof(uint64_t);
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
|
||||
if (ret) {
|
||||
uint64_t * max_threads_per_block = ret;
|
||||
*max_threads_per_block = 256;
|
||||
}
|
||||
return sizeof(uint64_t);
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
|
||||
if (ret) {
|
||||
uint64_t * max_global_size = ret;
|
||||
/* XXX: This is what the proprietary driver reports, we
|
||||
* may want to use a different value. */
|
||||
*max_global_size = 201326592;
|
||||
}
|
||||
return sizeof(uint64_t);
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
|
||||
if (ret) {
|
||||
uint64_t * max_input_size = ret;
|
||||
*max_input_size = 1024;
|
||||
}
|
||||
return sizeof(uint64_t);
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
|
||||
if (ret) {
|
||||
uint64_t * max_local_size = ret;
|
||||
/* XXX: This is what the proprietary driver reports, we
|
||||
* may want to use a different value. */
|
||||
*max_local_size = 32768;
|
||||
}
|
||||
return sizeof(uint64_t);
|
||||
|
||||
default:
|
||||
fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void r600_destroy_screen(struct pipe_screen* pscreen)
|
||||
{
|
||||
struct r600_screen *rscreen = (struct r600_screen *)pscreen;
|
||||
|
|
@ -576,6 +656,10 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
|
|||
if (rscreen == NULL)
|
||||
return;
|
||||
|
||||
if (rscreen->global_pool) {
|
||||
compute_memory_pool_delete(rscreen->global_pool);
|
||||
}
|
||||
|
||||
if (rscreen->fences.bo) {
|
||||
struct r600_fence_block *entry, *tmp;
|
||||
|
||||
|
|
@ -833,6 +917,8 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
|
|||
rscreen->screen.get_shader_param = r600_get_shader_param;
|
||||
rscreen->screen.get_paramf = r600_get_paramf;
|
||||
rscreen->screen.get_video_param = r600_get_video_param;
|
||||
rscreen->screen.get_compute_param = r600_get_compute_param;
|
||||
|
||||
if (rscreen->chip_class >= EVERGREEN) {
|
||||
rscreen->screen.is_format_supported = evergreen_is_format_supported;
|
||||
} else {
|
||||
|
|
@ -857,5 +943,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
|
|||
rscreen->use_surface_alloc = debug_get_bool_option("R600_SURF", TRUE);
|
||||
rscreen->glsl_feature_level = debug_get_bool_option("R600_GLSL130", TRUE) ? 130 : 120;
|
||||
|
||||
rscreen->global_pool = compute_memory_pool_new(1024*16, rscreen);
|
||||
|
||||
return &rscreen->screen;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,8 +28,11 @@
|
|||
|
||||
#include "util/u_slab.h"
|
||||
#include "r600.h"
|
||||
#include "r600_llvm.h"
|
||||
#include "r600_public.h"
|
||||
#include "r600_shader.h"
|
||||
#include "r600_resource.h"
|
||||
#include "evergreen_compute.h"
|
||||
|
||||
#define R600_MAX_CONST_BUFFERS 2
|
||||
#define R600_MAX_CONST_BUFFER_SIZE 4096
|
||||
|
|
@ -98,9 +101,16 @@ enum r600_pipe_state_id {
|
|||
R600_PIPE_STATE_RESOURCE,
|
||||
R600_PIPE_STATE_POLYGON_OFFSET,
|
||||
R600_PIPE_STATE_FETCH_SHADER,
|
||||
R600_PIPE_STATE_SPI,
|
||||
R600_PIPE_NSTATES
|
||||
};
|
||||
|
||||
struct compute_memory_pool;
|
||||
void compute_memory_pool_delete(struct compute_memory_pool* pool);
|
||||
struct compute_memory_pool* compute_memory_pool_new(
|
||||
int64_t initial_size_in_dw,
|
||||
struct r600_screen *rscreen);
|
||||
|
||||
struct r600_pipe_fences {
|
||||
struct r600_resource *bo;
|
||||
unsigned *data;
|
||||
|
|
@ -123,6 +133,12 @@ struct r600_screen {
|
|||
|
||||
bool use_surface_alloc;
|
||||
int glsl_feature_level;
|
||||
|
||||
/*for compute global memory binding, we allocate stuff here, instead of
|
||||
* buffers.
|
||||
* XXX: Not sure if this is the best place for global_pool. Also,
|
||||
* it's not thread safe, so it won't work with multiple contexts. */
|
||||
struct compute_memory_pool *global_pool;
|
||||
};
|
||||
|
||||
struct r600_pipe_sampler_view {
|
||||
|
|
@ -257,6 +273,7 @@ struct r600_context {
|
|||
struct pipe_clip_state clip;
|
||||
struct r600_pipe_shader *ps_shader;
|
||||
struct r600_pipe_shader *vs_shader;
|
||||
struct r600_pipe_compute *cs_shader;
|
||||
struct r600_pipe_rasterizer *rasterizer;
|
||||
struct r600_pipe_state vgt;
|
||||
struct r600_pipe_state spi;
|
||||
|
|
@ -266,7 +283,9 @@ struct r600_context {
|
|||
unsigned saved_render_cond_mode;
|
||||
/* shader information */
|
||||
boolean two_side;
|
||||
boolean spi_dirty;
|
||||
unsigned sprite_coord_enable;
|
||||
boolean flatshade;
|
||||
boolean export_16bpc;
|
||||
unsigned alpha_ref;
|
||||
boolean alpha_ref_dirty;
|
||||
|
|
@ -412,6 +431,10 @@ void r600_init_context_resource_functions(struct r600_context *r600);
|
|||
|
||||
/* r600_shader.c */
|
||||
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader);
|
||||
#ifdef HAVE_OPENCL
|
||||
int r600_compute_shader_create(struct pipe_context * ctx,
|
||||
LLVMModuleRef mod, struct r600_bytecode * bytecode);
|
||||
#endif
|
||||
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
|
||||
int r600_find_vs_semantic_index(struct r600_shader *vs,
|
||||
struct r600_shader *ps, int id);
|
||||
|
|
|
|||
|
|
@ -27,7 +27,12 @@ static struct pipe_resource *r600_resource_create(struct pipe_screen *screen,
|
|||
const struct pipe_resource *templ)
|
||||
{
|
||||
if (templ->target == PIPE_BUFFER) {
|
||||
return r600_buffer_create(screen, templ);
|
||||
if (templ->bind & PIPE_BIND_GLOBAL) {
|
||||
return r600_compute_global_buffer_create(screen, templ);
|
||||
}
|
||||
else {
|
||||
return r600_buffer_create(screen, templ);
|
||||
}
|
||||
} else {
|
||||
return r600_texture_create(screen, templ);
|
||||
}
|
||||
|
|
@ -44,12 +49,21 @@ static struct pipe_resource *r600_resource_from_handle(struct pipe_screen * scre
|
|||
}
|
||||
}
|
||||
|
||||
void r600_resource_destroy(struct pipe_screen *screen, struct pipe_resource *res)
|
||||
{
|
||||
if (res->target == PIPE_BUFFER && (res->bind & PIPE_BIND_GLOBAL)) {
|
||||
r600_compute_global_buffer_destroy(screen, res);
|
||||
} else {
|
||||
u_resource_destroy_vtbl(screen, res);
|
||||
}
|
||||
}
|
||||
|
||||
void r600_init_screen_resource_functions(struct pipe_screen *screen)
|
||||
{
|
||||
screen->resource_create = r600_resource_create;
|
||||
screen->resource_from_handle = r600_resource_from_handle;
|
||||
screen->resource_get_handle = u_resource_get_handle_vtbl;
|
||||
screen->resource_destroy = u_resource_destroy_vtbl;
|
||||
screen->resource_destroy = r600_resource_destroy;
|
||||
}
|
||||
|
||||
void r600_init_context_resource_functions(struct r600_context *r600)
|
||||
|
|
|
|||
|
|
@ -34,6 +34,13 @@ struct r600_transfer {
|
|||
unsigned offset;
|
||||
};
|
||||
|
||||
struct compute_memory_item;
|
||||
|
||||
struct r600_resource_global {
|
||||
struct r600_resource base;
|
||||
struct compute_memory_item *chunk;
|
||||
};
|
||||
|
||||
struct r600_resource_texture {
|
||||
struct r600_resource resource;
|
||||
|
||||
|
|
@ -65,6 +72,7 @@ struct r600_surface {
|
|||
unsigned aligned_height;
|
||||
};
|
||||
|
||||
void r600_resource_destroy(struct pipe_screen *screen, struct pipe_resource *res);
|
||||
void r600_init_screen_resource_functions(struct pipe_screen *screen);
|
||||
|
||||
/* r600_texture */
|
||||
|
|
|
|||
|
|
@ -225,6 +225,37 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
|
|||
* struct r600_bytecode.
|
||||
*/
|
||||
|
||||
static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
|
||||
unsigned char * bytes, unsigned num_bytes);
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
int r600_compute_shader_create(struct pipe_context * ctx,
|
||||
LLVMModuleRef mod, struct r600_bytecode * bytecode)
|
||||
{
|
||||
struct r600_context *r600_ctx = (struct r600_context *)ctx;
|
||||
unsigned char * bytes;
|
||||
unsigned byte_count;
|
||||
struct r600_shader_ctx shader_ctx;
|
||||
unsigned dump = 0;
|
||||
|
||||
if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
|
||||
dump = 1;
|
||||
}
|
||||
|
||||
r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
|
||||
shader_ctx.bc = bytecode;
|
||||
r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family);
|
||||
shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
|
||||
r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
|
||||
r600_bytecode_build(shader_ctx.bc);
|
||||
if (dump) {
|
||||
r600_bytecode_dump(shader_ctx.bc);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif /* HAVE_OPENCL */
|
||||
|
||||
static unsigned r600_src_from_byte_stream(unsigned char * bytes,
|
||||
unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -916,6 +916,10 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
|
|||
unsigned offset = 0;
|
||||
char *map;
|
||||
|
||||
if ((transfer->resource->bind & PIPE_BIND_GLOBAL) && transfer->resource->target == PIPE_BUFFER) {
|
||||
return r600_compute_global_transfer_map(ctx, transfer);
|
||||
}
|
||||
|
||||
if (rtransfer->staging) {
|
||||
buf = ((struct r600_resource *)rtransfer->staging)->cs_buf;
|
||||
} else {
|
||||
|
|
@ -945,6 +949,10 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx,
|
|||
struct r600_context *rctx = (struct r600_context*)ctx;
|
||||
struct radeon_winsys_cs_handle *buf;
|
||||
|
||||
if ((transfer->resource->bind & PIPE_BIND_GLOBAL) && transfer->resource->target == PIPE_BUFFER) {
|
||||
return r600_compute_global_transfer_unmap(ctx, transfer);
|
||||
}
|
||||
|
||||
if (rtransfer->staging) {
|
||||
buf = ((struct r600_resource *)rtransfer->staging)->cs_buf;
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue