r600g: compute support for evergreen

Tom Stellard:
  - Updated for gallium interface changes
  - Fixed a few bugs:
    + Set the loop counter
    + Calculate the correct number of pipes
  - Added hooks into the LLVM compiler
This commit is contained in:
Adam Rak 2011-11-30 22:20:41 +01:00 committed by Tom Stellard
parent 46a13b3b11
commit 6a829a1b72
21 changed files with 2680 additions and 13 deletions

View file

@ -1993,13 +1993,18 @@ if test "x$with_gallium_drivers" != x; then
PKG_CHECK_MODULES([RADEON], [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
gallium_require_drm_loader
GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
if test "x$enable_r600_llvm" = xyes; then
if test "x$enable_r600_llvm" = xyes -o "x$enable_opencl" = xyes; then
if test "x$LLVM_VERSION" != "x3.1"; then
AC_MSG_ERROR([LLVM 3.1 is required for the r600 llvm compiler.])
fi
NEED_RADEON_GALLIUM=yes;
fi
if test "x$enable_r600_llvm" = xyes; then
USE_R600_LLVM_COMPILER=yes;
fi
if test "x$enable_opencl" = xyes -a "x$with_llvm_shared_libs" = xno; then
LLVM_LIBS="${LLVM_LIBS} `llvm-config --libs bitreader asmparser`"
fi
gallium_check_st "radeon/drm" "dri-r600" "xorg-r600" "" "xvmc-r600" "vdpau-r600" "va-r600"
;;
xradeonsi)

View file

@ -18,7 +18,7 @@ AM_CFLAGS = \
libr600_a_SOURCES = \
$(C_SOURCES)
if USE_R600_LLVM_COMPILER
if NEED_RADEON_GALLIUM
# This is a hack until we can move the backend into the LLVM project.
# We need to use mklib, because it splits up libradeon.a into object files
@ -26,18 +26,28 @@ if USE_R600_LLVM_COMPILER
libr600_a_AR = $(top_srcdir)/bin/mklib -o r600 -static
libr600_a_SOURCES += \
$(LLVM_C_SOURCES)
$(LLVM_C_SOURCES) \
$(LLVM_CXX_SOURCES)
libr600_a_LIBADD = \
$(top_builddir)/src/gallium/drivers/radeon/libradeon.a
AM_CFLAGS += \
$(LLVM_CFLAGS) \
-I$(top_srcdir)/src/gallium/drivers/radeon/ \
-DR600_USE_LLVM
-I$(top_srcdir)/src/gallium/drivers/radeon/
AM_CXXFLAGS= \
$(LLVM_CXXFLAGS)
else
libr600_a_AR = $(AR) $(ARFLAGS)
endif
if USE_R600_LLVM_COMPILER
AM_CFLAGS += \
-DR600_USE_LLVM
endif
if HAVE_GALLIUM_COMPUTE
AM_CFLAGS += \
-DHAVE_OPENCL
endif

View file

@ -14,6 +14,10 @@ C_SOURCES = \
evergreen_state.c \
eg_asm.c \
r600_translate.c \
r600_state_common.c
r600_state_common.c \
evergreen_compute.c \
evergreen_compute_internal.c \
compute_memory_pool.c
LLVM_C_SOURCES = r600_llvm.c
LLVM_CXX_SOURCES = llvm_wrapper.cpp

View file

@ -0,0 +1,397 @@
/*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Adam Rak <adam.rak@streamnovation.com>
*/
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600.h"
#include "r600_resource.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute_internal.h"
/**
 * Creates a new memory pool of initial_size_in_dw dwords, backed by a
 * VRAM buffer object plus a host shadow copy used for grow/defrag.
 *
 * Returns NULL on host allocation failure (the original code did not
 * check the CALLOC results and would crash on OOM).
 */
struct compute_memory_pool* compute_memory_pool_new(
	int64_t initial_size_in_dw,
	struct r600_screen * rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);
	if (!pool)
		return NULL;

	pool->next_id = 1;
	pool->size_in_dw = initial_size_in_dw;
	pool->screen = rscreen;
	pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
						pool->screen, pool->size_in_dw*4);

	/* Host copy of the pool contents (one dword = 4 bytes). */
	pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw);
	if (!pool->shadow) {
		pool->screen->screen.resource_destroy(
			(struct pipe_screen *)pool->screen,
			(struct pipe_resource *)pool->bo);
		free(pool);
		return NULL;
	}

	return pool;
}
/**
* Frees all stuff in the pool and the pool struct itself too
*/
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
free(pool->shadow);
pool->screen->screen.resource_destroy((struct pipe_screen *)
pool->screen, (struct pipe_resource *)pool->bo);
free(pool);
}
/**
 * Scans the (offset-ordered) item list for a gap large enough to hold
 * size_in_dw dwords.  Returns the gap's start offset in dwords, or -1
 * if the pool currently has no room.
 */
int64_t compute_memory_prealloc_chunk(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *cur;
	int last_end = 0;

	assert(size_in_dw <= pool->size_in_dw);

	for (cur = pool->item_list; cur; cur = cur->next) {
		/* Pending items (start_in_dw == -1) have no position yet. */
		if (cur->start_in_dw <= -1)
			continue;

		if (cur->start_in_dw - last_end > size_in_dw)
			return last_end;

		/* Step past this item, rounding up to the next
		 * 1024-dword boundary. */
		last_end = cur->start_in_dw + cur->size_in_dw;
		last_end += 1024 - last_end % 1024;
	}

	/* Tail of the pool. */
	return (pool->size_in_dw - last_end < size_in_dw) ? -1 : last_end;
}
/**
 * Finds the already-placed item after which a chunk starting at
 * start_in_dw must be linked so that the list stays ordered by offset.
 */
struct compute_memory_item* compute_memory_postalloc_chunk(
	struct compute_memory_pool* pool,
	int64_t start_in_dw)
{
	struct compute_memory_item* cur = pool->item_list;

	while (cur) {
		if (!cur->next) {
			/* End of chain: the new chunk goes last. */
			assert(cur->start_in_dw < start_in_dw);
			return cur;
		}
		if (cur->start_in_dw < start_in_dw &&
		    cur->next->start_in_dw > start_in_dw)
			return cur;
		cur = cur->next;
	}

	assert(0 && "unreachable");
	return NULL;
}
/**
 * Grows the pool to at least new_size_in_dw dwords (rounded up to a
 * 1024-dword multiple), preserving its contents via the host shadow.
 */
void compute_memory_grow_pool(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int new_size_in_dw)
{
	uint32_t *new_shadow;

	assert(new_size_in_dw >= pool->size_in_dw);

	new_size_in_dw += 1024 - (new_size_in_dw % 1024);

	/* Save the device contents before the BO is destroyed. */
	compute_memory_shadow(pool, pipe, 1);

	/* Do not assign realloc's result to pool->shadow directly: on
	 * failure that would leak the old block and lose the pointer. */
	new_shadow = (uint32_t*)realloc(pool->shadow, new_size_in_dw*4);
	if (!new_shadow) {
		fprintf(stderr, "compute_memory_grow_pool: out of memory\n");
		return; /* pool keeps its old size and contents */
	}
	pool->shadow = new_shadow;
	pool->size_in_dw = new_size_in_dw;

	/* Replace the BO with a bigger one and re-upload the contents. */
	pool->screen->screen.resource_destroy(
			(struct pipe_screen *)pool->screen,
			(struct pipe_resource *)pool->bo);
	pool->bo = r600_compute_buffer_alloc_vram(pool->screen,
						pool->size_in_dw*4);
	compute_memory_shadow(pool, pipe, 0);
}
/**
* Copy pool from device to host, or host to device.
*/
void compute_memory_shadow(struct compute_memory_pool* pool,
struct pipe_context * pipe, int device_to_host)
{
struct compute_memory_item chunk;
chunk.id = 0;
chunk.start_in_dw = 0;
chunk.size_in_dw = pool->size_in_dw;
chunk.prev = chunk.next = NULL;
compute_memory_transfer(pool, pipe, device_to_host, &chunk,
pool->shadow, 0, pool->size_in_dw*4);
}
/**
 * Gives every pending item (start_in_dw == -1) a real offset inside the
 * pool, growing the pool when there is not enough room.  Must run before
 * a dispatch that uses the pool.
 */
void compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe)
{
	struct compute_memory_item *pending_list = NULL, *end_p = NULL;
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;

	/* NOTE(review): "%i" does not match int64_t start_in_dw; should be
	 * PRIi64 — debug output only, but confirm COMPUTE_DBG is printf-like. */
	for (item = pool->item_list; item; item = item->next) {
		COMPUTE_DBG("list: %i %p\n", item->start_in_dw, item->next);
	}

	/* Pass 1: move every pending item from item_list onto pending_list,
	 * summing how much space is already placed (allocated) and how much
	 * the pending items will need, each padded by 1024 dwords for the
	 * alignment applied in compute_memory_prealloc_chunk(). */
	for (item = pool->item_list; item; item = next) {
		next = item->next;

		if (item->start_in_dw == -1) {
			/* Append to pending_list... */
			if (end_p) {
				end_p->next = item;
			}
			else {
				pending_list = item;
			}

			/* ...and unlink from item_list. */
			if (item->prev) {
				item->prev->next = next;
			}
			else {
				pool->item_list = next;
			}

			if (next) {
				next->prev = item->prev;
			}

			item->prev = end_p;
			item->next = NULL;
			end_p = item;

			unallocated += item->size_in_dw+1024;
		}
		else {
			allocated += item->size_in_dw;
		}
	}

	/* Make sure the pool can hold everything at once. */
	if (pool->size_in_dw < allocated+unallocated) {
		compute_memory_grow_pool(pool, pipe, allocated+unallocated);
	}

	/* Pass 2: place each pending item, growing the pool until a large
	 * enough gap exists, then relink it into item_list in offset order. */
	for (item = pending_list; item; item = next) {
		next = item->next;

		int64_t start_in_dw;

		while ((start_in_dw=compute_memory_prealloc_chunk(pool,
						item->size_in_dw)) == -1) {
			/* Estimate how much more space is required, with
			 * 2048 dwords of slack, rounded up to 1024. */
			int64_t need = item->size_in_dw+2048 -
						(pool->size_in_dw - allocated);

			need += 1024 - (need % 1024);

			if (need > 0) {
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			}
			else {
				/* Fallback: grow by ~10% of the pool. */
				need = pool->size_in_dw / 10;
				need += 1024 - (need % 1024);
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			}
		}

		item->start_in_dw = start_in_dw;
		item->next = NULL;
		item->prev = NULL;

		if (pool->item_list) {
			struct compute_memory_item *pos;

			/* Insert after the item that precedes start_in_dw. */
			pos = compute_memory_postalloc_chunk(pool, start_in_dw);
			item->prev = pos;
			item->next = pos->next;
			pos->next = item;

			if (item->next) {
				item->next->prev = item;
			}
		}
		else {
			pool->item_list = item;
		}

		allocated += item->size_in_dw;
	}
}
/**
 * Unlinks the item with the given id from the pool's item list and frees
 * it.  Asserts if the id is not present.
 */
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
	struct compute_memory_item *item, *next;

	for (item = pool->item_list; item; item = next) {
		next = item->next;

		if (item->id == id) {
			/* Unlink from the doubly linked list. */
			if (item->prev) {
				item->prev->next = item->next;
			}
			else {
				pool->item_list = item->next;
			}

			if (item->next) {
				item->next->prev = item->prev;
			}

			free(item);
			return;
		}
	}

	/* "%ld" is wrong for int64_t on 32-bit targets; PRId64 is portable. */
	fprintf(stderr, "Internal error, invalid id %" PRId64 " "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}
/**
 * Creates a pending allocation of size_in_dw dwords: the returned item
 * has start_in_dw == -1 and only gets a real offset when
 * compute_memory_finalize_pending() runs.  Returns NULL on OOM.
 */
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item;
	struct compute_memory_item *last_item;

	/* PRIi64 instead of the original "%i", which mismatches int64_t. */
	COMPUTE_DBG("Alloc: %" PRIi64 "\n", size_in_dw);

	new_item = (struct compute_memory_item *)
				CALLOC(sizeof(struct compute_memory_item), 1);
	if (!new_item)
		return NULL;

	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;

	/* Append at the tail of the item list. */
	if (pool->item_list) {
		for (last_item = pool->item_list; last_item->next;
					last_item = last_item->next);
		last_item->next = new_item;
		new_item->prev = last_item;
	}
	else {
		pool->item_list = new_item;
	}

	return new_item;
}
/**
 * Transfers data between the host and the pool buffer object.
 * offset_in_chunk and size are in BYTES.
 *
 * \param device_to_host 1 = read from the device into data,
 *                       0 = write data to the device
 */
void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context * pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void* data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	uint32_t *map;
	char *ptr;

	xfer = pipe->get_transfer(pipe, gart, 0,
		device_to_host ? PIPE_TRANSFER_READ : PIPE_TRANSFER_WRITE,
		&(struct pipe_box) { .width = aligned_size,
		.height = 1, .depth = 1 });
	assert(xfer);

	map = pipe->transfer_map(pipe, xfer);
	assert(map);

	/* Bug fix: internal_offset is in bytes, but the original added it
	 * to the uint32_t *map, scaling the offset by 4.  Byte arithmetic
	 * needs a char pointer.  (Only chunk offset 0 was used so far.) */
	ptr = (char *)map + internal_offset;

	if (device_to_host) {
		memcpy(data, ptr, size);
	} else {
		memcpy(ptr, data, size);
	}

	pipe->transfer_unmap(pipe, xfer);
	pipe->transfer_destroy(pipe, xfer);
}
/**
 * Transfers data between a pool chunk and another r600 resource; meant
 * for VRAM<->GART copies.  Offsets and size are in bytes.
 *
 * NOTE(review): unimplemented — callers currently get a silent no-op.
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,
	int offset_in_chunk,
	int offset_in_data,
	int size)
{
	///TODO: DMA
}

View file

@ -0,0 +1,98 @@
/*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Adam Rak <adam.rak@streamnovation.com>
*/
#ifndef COMPUTE_MEMORY_POOL
#define COMPUTE_MEMORY_POOL

/* int64_t / uint32_t are used below; the header previously relied on its
 * includers for <stdint.h>. */
#include <stdint.h>
#include <stdlib.h>

struct compute_memory_pool;

/** One chunk inside the pool.  A pending chunk has start_in_dw == -1
 * until compute_memory_finalize_pending() places it. */
struct compute_memory_item
{
	int64_t id; ///ID of the memory chunk

	int untouched; ///True if the memory contains only junk, no need to save it for defrag

	int64_t start_in_dw; ///Start pointer in dwords relative in the pool bo
	int64_t size_in_dw; ///Size of the chunk in dwords

	struct compute_memory_pool* pool;

	struct compute_memory_item* prev;
	struct compute_memory_item* next;
};

struct compute_memory_pool
{
	int64_t next_id; ///For generating unique IDs for memory chunks
	int64_t size_in_dw; ///Size of the pool in dwords

	struct r600_resource *bo; ///The pool buffer object resource
	struct compute_memory_item* item_list; ///Allocated memory chunks in the buffer, they must be ordered by "start_in_dw"
	struct r600_screen *screen;

	uint32_t *shadow; ///host copy of the pool, used for defragmentation
};

struct compute_memory_pool* compute_memory_pool_new(int64_t initial_size_in_dw, struct r600_screen *rscreen); ///Creates a new pool
void compute_memory_pool_delete(struct compute_memory_pool* pool); ///Frees all stuff in the pool and the pool struct itself too

int64_t compute_memory_prealloc_chunk(struct compute_memory_pool* pool, int64_t size_in_dw); ///searches for an empty space in the pool, return with the pointer to the allocatable space in the pool, returns -1 on failure
struct compute_memory_item* compute_memory_postalloc_chunk(struct compute_memory_pool* pool, int64_t start_in_dw); ///search for the chunk where we can link our new chunk after it

/**
 * Reallocates the pool, conserving its data.
 */
void compute_memory_grow_pool(struct compute_memory_pool* pool, struct pipe_context * pipe,
	int new_size_in_dw);

/**
 * Copies the pool from device to host, or host to device.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host);

/**
 * Allocates pending allocations in the pool.
 */
void compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe);
void compute_memory_defrag(struct compute_memory_pool* pool); ///Defragment the memory pool, always heavy memory usage
void compute_memory_free(struct compute_memory_pool* pool, int64_t id);
struct compute_memory_item* compute_memory_alloc(struct compute_memory_pool* pool, int64_t size_in_dw); ///Creates pending allocations

/**
 * Transfers data host<->device; offset and size are in bytes.
 */
void compute_memory_transfer(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host,
	struct compute_memory_item* chunk, void* data,
	int offset_in_chunk, int size);

void compute_memory_transfer_direct(struct compute_memory_pool* pool, int chunk_to_data, struct compute_memory_item* chunk, struct r600_resource* data, int offset_in_chunk, int offset_in_data, int size); ///Transfer data between chunk<->data, it is for VRAM<->GART transfers

#endif

View file

@ -0,0 +1,38 @@
/*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Adam Rak <adam.rak@streamnovation.com>
*/
/* X-macro table of per-shader compute resource groups: each entry is
 * DECL_COMPUTE_RESOURCE(name, max_instances).  Presumably the including
 * file defines DECL_COMPUTE_RESOURCE to generate enums/tables from this
 * list — confirm against evergreen_compute_internal.h. */
DECL_COMPUTE_RESOURCE(CONFIG, 1)
DECL_COMPUTE_RESOURCE(CONST_MEM, 16)
DECL_COMPUTE_RESOURCE(RAT, 12)
DECL_COMPUTE_RESOURCE(VERT, 16)
DECL_COMPUTE_RESOURCE(TEX, 16)
DECL_COMPUTE_RESOURCE(SAMPLER, 18)
DECL_COMPUTE_RESOURCE(LOOP, 32)
DECL_COMPUTE_RESOURCE(LDS, 1)
DECL_COMPUTE_RESOURCE(GDS, 1)
DECL_COMPUTE_RESOURCE(EXPORT, 1)
DECL_COMPUTE_RESOURCE(SHADER, 1)
DECL_COMPUTE_RESOURCE(TMPRING, 4)
DECL_COMPUTE_RESOURCE(DISPATCH, 1)

View file

@ -0,0 +1,814 @@
/*
* Copyright 2011 Adam Rak <adam.rak@streamnovation.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Adam Rak <adam.rak@streamnovation.com>
*/
#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "pipebuffer/pb_buffer.h"
#include "r600.h"
#include "evergreend.h"
#include "r600_resource.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "evergreen_compute.h"
#include "r600_hw_context_priv.h"
#include "evergreen_compute_internal.h"
#include "compute_memory_pool.h"
#ifdef HAVE_OPENCL
#include "llvm_wrapper.h"
#endif
/**
RAT0 is for global binding write
VTX1 is for global binding read
for writing images RAT1...
for reading images TEX2...
TEX2-RAT1 is paired
TEX2... consumes the same fetch resources, that VTX2... would consume
CONST0 and VTX0 is for parameters
CONST0 is binding smaller input parameter buffer, and for constant indexing,
also constant cached
VTX0 is for indirect/non-constant indexing, or if the input is bigger than
the constant cache can handle
RAT-s are limited to 12, so we can only bind at most 11 texture for writing
because we reserve RAT0 for global bindings. With byteaddressing enabled,
we should reserve another one too, leaving at most 10 image bindings for writing.
from Nvidia OpenCL:
CL_DEVICE_MAX_READ_IMAGE_ARGS: 128
CL_DEVICE_MAX_WRITE_IMAGE_ARGS: 8
so 10 for writing is enough. 176 is the max for reading according to the docs
writable images should be listed first < 10, so their id corresponds to RAT(id+1)
writable images will consume TEX slots, VTX slots too because of linear indexing
*/
/* Resource vtable for PIPE_BIND_GLOBAL buffers that live in the compute
 * memory pool. */
const struct u_resource_vtbl r600_global_buffer_vtbl =
{
	u_default_resource_get_handle, /* get_handle */
	r600_compute_global_buffer_destroy, /* resource_destroy */
	r600_compute_global_get_transfer, /* get_transfer */
	r600_compute_global_transfer_destroy, /* transfer_destroy */
	r600_compute_global_transfer_map, /* transfer_map */
	r600_compute_global_transfer_flush_region,/* transfer_flush_region */
	r600_compute_global_transfer_unmap, /* transfer_unmap */
	r600_compute_global_transfer_inline_write /* transfer_inline_write */
};
/**
 * Creates a compute shader state object from the program blob in
 * cso->prog.  Returns NULL when the screen does not support compute or
 * on allocation failure.
 */
void *evergreen_create_compute_state(
	struct pipe_context *ctx_,
	const struct pipe_compute_state *cso) /* was "const const" */
{
	struct r600_context *ctx = (struct r600_context *)ctx_;
	struct r600_pipe_compute *shader;

#ifdef HAVE_OPENCL
	const struct pipe_llvm_program_header * header;
	const unsigned char * code;

	/* The bitcode follows immediately after the header. */
	header = cso->prog;
	code = cso->prog + sizeof(struct pipe_llvm_program_header);
#endif

	if (!ctx->screen->screen.get_param(&ctx->screen->screen,
							PIPE_CAP_COMPUTE)) {
		fprintf(stderr, "Compute is not supported\n");
		return NULL;
	}

	shader = CALLOC_STRUCT(r600_pipe_compute);
	if (!shader)
		return NULL;

	shader->ctx = (struct r600_context*)ctx;
	/* Per-shader table of hardware resource state (see the
	 * DECL_COMPUTE_RESOURCE list). */
	shader->resources = (struct evergreen_compute_resource*)
			CALLOC(sizeof(struct evergreen_compute_resource),
			get_compute_resource_num());
	shader->local_size = cso->req_local_mem; ///TODO: assert it
	shader->private_size = cso->req_private_mem;
	shader->input_size = cso->req_input_mem;

#ifdef HAVE_OPENCL
	shader->mod = llvm_parse_bitcode(code, header->num_bytes);
	r600_compute_shader_create(ctx_, shader->mod, &shader->bc);
#endif
	return shader;
}
/**
 * Frees a state object created by evergreen_create_compute_state().
 *
 * NOTE(review): shader->mod, shader->shader_code_bo and
 * shader->kernel_param are not released here — confirm whether they are
 * owned elsewhere or leaked.
 */
void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
{
	struct r600_pipe_compute *shader = (struct r600_pipe_compute *)state;

	free(shader->resources);
	free(shader);
}
/**
 * Binds a compute shader: uploads its bytecode into a fresh VRAM buffer
 * and programs the LS-stage shader registers plus the loop constant.
 */
static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;

	ctx->cs_shader = (struct r600_pipe_compute *)state;

	/* The bytecode is uploaded only once per shader object. */
	assert(!ctx->cs_shader->shader_code_bo);

	ctx->cs_shader->shader_code_bo =
		r600_compute_buffer_alloc_vram(ctx->screen,
				ctx->cs_shader->bc.ndw * 4);

	void *p = ctx->ws->buffer_map(ctx->cs_shader->shader_code_bo->cs_buf,
				ctx->cs, PIPE_TRANSFER_WRITE);

	memcpy(p, ctx->cs_shader->bc.bytecode, ctx->cs_shader->bc.ndw * 4);

	ctx->ws->buffer_unmap(ctx->cs_shader->shader_code_bo->cs_buf);

	evergreen_compute_init_config(ctx);

	struct evergreen_compute_resource* res = get_empty_res(ctx->cs_shader,
						COMPUTE_RESOURCE_SHADER, 0);

	/* GPR budget for the LS stage, which runs compute shaders here. */
	evergreen_reg_set(res, R_008C0C_SQ_GPR_RESOURCE_MGMT_3,
			S_008C0C_NUM_LS_GPRS(ctx->cs_shader->bc.ngpr));

	///maybe we can use it later
	evergreen_reg_set(res, R_0286C8_SPI_THREAD_GROUPING, 0);
	///maybe we can use it later
	evergreen_reg_set(res, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0);

	evergreen_reg_set(res, R_0288D4_SQ_PGM_RESOURCES_LS,
		S_0288D4_NUM_GPRS(ctx->cs_shader->bc.ngpr)
		| S_0288D4_STACK_SIZE(ctx->cs_shader->bc.nstack));
	evergreen_reg_set(res, R_0288D8_SQ_PGM_RESOURCES_LS_2, 0);
	/* Program start is offset 0 within the BO attached below. */
	evergreen_reg_set(res, R_0288D0_SQ_PGM_START_LS, 0);

	/* Attach the bytecode BO so the emit path relocates and syncs it. */
	res->bo = ctx->cs_shader->shader_code_bo;
	res->usage = RADEON_USAGE_READ;
	res->coher_bo_size = ctx->cs_shader->bc.ndw*4;
	res->flags = COMPUTE_RES_SH_FLUSH;

	/* We can't always determine the
	 * number of iterations in a loop before it's executed,
	 * so we just need to set up the loop counter to give us the maximum
	 * number of iterations possible.  Currently, loops in shader code
	 * ignore the loop counter and use a break instruction to exit the
	 * loop at the correct time.
	 */
	evergreen_set_loop_const(ctx->cs_shader,
		0, /* index */
		0xFFF, /* Maximum value of the loop counter (i.e. when the loop
			* counter reaches this value, the program will break
			* out of the loop. */
		0x0, /* Starting value of the loop counter. */
		0x1); /* Amount to increment the loop counter each iteration. */
}
/* The kernel parameters are stored a vtx buffer (ID=0), besides the explicit
* kernel parameters there are implicit parameters that need to be stored
* in the vertex buffer as well. Here is how these parameters are organized in
* the buffer:
*
* DWORDS 0-2: Number of work groups in each dimension (x,y,z)
* DWORDS 3-5: Number of global work items in each dimension (x,y,z)
* DWORDS 6-8: Number of work items within each work group in each dimension
* (x,y,z)
* DWORDS 9+ : Kernel parameters
*/
/**
 * Uploads the kernel arguments plus the implicit grid/block dimensions
 * into the parameter buffer, then binds it as vertex fetch resource 0
 * and constant cache 0.
 */
void evergreen_compute_upload_input(
	struct pipe_context *ctx_,
	const uint *block_layout,
	const uint *grid_layout,
	const void *input)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;
	int i;
	/* 9 implicit dwords (grid, global size, local size) = 36 bytes
	 * precede the explicit kernel parameters. */
	unsigned kernel_parameters_offset_bytes = 36;
	uint32_t * num_work_groups_start;
	uint32_t * global_size_start;
	uint32_t * local_size_start;
	uint32_t * kernel_parameters_start;

	if (ctx->cs_shader->input_size == 0) {
		return;
	}

	if (!ctx->cs_shader->kernel_param) {
		unsigned buffer_size = ctx->cs_shader->input_size;

		/* Add space for the grid dimensions */
		/* NOTE(review): the offset is already in bytes, so the extra
		 * "* sizeof(uint)" over-allocates by 4x — harmless but
		 * worth confirming the intended unit. */
		buffer_size += kernel_parameters_offset_bytes * sizeof(uint);
		ctx->cs_shader->kernel_param =
				r600_compute_buffer_alloc_vram(ctx->screen,
						buffer_size);
	}

	num_work_groups_start = ctx->ws->buffer_map(
		ctx->cs_shader->kernel_param->cs_buf,
		ctx->cs, PIPE_TRANSFER_WRITE);
	/* Each section is 3 uints wide (x, y, z). */
	global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4));
	local_size_start = global_size_start + (3 * (sizeof(uint)) / 4);
	kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4);

	/* Copy the work group size */
	memcpy(num_work_groups_start, grid_layout, 3 * sizeof(uint));
	/* Copy the global size */
	for (i = 0; i < 3; i++) {
		global_size_start[i] = grid_layout[i] * block_layout[i];
	}
	/* Copy the local dimensions */
	memcpy(local_size_start, block_layout, 3 * sizeof(uint));
	/* Copy the kernel inputs */
	memcpy(kernel_parameters_start, input, ctx->cs_shader->input_size);

	/* Debug dump of the whole uploaded buffer. */
	for (i = 0; i < (kernel_parameters_offset_bytes / 4) +
			(ctx->cs_shader->input_size / 4); i++) {
		COMPUTE_DBG("input %i : %i\n", i,
			((unsigned*)num_work_groups_start)[i]);
	}

	ctx->ws->buffer_unmap(ctx->cs_shader->kernel_param->cs_buf);

	///ID=0 is reserved for the parameters
	evergreen_set_vtx_resource(ctx->cs_shader,
		ctx->cs_shader->kernel_param, 0, 0, 0);
	///ID=0 is reserved for parameters
	evergreen_set_const_cache(ctx->cs_shader, 0,
		ctx->cs_shader->kernel_param, ctx->cs_shader->input_size, 0);
}
/**
 * Emits the register state and the DISPATCH_DIRECT packet for a grid
 * launch with the given block (thread group) and grid layouts.
 */
void evergreen_direct_dispatch(
	struct pipe_context *ctx_,
	const uint *block_layout, const uint *grid_layout)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;

	int group_size = 1;
	int grid_size = 1;
	int k;

	struct evergreen_compute_resource* res = get_empty_res(ctx->cs_shader,
		COMPUTE_RESOURCE_DISPATCH, 0);

	/* Compute dispatches are issued as point lists from (0,0,0). */
	evergreen_reg_set(res, R_008958_VGT_PRIMITIVE_TYPE, V_008958_DI_PT_POINTLIST);

	evergreen_reg_set(res, R_00899C_VGT_COMPUTE_START_X, 0);
	evergreen_reg_set(res, R_0089A0_VGT_COMPUTE_START_Y, 0);
	evergreen_reg_set(res, R_0089A4_VGT_COMPUTE_START_Z, 0);

	evergreen_reg_set(res, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, block_layout[0]);
	evergreen_reg_set(res, R_0286F0_SPI_COMPUTE_NUM_THREAD_Y, block_layout[1]);
	evergreen_reg_set(res, R_0286F4_SPI_COMPUTE_NUM_THREAD_Z, block_layout[2]);

	for (k = 0; k < 3; k++) {
		group_size *= block_layout[k];
		grid_size *= grid_layout[k];
	}

	evergreen_reg_set(res, R_008970_VGT_NUM_INDICES, group_size);
	evergreen_reg_set(res, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE, group_size);

	evergreen_emit_raw_value(res, PKT3C(PKT3_DISPATCH_DIRECT, 3, 0));
	evergreen_emit_raw_value(res, grid_layout[0]);
	evergreen_emit_raw_value(res, grid_layout[1]);
	evergreen_emit_raw_value(res, grid_layout[2]);
	///VGT_DISPATCH_INITIATOR = COMPUTE_SHADER_EN
	evergreen_emit_raw_value(res, 1);
}
static void compute_emit_cs(struct r600_context *ctx)
{
struct radeon_winsys_cs *cs = ctx->cs;
int i;
r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
struct r600_resource *onebo = NULL;
for (i = 0; i < get_compute_resource_num(); i++) {
if (ctx->cs_shader->resources[i].enabled) {
int j;
COMPUTE_DBG("resnum: %i, cdw: %i\n", i, cs->cdw);
for (j = 0; j < ctx->cs_shader->resources[i].cs_end; j++) {
if (ctx->cs_shader->resources[i].do_reloc[j]) {
assert(ctx->cs_shader->resources[i].bo);
evergreen_emit_ctx_reloc(ctx,
ctx->cs_shader->resources[i].bo,
ctx->cs_shader->resources[i].usage);
}
cs->buf[cs->cdw++] = ctx->cs_shader->resources[i].cs[j];
}
if (ctx->cs_shader->resources[i].bo) {
onebo = ctx->cs_shader->resources[i].bo;
evergreen_emit_ctx_reloc(ctx,
ctx->cs_shader->resources[i].bo,
ctx->cs_shader->resources[i].usage);
///special case for textures
if (ctx->cs_shader->resources[i].do_reloc
[ctx->cs_shader->resources[i].cs_end] == 2) {
evergreen_emit_ctx_reloc(ctx,
ctx->cs_shader->resources[i].bo,
ctx->cs_shader->resources[i].usage);
}
evergreen_set_buffer_sync(ctx, ctx->cs_shader->resources[i].bo,
ctx->cs_shader->resources[i].coher_bo_size,
ctx->cs_shader->resources[i].flags,
ctx->cs_shader->resources[i].usage);
}
}
}
#if 0
COMPUTE_DBG("cdw: %i\n", cs->cdw);
for (i = 0; i < cs->cdw; i++) {
COMPUTE_DBG("%4i : 0x%08X\n", i, ctx->cs->buf[i]);
}
#endif
ctx->ws->cs_flush(ctx->cs, RADEON_FLUSH_ASYNC);
ctx->pm4_dirty_cdwords = 0;
ctx->flags = 0;
COMPUTE_DBG("shader started\n");
ctx->ws->buffer_wait(onebo->buf, 0);
COMPUTE_DBG("...\n");
r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
ctx->streamout_start = TRUE;
ctx->streamout_append_bitmask = ~0;
}
/**
 * Top-level grid launch: computes the LDS wave count, uploads kernel
 * inputs, emits the dispatch state and flushes the command stream.
 */
static void evergreen_launch_grid(
	struct pipe_context *ctx_,
	const uint *block_layout, const uint *grid_layout,
	uint32_t pc, const void *input)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;

	COMPUTE_DBG("PC: %i\n", pc);

	/* num_waves = ceil(threads_per_block / (16 * num_pipes)) */
	unsigned pipes = ctx->screen->info.r600_max_pipes;
	unsigned divisor = 16 * pipes;
	unsigned threads = block_layout[0] * block_layout[1] * block_layout[2];
	unsigned waves = (threads + divisor - 1) / divisor;

	COMPUTE_DBG("Using %u pipes, there are %u wavefronts per thread block\n",
							pipes, waves);

	evergreen_set_lds(ctx->cs_shader, 0, 0, waves);
	evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
	evergreen_direct_dispatch(ctx_, block_layout, grid_layout);
	compute_emit_cs(ctx);
}
/**
 * Binds global-memory surfaces for the compute shader.  Writable
 * surfaces get RAT i+1 (RAT0 is reserved for the global pool); every
 * surface also gets vertex fetch resource i+2 (VTX0 = parameters,
 * VTX1 = global pool).
 */
static void evergreen_set_compute_resources(struct pipe_context * ctx_,
		unsigned start, unsigned count,
		struct pipe_surface ** surfaces)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;
	struct r600_surface **resources = (struct r600_surface **)surfaces;
	unsigned i;

	for (i = 0; i < count; i++) {
		if (!resources[i])
			continue;

		/* The original declared a second, identical "buffer" inside
		 * the writable branch, shadowing this one — removed. */
		struct r600_resource_global *buffer =
			(struct r600_resource_global*)resources[i]->base.texture;

		if (resources[i]->base.writable) {
			/* Only 12 RATs exist and RAT0 is reserved. */
			assert(i+1 < 12);

			evergreen_set_rat(ctx->cs_shader, i+1,
			(struct r600_resource *)resources[i]->base.texture,
			buffer->chunk->start_in_dw*4,
			resources[i]->base.texture->width0);
		}

		evergreen_set_vtx_resource(ctx->cs_shader,
			(struct r600_resource *)resources[i]->base.texture, i+2,
			buffer->chunk->start_in_dw*4, resources[i]->base.writable);
	}
}
/**
 * Binds texture sampler views for the compute shader.  Views start at
 * fetch resource 2: FETCH0 = VTX0 (param buffer), FETCH1 = VTX1 (global
 * buffer pool), FETCH2... = TEX.
 */
static void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
		unsigned start_slot, unsigned count,
		struct pipe_sampler_view **views)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;
	struct r600_pipe_sampler_view **resource =
		(struct r600_pipe_sampler_view **)views;
	int slot;

	for (slot = 0; slot < count; slot++) {
		if (!resource[slot])
			continue;

		assert(slot+1 < 12);
		evergreen_set_tex_resource(ctx->cs_shader, resource[slot],
					   slot+2);
	}
}
/**
 * Binds sampler state objects for the compute shader, one per slot.
 */
static void evergreen_bind_compute_sampler_states(
	struct pipe_context *ctx_,
	unsigned start_slot,
	unsigned num_samplers,
	void **samplers_)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;
	struct compute_sampler_state ** samplers =
		(struct compute_sampler_state **)samplers_;
	int slot;

	for (slot = 0; slot < num_samplers; slot++) {
		if (!samplers[slot])
			continue;
		evergreen_set_sampler_resource(ctx->cs_shader,
					       samplers[slot], slot);
	}
}
/**
 * Binds global buffers from the compute memory pool: finalizes pending
 * pool allocations, writes each buffer's byte offset into its handle,
 * and exposes the whole pool via RAT0 (write) and VTX1 (read).
 */
static void evergreen_set_global_binding(
	struct pipe_context *ctx_, unsigned first, unsigned n,
	struct pipe_resource **resources,
	uint32_t **handles)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;
	struct compute_memory_pool *pool = ctx->screen->global_pool;
	struct r600_resource_global **buffers =
		(struct r600_resource_global **)resources;
	int k;

	if (!resources) {
		/* XXX: Unset */
		return;
	}

	/* Give every pending pool allocation a real offset first. */
	compute_memory_finalize_pending(pool, ctx_);

	for (k = 0; k < n; k++) {
		assert(resources[k]->target == PIPE_BUFFER);
		assert(resources[k]->bind & PIPE_BIND_GLOBAL);

		*(handles[k]) = buffers[k]->chunk->start_in_dw * 4;
	}

	evergreen_set_rat(ctx->cs_shader, 0, pool->bo, 0, pool->size_in_dw * 4);
	evergreen_set_vtx_resource(ctx->cs_shader, pool->bo, 1, 0, 1);
}
/**
 * Emit the one-time compute configuration state.
 *
 * Programs the SQ thread/stack/GPR management registers and the SPI/VGT/DB
 * state needed to run compute (LS) shaders.
 *
 * Fix: the original per-chip switch assigned identical num_temp_gprs (4)
 * and num_threads (128) in every case and only varied the stack depth;
 * the cases are collapsed accordingly.  A trailing duplicate write of
 * R_0286E8_SPI_COMPUTE_INPUT_CNTL (same value as the earlier write) was
 * also removed.
 */
void evergreen_compute_init_config(struct r600_context *ctx)
{
	struct evergreen_compute_resource* res =
		get_empty_res(ctx->cs_shader, COMPUTE_RESOURCE_CONFIG, 0);

	/* All supported evergreen parts use the same thread/GPR setup. */
	int num_threads = 128;
	int num_temp_gprs = 4;
	int num_stack_entries;
	enum radeon_family family;
	unsigned tmp;

	family = ctx->family;

	/* Only the stack depth differs per chip. */
	switch (family) {
	case CHIP_JUNIPER:
	case CHIP_CYPRESS:
	case CHIP_HEMLOCK:
	case CHIP_SUMO2:
	case CHIP_BARTS:
		num_stack_entries = 512;
		break;
	case CHIP_CEDAR:
	case CHIP_REDWOOD:
	case CHIP_PALM:
	case CHIP_SUMO:
	case CHIP_TURKS:
	case CHIP_CAICOS:
	default:
		num_stack_entries = 256;
		break;
	}

	/* The vertex cache is not present on all parts. */
	tmp = 0x00000000;
	switch (family) {
	case CHIP_CEDAR:
	case CHIP_PALM:
	case CHIP_SUMO:
	case CHIP_SUMO2:
	case CHIP_CAICOS:
		break;
	default:
		tmp |= S_008C00_VC_ENABLE(1);
		break;
	}
	tmp |= S_008C00_EXPORT_SRC_C(1);
	tmp |= S_008C00_CS_PRIO(0);
	tmp |= S_008C00_LS_PRIO(0);
	tmp |= S_008C00_HS_PRIO(0);
	tmp |= S_008C00_PS_PRIO(0);
	tmp |= S_008C00_VS_PRIO(0);
	tmp |= S_008C00_GS_PRIO(0);
	tmp |= S_008C00_ES_PRIO(0);

	evergreen_reg_set(res, R_008C00_SQ_CONFIG, tmp);

	evergreen_reg_set(res, R_008C04_SQ_GPR_RESOURCE_MGMT_1,
		S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
	evergreen_reg_set(res, R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0);
	evergreen_reg_set(res, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0);
	evergreen_reg_set(res, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0);
	evergreen_reg_set(res, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));

	/* workaround for hw issues with dyn gpr - must set all limits to 240
	 * instead of 0, 0x1e == 240/8 */
	evergreen_reg_set(res, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
		S_028838_PS_GPRS(0x1e) |
		S_028838_VS_GPRS(0x1e) |
		S_028838_GS_GPRS(0x1e) |
		S_028838_ES_GPRS(0x1e) |
		S_028838_HS_GPRS(0x1e) |
		S_028838_LS_GPRS(0x1e));

	/* Enable all threads on all SIMDs. */
	evergreen_reg_set(res, R_008E20_SQ_STATIC_THREAD_MGMT1, 0xFFFFFFFF);
	evergreen_reg_set(res, R_008E24_SQ_STATIC_THREAD_MGMT2, 0xFFFFFFFF);
	evergreen_reg_set(res, R_008E28_SQ_STATIC_THREAD_MGMT3, 0xFFFFFFFF);

	evergreen_reg_set(res, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 0);
	tmp = S_008C1C_NUM_LS_THREADS(num_threads);
	evergreen_reg_set(res, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp);
	evergreen_reg_set(res, R_008C20_SQ_STACK_RESOURCE_MGMT_1, 0);
	evergreen_reg_set(res, R_008C24_SQ_STACK_RESOURCE_MGMT_2, 0);
	tmp = S_008C28_NUM_LS_STACK_ENTRIES(num_stack_entries);
	evergreen_reg_set(res, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp);

	evergreen_reg_set(res, R_0286CC_SPI_PS_IN_CONTROL_0, S_0286CC_LINEAR_GRADIENT_ENA(1));
	evergreen_reg_set(res, R_0286D0_SPI_PS_IN_CONTROL_1, 0);
	evergreen_reg_set(res, R_0286E4_SPI_PS_IN_CONTROL_2, 0);
	evergreen_reg_set(res, R_0286D8_SPI_INPUT_Z, 0);
	evergreen_reg_set(res, R_0286E0_SPI_BARYC_CNTL, 1 << 20);

	/* Expose thread and thread-group IDs to the shader and keep the
	 * input indices unpacked. */
	tmp = S_0286E8_TID_IN_GROUP_ENA | S_0286E8_TGID_ENA | S_0286E8_DISABLE_INDEX_PACK;
	evergreen_reg_set(res, R_0286E8_SPI_COMPUTE_INPUT_CNTL, tmp);

	tmp = S_028A40_COMPUTE_MODE(1) | S_028A40_PARTIAL_THD_AT_EOI(1);
	evergreen_reg_set(res, R_028A40_VGT_GS_MODE, tmp);

	evergreen_reg_set(res, R_028B54_VGT_SHADER_STAGES_EN, 2/*CS_ON*/);

	/* Depth and color output are unused in compute mode. */
	evergreen_reg_set(res, R_028800_DB_DEPTH_CONTROL, 0);
	evergreen_reg_set(res, R_02880C_DB_SHADER_CONTROL, 0);
	evergreen_reg_set(res, R_028000_DB_RENDER_CONTROL, S_028000_COLOR_DISABLE(1));
	evergreen_reg_set(res, R_02800C_DB_RENDER_OVERRIDE, 0);
}
void evergreen_init_compute_state_functions(struct r600_context *ctx)
{
ctx->context.create_compute_state = evergreen_create_compute_state;
ctx->context.delete_compute_state = evergreen_delete_compute_state;
ctx->context.bind_compute_state = evergreen_bind_compute_state;
// ctx->context.create_sampler_view = evergreen_compute_create_sampler_view;
ctx->context.set_compute_resources = evergreen_set_compute_resources;
ctx->context.set_compute_sampler_views = evergreen_set_cs_sampler_view;
ctx->context.bind_compute_sampler_states = evergreen_bind_compute_sampler_states;
ctx->context.set_global_binding = evergreen_set_global_binding;
ctx->context.launch_grid = evergreen_launch_grid;
}
/**
 * Create a PIPE_BIND_GLOBAL buffer backed by the compute memory pool.
 *
 * Returns NULL if either the wrapper struct or a chunk of the pool
 * cannot be allocated.
 *
 * Fixes: the CALLOC result was dereferenced without a NULL check, and
 * `screen` was assigned before the `*templ` struct copy, which clobbered
 * it (the copy now happens first).
 */
struct pipe_resource *r600_compute_global_buffer_create(
	struct pipe_screen *screen,
	const struct pipe_resource *templ)
{
	struct r600_resource_global* result;
	struct r600_screen* rscreen;
	int size_in_dw;

	assert(templ->target == PIPE_BUFFER);
	assert(templ->bind & PIPE_BIND_GLOBAL);
	assert(templ->array_size == 1 || templ->array_size == 0);
	assert(templ->depth0 == 1 || templ->depth0 == 0);
	assert(templ->height0 == 1 || templ->height0 == 0);

	result = (struct r600_resource_global*)
		CALLOC(sizeof(struct r600_resource_global), 1);
	if (result == NULL) {
		return NULL;
	}

	rscreen = (struct r600_screen*)screen;

	result->base.b.vtbl = &r600_global_buffer_vtbl;
	result->base.b.b = *templ;
	result->base.b.b.screen = screen;
	pipe_reference_init(&result->base.b.b.reference, 1);

	/* The pool is managed in dwords; round the size up. */
	size_in_dw = (templ->width0+3) / 4;

	result->chunk = compute_memory_alloc(rscreen->global_pool, size_in_dw);

	if (result->chunk == NULL)
	{
		free(result);
		return NULL;
	}

	return &result->base.b.b;
}
/* Destroy a global buffer: give its chunk back to the pool and free the
 * wrapper struct. */
void r600_compute_global_buffer_destroy(
	struct pipe_screen *screen,
	struct pipe_resource *res)
{
	struct r600_resource_global* buffer;
	struct r600_screen* rscreen;

	assert(res->target == PIPE_BUFFER);
	assert(res->bind & PIPE_BIND_GLOBAL);

	buffer = (struct r600_resource_global*)res;
	rscreen = (struct r600_screen*)screen;

	compute_memory_free(rscreen->global_pool, buffer->chunk->id);

	buffer->chunk = NULL;
	free(res);
}
/**
 * Map a global buffer for CPU access.
 *
 * Maps the pool's backing BO and returns a pointer adjusted to this
 * buffer's chunk plus transfer->box.x bytes.  Returns NULL if the map
 * fails.
 */
void* r600_compute_global_transfer_map(
	struct pipe_context *ctx_,
	struct pipe_transfer* transfer)
{
	assert(transfer->resource->target == PIPE_BUFFER);
	assert(transfer->resource->bind & PIPE_BIND_GLOBAL);
	assert(transfer->box.x >= 0);
	assert(transfer->box.y == 0);
	assert(transfer->box.z == 0);

	struct r600_context *ctx = (struct r600_context *)ctx_;
	struct r600_resource_global* buffer =
		(struct r600_resource_global*)transfer->resource;

	uint32_t* map;
	/* NOTE(review): the whole pool BO is mapped, not just this chunk. */
	///TODO: do it better, mapping is not possible if the pool is too big
	if (!(map = ctx->ws->buffer_map(buffer->chunk->pool->bo->cs_buf,
			ctx->cs, transfer->usage))) {
		return NULL;
	}

	COMPUTE_DBG("buffer start: %lli\n", buffer->chunk->start_in_dw);

	/* start_in_dw indexes dwords (map is uint32_t*); box.x is in bytes. */
	return ((char*)(map + buffer->chunk->start_in_dw)) + transfer->box.x;
}
/* Unmap a global buffer (unmaps the whole pool backing BO). */
void r600_compute_global_transfer_unmap(
	struct pipe_context *ctx_,
	struct pipe_transfer* transfer)
{
	struct r600_context *rctx;
	struct r600_resource_global* buffer;

	assert(transfer->resource->target == PIPE_BUFFER);
	assert(transfer->resource->bind & PIPE_BIND_GLOBAL);

	rctx = (struct r600_context *)ctx_;
	buffer = (struct r600_resource_global*)transfer->resource;

	rctx->ws->buffer_unmap(buffer->chunk->pool->bo->cs_buf);
}
/**
 * Allocate and fill a pipe_transfer object for a global buffer.
 *
 * Finalizes pending pool allocations first so the buffer has a valid
 * chunk before it is mapped.
 *
 * Fix: the original cast ctx_ twice into two identical locals (ctx and
 * rctx); a single local is used now.
 */
struct pipe_transfer * r600_compute_global_get_transfer(
	struct pipe_context *ctx_,
	struct pipe_resource *resource,
	unsigned level,
	unsigned usage,
	const struct pipe_box *box)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;
	struct compute_memory_pool *pool = ctx->screen->global_pool;
	struct pipe_transfer *transfer;

	compute_memory_finalize_pending(pool, ctx_);

	assert(resource->target == PIPE_BUFFER);

	transfer = util_slab_alloc(&ctx->pool_transfers);

	transfer->resource = resource;
	transfer->level = level;
	transfer->usage = usage;
	transfer->box = *box;
	/* Note strides are zero, this is ok for buffers, but not for
	 * textures 2d & higher at least.
	 */
	transfer->stride = 0;
	transfer->layer_stride = 0;
	transfer->data = NULL;

	return transfer;
}
/* Return a transfer object to the per-context slab allocator. */
void r600_compute_global_transfer_destroy(
	struct pipe_context *ctx_,
	struct pipe_transfer *transfer)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;

	util_slab_free(&ctx->pool_transfers, transfer);
}
/* Flushing a mapped region back to the GPU is not implemented yet;
 * reaching this is a driver bug. */
void r600_compute_global_transfer_flush_region(
	struct pipe_context *ctx_,
	struct pipe_transfer *transfer,
	const struct pipe_box *box)
{
	assert(0 && "TODO");
}
/* Inline (transfer-less) writes to global buffers are not implemented
 * yet; reaching this is a driver bug. */
void r600_compute_global_transfer_inline_write(
	struct pipe_context *pipe,
	struct pipe_resource *resource,
	unsigned level,
	unsigned usage,
	const struct pipe_box *box,
	const void *data,
	unsigned stride,
	unsigned layer_stride)
{
	assert(0 && "TODO");
}

View file

@ -0,0 +1,69 @@
/*
* Copyright 2011 Adam Rak <adam.rak@streamnovation.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Adam Rak <adam.rak@streamnovation.com>
*/
#ifndef EVERGREEN_COMPUTE_H
#define EVERGREEN_COMPUTE_H

#include "r600.h"
#include "r600_pipe.h"

struct evergreen_compute_resource;

/* Compute state objects and dispatch.
 * Fix: the duplicated `const const` qualifier on the pipe_compute_state
 * parameter has been reduced to a single `const`. */
void *evergreen_create_compute_state(struct pipe_context *ctx, const struct pipe_compute_state *cso);
void evergreen_delete_compute_state(struct pipe_context *ctx, void *state);
void evergreen_direct_dispatch( struct pipe_context *context, const uint *block_layout, const uint *grid_layout);
void evergreen_compute_upload_input(struct pipe_context *context, const uint *block_layout, const uint *grid_layout, const void *input);
void evergreen_compute_init_config(struct r600_context *rctx);
void evergreen_init_compute_state_functions(struct r600_context *rctx);

/* Global (PIPE_BIND_GLOBAL) buffer management backed by the compute pool. */
struct pipe_resource *r600_compute_global_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ);
void r600_compute_global_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *res);
void* r600_compute_global_transfer_map(struct pipe_context *ctx, struct pipe_transfer* transfer);
void r600_compute_global_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer);
struct pipe_transfer * r600_compute_global_get_transfer(struct pipe_context *, struct pipe_resource *, unsigned level,
		unsigned usage, const struct pipe_box *);
void r600_compute_global_transfer_destroy(struct pipe_context *, struct pipe_transfer *);
void r600_compute_global_transfer_flush_region( struct pipe_context *, struct pipe_transfer *, const struct pipe_box *);
void r600_compute_global_transfer_inline_write( struct pipe_context *, struct pipe_resource *, unsigned level,
		unsigned usage, const struct pipe_box *, const void *data, unsigned stride, unsigned layer_stride);
/**
 * Print a debug message when the R600_COMPUTE_DEBUG environment option
 * is enabled.
 *
 * Fix: `check_debug` was never set, so the option was re-read on every
 * call; the result is now cached after the first lookup.
 */
static inline void COMPUTE_DBG(const char *fmt, ...)
{
	static bool check_debug = false, debug = false;

	if (!check_debug) {
		debug = debug_get_bool_option("R600_COMPUTE_DEBUG", FALSE);
		check_debug = true;
	}

	if (debug) {
		va_list ap;
		va_start(ap, fmt);
		_debug_vprintf(fmt, ap);
		va_end(ap);
	}
}
#endif

View file

@ -0,0 +1,830 @@
/*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Adam Rak <adam.rak@streamnovation.com>
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600.h"
#include "r600_resource.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "evergreend.h"
#include "evergreen_compute_internal.h"
#include "r600_hw_context_priv.h"
/**
 * Total number of compute resource slots: the sum of the per-resource
 * counts declared by the X-macro list in compute_resource.def.
 */
int get_compute_resource_num(void)
{
	int num = 0;
#define DECL_COMPUTE_RESOURCE(name, n) num += n;
#include "compute_resource.def"
#undef DECL_COMPUTE_RESOURCE
	return num;
}
/* Append one raw dword to the resource's local command buffer. */
void evergreen_emit_raw_value(
	struct evergreen_compute_resource* res,
	unsigned value)
{
	res->cs[res->cs_end++] = value;
}
/* Append one raw dword directly to the context's command stream. */
void evergreen_emit_ctx_value(struct r600_context *ctx, unsigned value)
{
	ctx->cs->buf[ctx->cs->cdw++] = value;
}
/* Emit a multi-dword register write: `size` bytes from `array` go to
 * consecutive registers starting at `index`. */
void evergreen_mult_reg_set_(
	struct evergreen_compute_resource* res,
	int index,
	u32* array,
	int size)
{
	int dw;

	evergreen_emit_raw_reg_set(res, index, size / 4);

	/* One dword per 4 bytes of payload. */
	for (dw = 0; dw * 4 < size; dw++) {
		res->cs[res->cs_end++] = array[dw];
	}
}
/* Emit a single-register write: packet header plus one value dword. */
void evergreen_reg_set(
	struct evergreen_compute_resource* res,
	unsigned index,
	unsigned value)
{
	evergreen_emit_raw_reg_set(res, index, 1);
	res->cs[res->cs_end++] = value;
}
/**
 * Look up and reset the resource slot for (res_code, offset_index).
 *
 * The base index and count for each resource code come from the X-macro
 * list in compute_resource.def.  The returned slot is marked enabled with
 * an empty command buffer and no relocations.
 *
 * Fix: the deprecated bzero() call was replaced by standard memset().
 */
struct evergreen_compute_resource* get_empty_res(
	struct r600_pipe_compute* pipe,
	enum evergreen_compute_resources res_code,
	int offset_index)
{
	int code_index = -1;
	int code_size = -1;

	{
		int i = 0;
#define DECL_COMPUTE_RESOURCE(name, n) if (COMPUTE_RESOURCE_ ## name == res_code) {code_index = i; code_size = n;} i += n;
#include "compute_resource.def"
#undef DECL_COMPUTE_RESOURCE
	}

	assert(code_index != -1 && "internal error: resouce index not found");
	assert(offset_index < code_size && "internal error: overindexing resource");

	int index = code_index + offset_index;

	struct evergreen_compute_resource* res = &pipe->resources[index];

	res->enabled = true;
	res->bo = NULL;
	res->cs_end = 0;
	memset(&res->do_reloc, 0, sizeof(res->do_reloc));

	return res;
}
/**
 * Emit the header for a write of `num` consecutive register dwords.
 *
 * Chooses the SET_* packet type from the register range `index` falls in.
 * Registers outside all known ranges are written with a PKT0, whose header
 * is only one dword; cs_end is decremented there so the unconditional
 * `+= 2` below advances by exactly one dword in that case.
 */
void evergreen_emit_raw_reg_set(
	struct evergreen_compute_resource* res,
	unsigned index,
	int num)
{
	res->enabled = 1;
	int cs_end = res->cs_end;

	if (index >= EVERGREEN_CONFIG_REG_OFFSET
			&& index < EVERGREEN_CONFIG_REG_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
	} else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
			&& index < EVERGREEN_CONTEXT_REG_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
	} else if (index >= EVERGREEN_RESOURCE_OFFSET
			&& index < EVERGREEN_RESOURCE_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_RESOURCE, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
	} else if (index >= EVERGREEN_SAMPLER_OFFSET
			&& index < EVERGREEN_SAMPLER_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_SAMPLER, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
	} else if (index >= EVERGREEN_CTL_CONST_OFFSET
			&& index < EVERGREEN_CTL_CONST_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
	} else if (index >= EVERGREEN_LOOP_CONST_OFFSET
			&& index < EVERGREEN_LOOP_CONST_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
	} else if (index >= EVERGREEN_BOOL_CONST_OFFSET
			&& index < EVERGREEN_BOOL_CONST_END) {
		res->cs[cs_end] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
		res->cs[cs_end+1] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
	} else {
		/* PKT0 encodes the register offset in its header dword. */
		res->cs[cs_end] = PKT0(index, num-1);
		res->cs_end--;
	}

	res->cs_end += 2;
}
/* Request that a relocation be emitted at the current command-buffer
 * position when this resource is flushed to the real CS. */
void evergreen_emit_force_reloc(struct evergreen_compute_resource* res)
{
	res->do_reloc[res->cs_end] += 1;
}
/**
 * Emit a register-write header of `num` dwords directly into the context
 * command stream (same range-to-packet mapping as
 * evergreen_emit_raw_reg_set, but writing to ctx->cs).
 */
void evergreen_emit_ctx_reg_set(
	struct r600_context *ctx,
	unsigned index,
	int num)
{
	if (index >= EVERGREEN_CONFIG_REG_OFFSET
			&& index < EVERGREEN_CONFIG_REG_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
	} else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
			&& index < EVERGREEN_CONTEXT_REG_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
	} else if (index >= EVERGREEN_RESOURCE_OFFSET
			&& index < EVERGREEN_RESOURCE_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_RESOURCE, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
	} else if (index >= EVERGREEN_SAMPLER_OFFSET
			&& index < EVERGREEN_SAMPLER_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_SAMPLER, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
	} else if (index >= EVERGREEN_CTL_CONST_OFFSET
			&& index < EVERGREEN_CTL_CONST_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
	} else if (index >= EVERGREEN_LOOP_CONST_OFFSET
			&& index < EVERGREEN_LOOP_CONST_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
	} else if (index >= EVERGREEN_BOOL_CONST_OFFSET
			&& index < EVERGREEN_BOOL_CONST_END) {
		ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
		ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
	} else {
		/* Unknown range: fall back to a one-dword PKT0 header. */
		ctx->cs->buf[ctx->cs->cdw++] = PKT0(index, num-1);
	}
}
/* Emit a NOP packet followed by a relocation index for `bo` into the
 * context command stream.  The reloc index is fetched into a temporary
 * first because r600_context_bo_reloc may touch CS state. */
void evergreen_emit_ctx_reloc(
	struct r600_context *ctx,
	struct r600_resource *bo,
	enum radeon_bo_usage usage)
{
	assert(bo);

	ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	u32 rr = r600_context_bo_reloc(ctx, bo, usage);
	ctx->cs->buf[ctx->cs->cdw++] = rr;
}
/**
 * Emit a SURFACE_SYNC packet flushing the requested caches for `bo`.
 *
 * `size` is the byte range to make coherent (0 or 0xffffffff means the
 * full address space, in which case no relocation is emitted); `flags`
 * selects which caches to flush (TC/VC/SH and, for CB flushes, which
 * color buffer via bits 8..11).
 */
void evergreen_set_buffer_sync(
	struct r600_context *ctx,
	struct r600_resource* bo,
	int size,
	int flags,
	enum radeon_bo_usage usage)
{
	assert(bo);
	int32_t cp_coher_size = 0;

	/* CP_COHER_SIZE is in 256-byte units. */
	if (size == 0xffffffff || size == 0) {
		cp_coher_size = 0xffffffff;
	}
	else {
		cp_coher_size = ((size + 255) >> 8);
	}

	uint32_t sync_flags = 0;

	if ((flags & COMPUTE_RES_TC_FLUSH) == COMPUTE_RES_TC_FLUSH) {
		sync_flags |= S_0085F0_TC_ACTION_ENA(1);
	}

	if ((flags & COMPUTE_RES_VC_FLUSH) == COMPUTE_RES_VC_FLUSH) {
		sync_flags |= S_0085F0_VC_ACTION_ENA(1);
	}

	if ((flags & COMPUTE_RES_SH_FLUSH) == COMPUTE_RES_SH_FLUSH) {
		sync_flags |= S_0085F0_SH_ACTION_ENA(1);
	}

	if ((flags & COMPUTE_RES_CB_FLUSH(0)) == COMPUTE_RES_CB_FLUSH(0)) {
		sync_flags |= S_0085F0_CB_ACTION_ENA(1);

		/* Bits 8..11 of `flags` carry the color buffer index. */
		switch((flags >> 8) & 0xF) {
		case 0:
			sync_flags |= S_0085F0_CB0_DEST_BASE_ENA(1);
			break;
		case 1:
			sync_flags |= S_0085F0_CB1_DEST_BASE_ENA(1);
			break;
		case 2:
			sync_flags |= S_0085F0_CB2_DEST_BASE_ENA(1);
			break;
		case 3:
			sync_flags |= S_0085F0_CB3_DEST_BASE_ENA(1);
			break;
		case 4:
			sync_flags |= S_0085F0_CB4_DEST_BASE_ENA(1);
			break;
		case 5:
			sync_flags |= S_0085F0_CB5_DEST_BASE_ENA(1);
			break;
		case 6:
			sync_flags |= S_0085F0_CB6_DEST_BASE_ENA(1);
			break;
		case 7:
			sync_flags |= S_0085F0_CB7_DEST_BASE_ENA(1);
			break;
		case 8:
			sync_flags |= S_0085F0_CB8_DEST_BASE_ENA(1);
			break;
		case 9:
			sync_flags |= S_0085F0_CB9_DEST_BASE_ENA(1);
			break;
		case 10:
			sync_flags |= S_0085F0_CB10_DEST_BASE_ENA(1);
			break;
		case 11:
			sync_flags |= S_0085F0_CB11_DEST_BASE_ENA(1);
			break;
		default:
			assert(0);
		}
	}

	int32_t poll_interval = 10;

	/* SURFACE_SYNC payload: flags, size, base (0), poll interval. */
	ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
	ctx->cs->buf[ctx->cs->cdw++] = sync_flags;
	ctx->cs->buf[ctx->cs->cdw++] = cp_coher_size;
	ctx->cs->buf[ctx->cs->cdw++] = 0;
	ctx->cs->buf[ctx->cs->cdw++] = poll_interval;

	if (cp_coher_size != 0xffffffff) {
		evergreen_emit_ctx_reloc(ctx, bo, usage);
	}
}
/* Translate the BO's pipe format into the CB hardware format/number type.
 * Returns 1 on success, 0 for (not yet supported) formats. */
int evergreen_compute_get_gpu_format(
	struct number_type_and_format* fmt,
	struct r600_resource *bo)
{
	switch (bo->b.b.format) {
	case PIPE_FORMAT_R8_UNORM:
	case PIPE_FORMAT_R32_UNORM:
		fmt->format = V_028C70_COLOR_32;
		fmt->number_type = V_028C70_NUMBER_UNORM;
		break;

	case PIPE_FORMAT_R32_FLOAT:
		fmt->format = V_028C70_COLOR_32_FLOAT;
		fmt->number_type = V_028C70_NUMBER_FLOAT;
		break;

	case PIPE_FORMAT_R32G32B32A32_FLOAT:
		fmt->format = V_028C70_COLOR_32_32_32_32_FLOAT;
		fmt->number_type = V_028C70_NUMBER_FLOAT;
		break;

	///TODO: other formats...
	default:
		return 0;
	}

	/* Common to all supported formats. */
	fmt->num_format_all = 0;
	return 1;
}
/**
 * Bind `bo` as Random Access Target (RAT) `id`.
 *
 * Programs the CB_COLOR* register block for the RAT: RATs 0-7 use the
 * full 0x3c-dword register stride, RATs 8-11 the shorter 0x1c stride
 * (no CMASK/FMASK).  `start` is the byte offset into the BO (must be
 * 256-byte aligned), `size` the byte size (dword aligned).
 */
void evergreen_set_rat(
	struct r600_pipe_compute *pipe,
	int id,
	struct r600_resource* bo,
	int start,
	int size)
{
	assert(id < 12);
	assert((size & 3) == 0);
	assert((start & 0xFF) == 0);

	int offset;
	COMPUTE_DBG("bind rat: %i \n", id);

	/* Register stride differs between RATs 0-7 and 8-11. */
	if (id < 8) {
		offset = id*0x3c;
	}
	else {
		offset = 8*0x3c + (id-8)*0x1c;
	}

	int linear = 0;
	if (bo->b.b.height0 <= 1 && bo->b.b.depth0 <= 1
			&& bo->b.b.target == PIPE_BUFFER) {
		linear = 1;
	}

	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_RAT, id);

	evergreen_emit_force_reloc(res);

	evergreen_reg_set(res, R_028C64_CB_COLOR0_PITCH, 0); ///TODO: for 2D?
	evergreen_reg_set(res, R_028C68_CB_COLOR0_SLICE, 0);

	struct number_type_and_format fmt;

	/* Typeless BOs are treated as raw 32-bit words. */
	///default config
	if (bo->b.b.format == PIPE_FORMAT_NONE) {
		fmt.format = V_028C70_COLOR_32;
		fmt.number_type = V_028C70_NUMBER_FLOAT;
	} else {
		evergreen_compute_get_gpu_format(&fmt, bo);
	}

	if (linear) {
		evergreen_reg_set(res,
			R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1)
			| S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED)
			| S_028C70_FORMAT(fmt.format)
			| S_028C70_NUMBER_TYPE(fmt.number_type)
		);

		evergreen_emit_force_reloc(res);
	} else {
		assert(0 && "TODO");
		///TODO
//		evergreen_reg_set(res, R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1) | S_028C70_ARRAY_MODE(????));
//		evergreen_emit_force_reloc(res);
	}

	evergreen_reg_set(res, R_028C74_CB_COLOR0_ATTRIB, S_028C74_NON_DISP_TILING_ORDER(1));
	evergreen_emit_force_reloc(res);

	if (linear) {
		/* XXX: Why are we using size instead of bo->b.b.b.width0 ? */
		evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM, size);
	} else {
		evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM,
			S_028C78_WIDTH_MAX(bo->b.b.width0)
			| S_028C78_HEIGHT_MAX(bo->b.b.height0));
	}

	/* Only RATs 0-7 have CMASK/FMASK registers. */
	if (id < 8) {
		evergreen_reg_set(res, R_028C7C_CB_COLOR0_CMASK, 0);
		evergreen_emit_force_reloc(res);
		evergreen_reg_set(res, R_028C84_CB_COLOR0_FMASK, 0);
		evergreen_emit_force_reloc(res);
	}

	/* Base address is in 256-byte units. */
	evergreen_reg_set(res, R_028C60_CB_COLOR0_BASE + offset, start >> 8);

	res->bo = bo;
	res->usage = RADEON_USAGE_READWRITE;
	res->coher_bo_size = size;
	res->flags = COMPUTE_RES_CB_FLUSH(id);
}
/**
 * Program local data share (LDS) allocation for the LS stage.
 *
 * `size` is packed into the low bits of SQ_LDS_ALLOC, `num_waves` into
 * bits 14+.
 */
void evergreen_set_lds(
	struct r600_pipe_compute *pipe,
	int num_lds,
	int size,
	int num_waves)
{
	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_LDS, 0);

	evergreen_reg_set(res, R_008E2C_SQ_LDS_RESOURCE_MGMT,
		S_008E2C_NUM_LS_LDS(num_lds));
	evergreen_reg_set(res, CM_R_0288E8_SQ_LDS_ALLOC, size | num_waves << 14);
}
/* Program the global data share (GDS) window: base address, size, and
 * one ordered wave per shader engine. */
void evergreen_set_gds(
	struct r600_pipe_compute *pipe,
	uint32_t addr,
	uint32_t size)
{
	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_GDS, 0);

	evergreen_reg_set(res, R_028728_GDS_ORDERED_WAVE_PER_SE, 1);
	evergreen_reg_set(res, R_028720_GDS_ADDR_BASE, addr);
	evergreen_reg_set(res, R_028724_GDS_ADDR_SIZE, size);
}
/**
 * Program the SX memory export window into `bo`.
 *
 * A zero `size` disables the export; otherwise the BO is recorded for
 * relocation with write usage.
 */
void evergreen_set_export(
	struct r600_pipe_compute *pipe,
	struct r600_resource* bo,
	int offset, int size)
{
#define SX_MEMORY_EXPORT_BASE 0x9010
#define SX_MEMORY_EXPORT_SIZE 0x9014

	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_EXPORT, 0);

	evergreen_reg_set(res, SX_MEMORY_EXPORT_SIZE, size);

	if (size) {
		evergreen_reg_set(res, SX_MEMORY_EXPORT_BASE, offset);
		res->bo = bo;
		res->usage = RADEON_USAGE_WRITE;
		res->coher_bo_size = size;
		res->flags = 0;
	}
}
/**
 * Set loop constant `id` for the compute stage.
 *
 * The dword packs count (12 bits), init (8 bits at 12) and inc (8 bits
 * at 24).
 */
void evergreen_set_loop_const(
	struct r600_pipe_compute *pipe,
	int id, int count, int init, int inc) {
	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_LOOP, id);

	assert(id < 32);
	assert(count <= 0xFFF);
	assert(init <= 0xFF);
	assert(inc <= 0xFF);

	/* Compute shaders use LOOP_CONST registers SQ_LOOP_CONST_160 to
	 * SQ_LOOP_CONST_191 */
	evergreen_reg_set(res, R_03A200_SQ_LOOP_CONST_0 + (160 * 4) + (id * 4),
		count | init << 12 | inc << 24);
}
/**
 * Program the LS temporary ring for shader engine `se`.
 *
 * Selects the SE via GRBM_GFX_INDEX, programs the ring size (and base,
 * when non-zero), then restores broadcast writes to all SEs.
 *
 * Fix: the two consecutive identical `if (size)` blocks of the original
 * were merged into one (statement order preserved).
 */
void evergreen_set_tmp_ring(
	struct r600_pipe_compute *pipe,
	struct r600_resource* bo,
	int offset, int size, int se)
{
#define SQ_LSTMP_RING_BASE 0x00008e10
#define SQ_LSTMP_RING_SIZE 0x00008e14
#define GRBM_GFX_INDEX 0x802C
#define INSTANCE_INDEX(x) ((x) << 0)
#define SE_INDEX(x) ((x) << 16)
#define INSTANCE_BROADCAST_WRITES (1 << 30)
#define SE_BROADCAST_WRITES (1 << 31)

	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_TMPRING, se);

	/* Address only the requested shader engine. */
	evergreen_reg_set(res,
		GRBM_GFX_INDEX,INSTANCE_INDEX(0)
		| SE_INDEX(se)
		| INSTANCE_BROADCAST_WRITES);
	evergreen_reg_set(res, SQ_LSTMP_RING_SIZE, size);

	if (size) {
		assert(bo);

		evergreen_reg_set(res, SQ_LSTMP_RING_BASE, offset);
		res->bo = bo;
		res->usage = RADEON_USAGE_WRITE;
		res->coher_bo_size = 0;
		res->flags = 0;
		evergreen_emit_force_reloc(res);
	}

	/* Restore broadcast writes to all SEs and instances. */
	evergreen_reg_set(res,
		GRBM_GFX_INDEX,INSTANCE_INDEX(0)
		| SE_INDEX(0)
		| INSTANCE_BROADCAST_WRITES
		| SE_BROADCAST_WRITES);
}
/* Return the byte-swap mode required for `colorformat` on big-endian
 * hosts; little-endian builds never swap. */
static uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
{
	if (!R600_BIG_ENDIAN)
		return ENDIAN_NONE;

	switch (colorformat) {
	/* Sub-byte and 8-bit formats need no swapping. */
	case V_028C70_COLOR_4_4:
	case V_028C70_COLOR_8:
		return ENDIAN_NONE;

	/* Formats made of 16-bit units. */
	case V_028C70_COLOR_5_6_5:
	case V_028C70_COLOR_1_5_5_5:
	case V_028C70_COLOR_4_4_4_4:
	case V_028C70_COLOR_16:
	case V_028C70_COLOR_8_8:
	case V_028C70_COLOR_16_16_16_16:
	case V_028C70_COLOR_16_16_16_16_FLOAT:
		return ENDIAN_8IN16;

	/* Formats made of 32-bit units. */
	case V_028C70_COLOR_8_8_8_8:
	case V_028C70_COLOR_2_10_10_10:
	case V_028C70_COLOR_8_24:
	case V_028C70_COLOR_24_8:
	case V_028C70_COLOR_32_FLOAT:
	case V_028C70_COLOR_16_16_FLOAT:
	case V_028C70_COLOR_16_16:
	case V_028C70_COLOR_32_32_FLOAT:
	case V_028C70_COLOR_32_32:
	case V_028C70_COLOR_X24_8_32_FLOAT:
	case V_028C70_COLOR_32_32_32_FLOAT:
	case V_028C70_COLOR_32_32_32_32_FLOAT:
	case V_028C70_COLOR_32_32_32_32:
		return ENDIAN_8IN32;

	default:
		return ENDIAN_NONE; /* Unsupported. */
	}
}
/* Map a pipe texture target to the SQ_TEX_DIM hardware encoding.
 * Unknown targets fall back to 1D. */
static unsigned r600_tex_dim(unsigned dim)
{
	switch (dim) {
	case PIPE_TEXTURE_1D_ARRAY:
		return V_030000_SQ_TEX_DIM_1D_ARRAY;
	case PIPE_TEXTURE_2D:
	case PIPE_TEXTURE_RECT:
		return V_030000_SQ_TEX_DIM_2D;
	case PIPE_TEXTURE_2D_ARRAY:
		return V_030000_SQ_TEX_DIM_2D_ARRAY;
	case PIPE_TEXTURE_3D:
		return V_030000_SQ_TEX_DIM_3D;
	case PIPE_TEXTURE_CUBE:
		return V_030000_SQ_TEX_DIM_CUBEMAP;
	case PIPE_TEXTURE_1D:
	default:
		return V_030000_SQ_TEX_DIM_1D;
	}
}
/**
 * Bind `bo` as vertex/fetch resource `id` for the compute shader.
 *
 * Emits a SET_RESOURCE packet describing the buffer (base address, size,
 * format, destination swizzle) and records the BO for relocation plus
 * TC/VC cache flushing.
 *
 * Fix: the debug printf passed the 64-bit `offset` to a "%i" specifier
 * (undefined varargs behavior); it now uses "%llu" with an explicit cast.
 */
void evergreen_set_vtx_resource(
	struct r600_pipe_compute *pipe,
	struct r600_resource* bo,
	int id, uint64_t offset, int writable)
{
	assert(id < 16);
	uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
	struct number_type_and_format fmt;

	fmt.format = 0;

	assert(bo->b.b.height0 <= 1);
	assert(bo->b.b.depth0 <= 1);

	int e = evergreen_compute_get_gpu_format(&fmt, bo);
	assert(e && "unknown format");
	(void)e; /* silence unused-variable warning in NDEBUG builds */

	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_VERT, id);

	unsigned size = bo->b.b.width0;
	unsigned stride = 1;

//	size = (size * util_format_get_blockwidth(bo->b.b.b.format) *
//	util_format_get_blocksize(bo->b.b.b.format));

	COMPUTE_DBG("id: %i vtx size: %i byte, width0: %i elem\n",
		id, size, bo->b.b.width0);

	sq_vtx_constant_word2 =
		S_030008_BASE_ADDRESS_HI(offset >> 32) |
		S_030008_STRIDE(stride) |
		S_030008_DATA_FORMAT(fmt.format) |
		S_030008_NUM_FORMAT_ALL(fmt.num_format_all) |
		S_030008_ENDIAN_SWAP(0);

	COMPUTE_DBG("%08X %llu %i %i %i\n", sq_vtx_constant_word2,
		(unsigned long long)offset, stride, fmt.format,
		fmt.num_format_all);

	sq_vtx_constant_word3 =
		S_03000C_DST_SEL_X(0) |
		S_03000C_DST_SEL_Y(1) |
		S_03000C_DST_SEL_Z(2) |
		S_03000C_DST_SEL_W(3);

	sq_vtx_constant_word4 = 0;

	/* SET_RESOURCE with 8 payload dwords; fetch resources start at
	 * resource slot 816. */
	evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
	evergreen_emit_raw_value(res, (id+816)*32 >> 2);
	evergreen_emit_raw_value(res, (unsigned)((offset) & 0xffffffff));
	evergreen_emit_raw_value(res, size - 1);
	evergreen_emit_raw_value(res, sq_vtx_constant_word2);
	evergreen_emit_raw_value(res, sq_vtx_constant_word3);
	evergreen_emit_raw_value(res, sq_vtx_constant_word4);
	evergreen_emit_raw_value(res, 0);
	evergreen_emit_raw_value(res, 0);
	evergreen_emit_raw_value(res, S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));

	res->bo = bo;
	if (writable) {
		res->usage = RADEON_USAGE_READWRITE;
	}
	else {
		res->usage = RADEON_USAGE_READ;
	}
	res->coher_bo_size = size;
	res->flags = COMPUTE_RES_TC_FLUSH | COMPUTE_RES_VC_FLUSH;
}
/**
 * Bind a sampler view as texture fetch resource `id`.
 *
 * Translates the pipe format to the hardware texture format and emits a
 * SET_RESOURCE packet with the texture geometry, tiling, swizzle and
 * endian-swap state; the backing BO is recorded for relocation and TC
 * flush.
 */
void evergreen_set_tex_resource(
	struct r600_pipe_compute *pipe,
	struct r600_pipe_sampler_view* view,
	int id)
{
	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_TEX, id);
	struct r600_resource_texture *tmp =
		(struct r600_resource_texture*)view->base.texture;

	unsigned format, endian;
	uint32_t word4 = 0, yuv_format = 0, pitch = 0;
	unsigned char swizzle[4], array_mode = 0, tile_type = 0;
	unsigned height, depth;

	/* Identity swizzle. */
	swizzle[0] = 0;
	swizzle[1] = 1;
	swizzle[2] = 2;
	swizzle[3] = 3;

	format = r600_translate_texformat((struct pipe_screen *)pipe->ctx->screen,
		view->base.format, swizzle, &word4, &yuv_format);

	if (format == ~0) {
		format = 0;
	}

	endian = r600_colorformat_endian_swap(format);

	height = view->base.texture->height0;
	depth = view->base.texture->depth0;

	/* Pitch in blocks, aligned to 8 (hardware requirement). */
	pitch = align(tmp->pitch_in_blocks[0] *
		util_format_get_blockwidth(tmp->real_format), 8);
	array_mode = tmp->array_mode[0];
	tile_type = tmp->tile_type;

	assert(view->base.texture->target != PIPE_TEXTURE_1D_ARRAY);
	assert(view->base.texture->target != PIPE_TEXTURE_2D_ARRAY);

	/* SET_RESOURCE with 8 payload dwords; fetch resources start at
	 * resource slot 816. */
	evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
	evergreen_emit_raw_value(res, (id+816)*32 >> 2); ///TODO: check this line
	evergreen_emit_raw_value(res,
		(S_030000_DIM(r600_tex_dim(view->base.texture->target)) |
		S_030000_PITCH((pitch / 8) - 1) |
		S_030000_NON_DISP_TILING_ORDER(tile_type) |
		S_030000_TEX_WIDTH(view->base.texture->width0 - 1)));
	evergreen_emit_raw_value(res, (S_030004_TEX_HEIGHT(height - 1) |
		S_030004_TEX_DEPTH(depth - 1) |
		S_030004_ARRAY_MODE(array_mode)));
	/* Base and mip address, both in 256-byte units (level 0 only). */
	evergreen_emit_raw_value(res, tmp->offset[0] >> 8);
	evergreen_emit_raw_value(res, tmp->offset[0] >> 8);
	evergreen_emit_raw_value(res, (word4 |
		S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
		S_030010_ENDIAN_SWAP(endian) |
		S_030010_BASE_LEVEL(0)));
	evergreen_emit_raw_value(res, (S_030014_LAST_LEVEL(0) |
		S_030014_BASE_ARRAY(0) |
		S_030014_LAST_ARRAY(0)));
	evergreen_emit_raw_value(res, (S_030018_MAX_ANISO(4 /* max 16 samples */)));
	evergreen_emit_raw_value(res,
		S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE)
		| S_03001C_DATA_FORMAT(format));

	res->bo = (struct r600_resource*)view->base.texture;
	res->usage = RADEON_USAGE_READ;
	res->coher_bo_size = tmp->offset[0] + util_format_get_blockwidth(tmp->real_format)*view->base.texture->width0*height*depth;
	res->flags = COMPUTE_RES_TC_FLUSH;

	/* Two relocations: base address and mip address. */
	evergreen_emit_force_reloc(res);
	evergreen_emit_force_reloc(res);
}
/**
 * Emit the state for one compute texture sampler into the resource's
 * command buffer as a SET_SAMPLER packet (header + 3 state dwords).
 *
 * \param pipe     compute shader whose resource table receives the packet
 * \param sampler  gallium sampler state to translate into SQ_TEX_SAMPLER words
 * \param id       sampler slot index
 */
void evergreen_set_sampler_resource(
	struct r600_pipe_compute *pipe,
	struct compute_sampler_state *sampler,
	int id)
{
	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_SAMPLER, id);

	/* When anisotropic filtering is requested, OR 2 into the XY filter
	 * fields; presumably this selects the anisotropic variant of the
	 * hw filter encoding — TODO confirm against r600_tex_filter(). */
	unsigned aniso_flag_offset = sampler->state.max_anisotropy > 1 ? 2 : 0;

	evergreen_emit_raw_value(res, PKT3C(PKT3_SET_SAMPLER, 3, 0));
	/* Register offset within the sampler state block: 3 dwords per
	 * sampler; the +90 bias looks like the start index of the compute
	 * (LS) sampler range — NOTE(review): verify against hw docs. */
	evergreen_emit_raw_value(res, (id + 90)*3);
	/* SQ_TEX_SAMPLER_WORD0: wrap modes, filters, border color type. */
	evergreen_emit_raw_value(res,
		S_03C000_CLAMP_X(r600_tex_wrap(sampler->state.wrap_s)) |
		S_03C000_CLAMP_Y(r600_tex_wrap(sampler->state.wrap_t)) |
		S_03C000_CLAMP_Z(r600_tex_wrap(sampler->state.wrap_r)) |
		S_03C000_XY_MAG_FILTER(r600_tex_filter(sampler->state.mag_img_filter) | aniso_flag_offset) |
		S_03C000_XY_MIN_FILTER(r600_tex_filter(sampler->state.min_img_filter) | aniso_flag_offset) |
		S_03C000_BORDER_COLOR_TYPE(V_03C000_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK)
		);
	/* SQ_TEX_SAMPLER_WORD1: min/max LOD, clamped to [0,15] in 4.8 fixed point. */
	evergreen_emit_raw_value(res,
		S_03C004_MIN_LOD(S_FIXED(CLAMP(sampler->state.min_lod, 0, 15), 8)) |
		S_03C004_MAX_LOD(S_FIXED(CLAMP(sampler->state.max_lod, 0, 15), 8))
		);
	/* SQ_TEX_SAMPLER_WORD2: LOD bias in [-16,16] fixed point, cube wrap
	 * behavior, and TYPE=1 (enabled). */
	evergreen_emit_raw_value(res,
		S_03C008_LOD_BIAS(S_FIXED(CLAMP(sampler->state.lod_bias, -16, 16), 8)) |
		(sampler->state.seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
		S_03C008_TYPE(1)
		);
}
/**
 * Bind a buffer as an ALU constant cache for the compute (LS) stage.
 *
 * Programs the size and base-address registers for constant buffer
 * \p cache_id and records \p cbo so a relocation + SH cache flush is
 * emitted when the state is flushed.
 *
 * \param cache_id  constant buffer slot (must be < 16)
 * \param size      size value written to the SIZE register; asserted
 *                  < 0x200 — NOTE(review): units (dwords vs. 16-dword
 *                  blocks) not evident from this code, confirm.
 * \param offset    byte offset of the constants within \p cbo; must be
 *                  256-byte aligned (the register holds offset >> 8).
 */
void evergreen_set_const_cache(
	struct r600_pipe_compute *pipe,
	int cache_id,
	struct r600_resource* cbo,
	int size, int offset)
{
	/* LS-stage constant cache registers; slot N lives at base + N*4. */
	#define SQ_ALU_CONST_BUFFER_SIZE_LS_0 0x00028fc0
	#define SQ_ALU_CONST_CACHE_LS_0 0x00028f40

	struct evergreen_compute_resource* res =
		get_empty_res(pipe, COMPUTE_RESOURCE_CONST_MEM, cache_id);

	assert(size < 0x200);
	assert((offset & 0xFF) == 0); /* base register is in 256-byte units */
	assert(cache_id < 16);

	evergreen_reg_set(res, SQ_ALU_CONST_BUFFER_SIZE_LS_0 + cache_id*4, size);
	evergreen_reg_set(res, SQ_ALU_CONST_CACHE_LS_0 + cache_id*4, offset >> 8);

	res->bo = cbo;
	res->usage = RADEON_USAGE_READ;
	res->coher_bo_size = size;
	res->flags = COMPUTE_RES_SH_FLUSH;
}
/**
 * Allocate a VRAM buffer of \p size bytes for internal compute use.
 * Thin wrapper around pipe_buffer_create() using the CUSTOM bind flag.
 */
struct r600_resource* r600_compute_buffer_alloc_vram(
	struct r600_screen *screen,
	unsigned size)
{
	struct pipe_resource *buf;

	assert(size);

	buf = pipe_buffer_create((struct pipe_screen*) screen,
				 PIPE_BIND_CUSTOM,
				 PIPE_USAGE_IMMUTABLE,
				 size);

	return (struct r600_resource *)buf;
}

View file

@ -0,0 +1,119 @@
/*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Adam Rak <adam.rak@streamnovation.com>
*/
#ifndef EVERGREEN_COMPUTE_INTERNAL_H
#define EVERGREEN_COMPUTE_INTERNAL_H
#include "compute_memory_pool.h"
/* Resource-slot identifiers, generated X-macro style from
 * compute_resource.def: each DECL_COMPUTE_RESOURCE(name, n) entry
 * becomes a COMPUTE_RESOURCE_<name> enumerator.
 * NOTE(review): __COMPUTE_RESOURCE_END__ uses a double-underscore
 * identifier, which is reserved for the implementation. */
enum evergreen_compute_resources
{
#define DECL_COMPUTE_RESOURCE(name, n) COMPUTE_RESOURCE_ ## name ,
#include "compute_resource.def"
#undef DECL_COMPUTE_RESOURCE
__COMPUTE_RESOURCE_END__
};
typedef unsigned u32;

/* Cache-flush request flags stored in evergreen_compute_resource::flags.
 * TC = texture cache, VC = vertex cache, SH = shader (constant) cache,
 * CB = color buffer cache (the CB index is carried in bits 8+). */
#define COMPUTE_RES_TC_FLUSH      0xF0001
#define COMPUTE_RES_VC_FLUSH      0xF0002
#define COMPUTE_RES_SH_FLUSH      0xF0004
/* Bug fix: the argument must be parenthesized — with the old expansion
 * `(0xF0008 | x << 8)`, an argument such as `a | b` expanded to
 * `0xF0008 | a | (b << 8)` instead of `0xF0008 | ((a | b) << 8)`. */
#define COMPUTE_RES_CB_FLUSH(x)   (0xF0008 | ((x) << 8))
#define COMPUTE_RES_FULL_FLUSH    0xF0010
/* One entry of a compute shader's resource table: a pre-built run of
 * command-stream dwords plus the buffer they reference and the cache
 * flush that must accompany it. */
struct evergreen_compute_resource {
	int enabled;          /* slot in use */
	int do_reloc[256];    /* per-dword: emit a relocation for cs[i] — TODO confirm index semantics */
	u32 cs[256];          /* raw command-stream dwords for this resource */
	int cs_end;           /* number of valid dwords in cs[] */
	struct r600_resource *bo;  /* buffer referenced by the packets (may be NULL) */
	int coher_bo_size;    /* byte range of bo that must be made coherent */
	enum radeon_bo_usage usage;
	int flags; ///flags for COMPUTE_RES_*_FLUSH
};

/* Gallium sampler state paired with the r600 pipe state it generates. */
struct compute_sampler_state {
	struct r600_pipe_state base;
	struct pipe_sampler_state state;
};

/* Hardware format triple used when translating a resource's format. */
struct number_type_and_format {
	unsigned format;
	unsigned number_type;
	unsigned num_format_all;
};

/* A compiled compute shader and the state needed to dispatch it. */
struct r600_pipe_compute {
	struct r600_context *ctx;
	struct r600_bytecode bc;
	struct tgsi_token *tokens;
	struct evergreen_compute_resource *resources;
	unsigned local_size;    /* bytes of local (LDS) memory — TODO confirm units */
	unsigned private_size;
	unsigned input_size;    /* bytes of kernel input arguments */
#ifdef HAVE_OPENCL
	LLVMModuleRef mod;      /* LLVM IR module the kernel was compiled from */
#endif
	struct r600_resource *kernel_param;   /* buffer holding kernel arguments */
	struct r600_resource *shader_code_bo; /* buffer holding compiled bytecode */
};
int evergreen_compute_get_gpu_format(struct number_type_and_format* fmt, struct r600_resource *bo); ///get hw format from resource, return 0 on failure, nonzero on success
void evergreen_emit_raw_reg_set(struct evergreen_compute_resource* res, unsigned index, int num);
void evergreen_emit_ctx_reg_set(struct r600_context *ctx, unsigned index, int num);
void evergreen_emit_raw_value(struct evergreen_compute_resource* res, unsigned value);
void evergreen_emit_ctx_value(struct r600_context *ctx, unsigned value);
void evergreen_mult_reg_set_(struct evergreen_compute_resource* res, int index, u32* array, int size);
void evergreen_emit_ctx_reloc(struct r600_context *ctx, struct r600_resource *bo, enum radeon_bo_usage usage);
void evergreen_reg_set(struct evergreen_compute_resource* res, unsigned index, unsigned value);
void evergreen_emit_force_reloc(struct evergreen_compute_resource* res);
void evergreen_set_buffer_sync(struct r600_context *ctx, struct r600_resource* bo, int size, int flags, enum radeon_bo_usage usage);
struct evergreen_compute_resource* get_empty_res(struct r600_pipe_compute*, enum evergreen_compute_resources res_code, int index);
int get_compute_resource_num(void);
/* NOTE(review): sizeof(array) yields a BYTE count and is only correct when
 * `array` is a true array (not a pointer parameter, which would give the
 * pointer size). Confirm evergreen_mult_reg_set_() expects bytes, not a
 * dword count. */
#define evergreen_mult_reg_set(res, index, array) evergreen_mult_reg_set_(res, index, array, sizeof(array))
void evergreen_set_rat(struct r600_pipe_compute *pipe, int id, struct r600_resource* bo, int start, int size);
void evergreen_set_lds(struct r600_pipe_compute *pipe, int num_lds, int size, int num_waves);
void evergreen_set_gds(struct r600_pipe_compute *pipe, uint32_t addr, uint32_t size);
void evergreen_set_export(struct r600_pipe_compute *pipe, struct r600_resource* bo, int offset, int size);
void evergreen_set_loop_const(struct r600_pipe_compute *pipe, int id, int count, int init, int inc);
void evergreen_set_tmp_ring(struct r600_pipe_compute *pipe, struct r600_resource* bo, int offset, int size, int se);
void evergreen_set_vtx_resource(struct r600_pipe_compute *pipe, struct r600_resource* bo, int id, uint64_t offset, int writable);
void evergreen_set_tex_resource(struct r600_pipe_compute *pipe, struct r600_pipe_sampler_view* view, int id);
void evergreen_set_sampler_resource(struct r600_pipe_compute *pipe, struct compute_sampler_state *sampler, int id);
void evergreen_set_const_cache(struct r600_pipe_compute *pipe, int cache_id, struct r600_resource* cbo, int size, int offset);
struct r600_resource* r600_compute_buffer_alloc_vram(struct r600_screen *screen, unsigned size);
#endif

View file

@ -28,6 +28,7 @@
#include "util/u_memory.h"
#include "util/u_framebuffer.h"
#include "util/u_dual_blend.h"
#include "evergreen_compute.h"
static uint32_t eg_num_banks(uint32_t nbanks)
{
@ -1881,6 +1882,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
rctx->context.create_stream_output_target = r600_create_so_target;
rctx->context.stream_output_target_destroy = r600_so_target_destroy;
rctx->context.set_stream_output_targets = r600_set_so_targets;
evergreen_init_compute_state_functions(rctx);
}
static void cayman_init_atom_start_cs(struct r600_context *rctx)

View file

@ -61,6 +61,8 @@
#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
#define PKT3_NOP 0x10
#define PKT3_DISPATCH_DIRECT 0x15
#define PKT3_DISPATCH_INDIRECT 0x16
#define PKT3_INDIRECT_BUFFER_END 0x17
#define PKT3_SET_PREDICATION 0x20
#define PKT3_REG_RMW 0x21
@ -114,6 +116,11 @@
#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
#define RADEON_CP_PACKET3_COMPUTE_MODE 0x00000002
/*Evergreen Compute packet3*/
#define PKT3C(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate) | RADEON_CP_PACKET3_COMPUTE_MODE)
/* Registers */
#define R_0084FC_CP_STRMOUT_CNTL 0x000084FC
#define S_0084FC_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)
@ -241,6 +248,15 @@
#define G_008CF0_ALU_UPDATE_FIFO_HIWATER(x) (((x) >> 24) & 0x1F)
#define C_008CF0_ALU_UPDATE_FIFO_HIWATER(x) 0xE0FFFFFF
#define R_008E20_SQ_STATIC_THREAD_MGMT1 0x8E20
#define R_008E24_SQ_STATIC_THREAD_MGMT2 0x8E24
#define R_008E28_SQ_STATIC_THREAD_MGMT3 0x8E28
#define R_00899C_VGT_COMPUTE_START_X 0x0000899C
#define R_0089A0_VGT_COMPUTE_START_Y 0x000089A0
#define R_0089A4_VGT_COMPUTE_START_Z 0x000089A4
#define R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE 0x000089AC
#define R_009100_SPI_CONFIG_CNTL 0x00009100
#define R_00913C_SPI_CONFIG_CNTL_1 0x0000913C
#define S_00913C_VTX_DONE_DELAY(x) (((x) & 0xF) << 0)
@ -397,6 +413,11 @@
#define G_028410_ALPHA_TEST_BYPASS(x) (((x) >> 8) & 0x1)
#define C_028410_ALPHA_TEST_BYPASS 0xFFFFFEFF
#define R_0286EC_SPI_COMPUTE_NUM_THREAD_X 0x0286EC
#define R_0286F0_SPI_COMPUTE_NUM_THREAD_Y 0x0286F0
#define R_0286F4_SPI_COMPUTE_NUM_THREAD_Z 0x0286F4
#define R_028B74_VGT_DISPATCH_INITIATOR 0x028B74
#define R_028800_DB_DEPTH_CONTROL 0x028800
#define S_028800_STENCIL_ENABLE(x) (((x) & 0x1) << 0)
#define G_028800_STENCIL_ENABLE(x) (((x) >> 0) & 0x1)
@ -747,6 +768,8 @@
#define S_028A40_CUT_MODE(x) (((x) & 0x3) << 3)
#define G_028A40_CUT_MODE(x) (((x) >> 3) & 0x3)
#define C_028A40_CUT_MODE 0xFFFFFFE7
#define S_028A40_COMPUTE_MODE(x) (x << 14)
#define S_028A40_PARTIAL_THD_AT_EOI(x) (x << 17)
#define R_028A6C_VGT_GS_OUT_PRIM_TYPE 0x028A6C
#define S_028A6C_OUTPRIM_TYPE(x) (((x) & 0x3F) << 0)
#define V_028A6C_OUTPRIM_TYPE_POINTLIST 0
@ -1434,6 +1457,50 @@
#define G_028848_ALLOW_DOUBLE_DENORM_OUT(x) (((x) >> 7) & 0x1)
#define C_028848_ALLOW_DOUBLE_DENORM_OUT 0xFFFFFF7F
#define R_0288D4_SQ_PGM_RESOURCES_LS 0x0288d4
#define S_0288D4_NUM_GPRS(x) (((x) & 0xFF) << 0)
#define G_0288D4_NUM_GPRS(x) (((x) >> 0) & 0xFF)
#define C_0288D4_NUM_GPRS 0xFFFFFF00
#define S_0288D4_STACK_SIZE(x) (((x) & 0xFF) << 8)
#define G_0288D4_STACK_SIZE(x) (((x) >> 8) & 0xFF)
#define C_0288D4_STACK_SIZE 0xFFFF00FF
#define S_0288D4_DX10_CLAMP(x) (((x) & 0x1) << 21)
#define G_0288D4_DX10_CLAMP(x) (((x) >> 21) & 0x1)
#define C_0288D4_DX10_CLAMP 0xFFDFFFFF
#define S_0288D4_PRIME_CACHE_ON_DRAW(x) (((x) & 0x1) << 23)
#define G_0288D4_PRIME_CACHE_ON_DRAW(x) (((x) >> 23) & 0x1)
#define S_0288D4_UNCACHED_FIRST_INST(x) (((x) & 0x1) << 28)
#define G_0288D4_UNCACHED_FIRST_INST(x) (((x) >> 28) & 0x1)
#define C_0288D4_UNCACHED_FIRST_INST 0xEFFFFFFF
#define S_0288D4_CLAMP_CONSTS(x) (((x) & 0x1) << 31)
#define G_0288D4_CLAMP_CONSTS(x) (((x) >> 31) & 0x1)
#define C_0288D4_CLAMP_CONSTS 0x7FFFFFFF
#define R_0288D8_SQ_PGM_RESOURCES_LS_2 0x0288d8
#define R_0288D4_SQ_PGM_RESOURCES_LS 0x0288d4
#define S_0288D4_NUM_GPRS(x) (((x) & 0xFF) << 0)
#define G_0288D4_NUM_GPRS(x) (((x) >> 0) & 0xFF)
#define C_0288D4_NUM_GPRS 0xFFFFFF00
#define S_0288D4_STACK_SIZE(x) (((x) & 0xFF) << 8)
#define G_0288D4_STACK_SIZE(x) (((x) >> 8) & 0xFF)
#define C_0288D4_STACK_SIZE 0xFFFF00FF
#define S_0288D4_DX10_CLAMP(x) (((x) & 0x1) << 21)
#define G_0288D4_DX10_CLAMP(x) (((x) >> 21) & 0x1)
#define C_0288D4_DX10_CLAMP 0xFFDFFFFF
#define S_0288D4_PRIME_CACHE_ON_DRAW(x) (((x) & 0x1) << 23)
#define G_0288D4_PRIME_CACHE_ON_DRAW(x) (((x) >> 23) & 0x1)
#define S_0288D4_UNCACHED_FIRST_INST(x) (((x) & 0x1) << 28)
#define G_0288D4_UNCACHED_FIRST_INST(x) (((x) >> 28) & 0x1)
#define C_0288D4_UNCACHED_FIRST_INST 0xEFFFFFFF
#define S_0288D4_CLAMP_CONSTS(x) (((x) & 0x1) << 31)
#define G_0288D4_CLAMP_CONSTS(x) (((x) >> 31) & 0x1)
#define C_0288D4_CLAMP_CONSTS 0x7FFFFFFF
#define R_0288D8_SQ_PGM_RESOURCES_LS_2 0x0288d8
#define R_028644_SPI_PS_INPUT_CNTL_0 0x028644
#define S_028644_SEMANTIC(x) (((x) & 0xFF) << 0)
#define G_028644_SEMANTIC(x) (((x) >> 0) & 0xFF)
@ -1710,6 +1777,12 @@
#define R_0286DC_SPI_FOG_CNTL 0x000286DC
#define R_0286E4_SPI_PS_IN_CONTROL_2 0x000286E4
#define R_0286E8_SPI_COMPUTE_INPUT_CNTL 0x000286E8
#define S_0286E8_TID_IN_GROUP_ENA 1
#define S_0286E8_TGID_ENA 2
#define S_0286E8_DISABLE_INDEX_PACK 4
#define R_028720_GDS_ADDR_BASE 0x00028720
#define R_028724_GDS_ADDR_SIZE 0x00028724
#define R_028728_GDS_ORDERED_WAVE_PER_SE 0x00028728
#define R_028784_CB_BLEND1_CONTROL 0x00028784
#define R_028788_CB_BLEND2_CONTROL 0x00028788
#define R_02878C_CB_BLEND3_CONTROL 0x0002878C
@ -1736,6 +1809,7 @@
#define C_02884C_EXPORT_Z 0xFFFFFFFE
#define R_02885C_SQ_PGM_START_VS 0x0002885C
#define R_0288A4_SQ_PGM_START_FS 0x000288A4
#define R_0288D0_SQ_PGM_START_LS 0x000288d0
#define R_0288A8_SQ_PGM_RESOURCES_FS 0x000288A8
#define R_0288EC_SQ_LDS_ALLOC_PS 0x000288EC
#define R_028900_SQ_ESGS_RING_ITEMSIZE 0x00028900

View file

@ -0,0 +1,19 @@
#include <llvm/ADT/OwningPtr.h>
#include <llvm/ADT/StringRef.h>
#include <llvm/LLVMContext.h>
#include <llvm/Support/IRReader.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/SourceMgr.h>
#include "llvm_wrapper.h"
/* Parse an LLVM IR/bitcode blob into an LLVMModuleRef usable from C.
 * The input bytes are copied, so the caller keeps ownership of `bitcode`.
 * Returns NULL (wrapped) if parsing fails; the diagnostic in `Err` is
 * discarded — NOTE(review): consider reporting it to the caller.
 * ParseIR takes ownership of the MemoryBuffer (LLVM 3.1 API) — TODO
 * confirm, a double-free would result otherwise. */
extern "C" LLVMModuleRef llvm_parse_bitcode(const unsigned char * bitcode, unsigned bitcode_len)
{
	llvm::OwningPtr<llvm::Module> M;
	llvm::StringRef str((const char*)bitcode, bitcode_len);
	llvm::MemoryBuffer* buffer = llvm::MemoryBuffer::getMemBufferCopy(str);
	llvm::SMDiagnostic Err;
	M.reset(llvm::ParseIR(buffer, Err, llvm::getGlobalContext()));
	/* take() releases ownership from the OwningPtr so the module
	 * survives this scope; the C caller must dispose of it. */
	return wrap(M.take());
}

View file

@ -0,0 +1,16 @@
#ifndef LLVM_WRAPPER_H
#define LLVM_WRAPPER_H

#include <llvm-c/Core.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Parse an LLVM IR/bitcode blob into a module handle. The input is
 * copied; the caller retains ownership of `bitcode` and must dispose
 * of the returned module. Returns NULL on parse failure. */
LLVMModuleRef llvm_parse_bitcode(const unsigned char * bitcode, unsigned bitcode_len);

#ifdef __cplusplus
}
#endif

#endif

View file

@ -2,7 +2,7 @@
#ifndef R600_LLVM_H
#define R600_LLVM_H
#ifdef R600_USE_LLVM
#if defined R600_USE_LLVM || defined HAVE_OPENCL
#include "radeon_llvm.h"
#include <llvm-c/Core.h>
@ -24,6 +24,6 @@ unsigned r600_llvm_compile(
enum radeon_family family,
unsigned dump);
#endif /* R600_USE_LLVM */
#endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */
#endif /* R600_LLVM_H */

View file

@ -382,6 +382,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_USER_CONSTANT_BUFFERS:
case PIPE_CAP_COMPUTE:
return 1;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@ -409,7 +410,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_COMPUTE:
return 0;
/* Stream output. */
@ -491,6 +491,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
{
case PIPE_SHADER_FRAGMENT:
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_COMPUTE:
break;
case PIPE_SHADER_GEOMETRY:
/* XXX: support and enable geometry programs */
@ -538,8 +539,12 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
return rscreen->glsl_feature_level >= 130;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_PREFERRED_IR:
if (shader == PIPE_SHADER_COMPUTE) {
return PIPE_SHADER_IR_LLVM;
} else {
return PIPE_SHADER_IR_TGSI;
}
}
return 0;
}
@ -569,6 +574,81 @@ static int r600_get_video_param(struct pipe_screen *screen,
}
}
/**
 * pipe_screen::get_compute_param implementation.
 *
 * Writes the requested capability value into *ret (when ret != NULL)
 * and returns the number of bytes the capability occupies; returns 0
 * for unknown capabilities.
 */
static int r600_get_compute_param(struct pipe_screen *screen,
        enum pipe_compute_cap param,
        void *ret)
{
	uint64_t *out = ret;

	//TODO: select these params by asic
	switch (param) {
	case PIPE_COMPUTE_CAP_IR_TARGET:
		if (ret) {
			strcpy(ret, "r600--");
		}
		return 7 * sizeof(char); /* "r600--" + NUL */

	case PIPE_COMPUTE_CAP_GRID_DIMENSION:
		if (out) {
			out[0] = 3;
		}
		return 1 * sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
		if (out) {
			out[0] = 65535;
			out[1] = 65535;
			out[2] = 1;
		}
		return 3 * sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
		if (out) {
			out[0] = 256;
			out[1] = 256;
			out[2] = 256;
		}
		return 3 * sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
		if (out) {
			out[0] = 256;
		}
		return sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
		if (out) {
			/* XXX: This is what the proprietary driver reports, we
			 * may want to use a different value. */
			out[0] = 201326592;
		}
		return sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
		if (out) {
			out[0] = 1024;
		}
		return sizeof(uint64_t);

	case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
		if (out) {
			/* XXX: This is what the proprietary driver reports, we
			 * may want to use a different value. */
			out[0] = 32768;
		}
		return sizeof(uint64_t);

	default:
		fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
		return 0;
	}
}
static void r600_destroy_screen(struct pipe_screen* pscreen)
{
struct r600_screen *rscreen = (struct r600_screen *)pscreen;
@ -576,6 +656,10 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
if (rscreen == NULL)
return;
if (rscreen->global_pool) {
compute_memory_pool_delete(rscreen->global_pool);
}
if (rscreen->fences.bo) {
struct r600_fence_block *entry, *tmp;
@ -833,6 +917,8 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
rscreen->screen.get_shader_param = r600_get_shader_param;
rscreen->screen.get_paramf = r600_get_paramf;
rscreen->screen.get_video_param = r600_get_video_param;
rscreen->screen.get_compute_param = r600_get_compute_param;
if (rscreen->chip_class >= EVERGREEN) {
rscreen->screen.is_format_supported = evergreen_is_format_supported;
} else {
@ -857,5 +943,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
rscreen->use_surface_alloc = debug_get_bool_option("R600_SURF", TRUE);
rscreen->glsl_feature_level = debug_get_bool_option("R600_GLSL130", TRUE) ? 130 : 120;
rscreen->global_pool = compute_memory_pool_new(1024*16, rscreen);
return &rscreen->screen;
}

View file

@ -28,8 +28,11 @@
#include "util/u_slab.h"
#include "r600.h"
#include "r600_llvm.h"
#include "r600_public.h"
#include "r600_shader.h"
#include "r600_resource.h"
#include "evergreen_compute.h"
#define R600_MAX_CONST_BUFFERS 2
#define R600_MAX_CONST_BUFFER_SIZE 4096
@ -98,9 +101,16 @@ enum r600_pipe_state_id {
R600_PIPE_STATE_RESOURCE,
R600_PIPE_STATE_POLYGON_OFFSET,
R600_PIPE_STATE_FETCH_SHADER,
R600_PIPE_STATE_SPI,
R600_PIPE_NSTATES
};
struct compute_memory_pool;
void compute_memory_pool_delete(struct compute_memory_pool* pool);
struct compute_memory_pool* compute_memory_pool_new(
int64_t initial_size_in_dw,
struct r600_screen *rscreen);
struct r600_pipe_fences {
struct r600_resource *bo;
unsigned *data;
@ -123,6 +133,12 @@ struct r600_screen {
bool use_surface_alloc;
int glsl_feature_level;
/*for compute global memory binding, we allocate stuff here, instead of
* buffers.
* XXX: Not sure if this is the best place for global_pool. Also,
* it's not thread safe, so it won't work with multiple contexts. */
struct compute_memory_pool *global_pool;
};
struct r600_pipe_sampler_view {
@ -257,6 +273,7 @@ struct r600_context {
struct pipe_clip_state clip;
struct r600_pipe_shader *ps_shader;
struct r600_pipe_shader *vs_shader;
struct r600_pipe_compute *cs_shader;
struct r600_pipe_rasterizer *rasterizer;
struct r600_pipe_state vgt;
struct r600_pipe_state spi;
@ -266,7 +283,9 @@ struct r600_context {
unsigned saved_render_cond_mode;
/* shader information */
boolean two_side;
boolean spi_dirty;
unsigned sprite_coord_enable;
boolean flatshade;
boolean export_16bpc;
unsigned alpha_ref;
boolean alpha_ref_dirty;
@ -412,6 +431,10 @@ void r600_init_context_resource_functions(struct r600_context *r600);
/* r600_shader.c */
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader);
#ifdef HAVE_OPENCL
int r600_compute_shader_create(struct pipe_context * ctx,
LLVMModuleRef mod, struct r600_bytecode * bytecode);
#endif
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_find_vs_semantic_index(struct r600_shader *vs,
struct r600_shader *ps, int id);

View file

@ -27,7 +27,12 @@ static struct pipe_resource *r600_resource_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
if (templ->target == PIPE_BUFFER) {
return r600_buffer_create(screen, templ);
if (templ->bind & PIPE_BIND_GLOBAL) {
return r600_compute_global_buffer_create(screen, templ);
}
else {
return r600_buffer_create(screen, templ);
}
} else {
return r600_texture_create(screen, templ);
}
@ -44,12 +49,21 @@ static struct pipe_resource *r600_resource_from_handle(struct pipe_screen * scre
}
}
void r600_resource_destroy(struct pipe_screen *screen, struct pipe_resource *res)
{
if (res->target == PIPE_BUFFER && (res->bind & PIPE_BIND_GLOBAL)) {
r600_compute_global_buffer_destroy(screen, res);
} else {
u_resource_destroy_vtbl(screen, res);
}
}
void r600_init_screen_resource_functions(struct pipe_screen *screen)
{
screen->resource_create = r600_resource_create;
screen->resource_from_handle = r600_resource_from_handle;
screen->resource_get_handle = u_resource_get_handle_vtbl;
screen->resource_destroy = u_resource_destroy_vtbl;
screen->resource_destroy = r600_resource_destroy;
}
void r600_init_context_resource_functions(struct r600_context *r600)

View file

@ -34,6 +34,13 @@ struct r600_transfer {
unsigned offset;
};
struct compute_memory_item;
struct r600_resource_global {
struct r600_resource base;
struct compute_memory_item *chunk;
};
struct r600_resource_texture {
struct r600_resource resource;
@ -65,6 +72,7 @@ struct r600_surface {
unsigned aligned_height;
};
void r600_resource_destroy(struct pipe_screen *screen, struct pipe_resource *res);
void r600_init_screen_resource_functions(struct pipe_screen *screen);
/* r600_texture */

View file

@ -225,6 +225,37 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
* struct r600_bytecode.
*/
static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
unsigned char * bytes, unsigned num_bytes);
#ifdef HAVE_OPENCL
/**
 * Compile an LLVM module into r600 bytecode for a compute shader.
 *
 * Runs the LLVM backend to produce a serialized byte stream, then
 * deserializes it into \p bytecode and builds the final binary.
 * Set R600_DUMP_SHADERS=1 in the environment to dump the result.
 *
 * Returns 1 unconditionally.
 * NOTE(review): the return values of r600_llvm_compile() and
 * r600_bytecode_build() are ignored, so compile failures are silent;
 * also `bytes` (allocated by the compile step — TODO confirm who owns
 * it) does not appear to be freed here.
 */
int r600_compute_shader_create(struct pipe_context * ctx,
	LLVMModuleRef mod,  struct r600_bytecode * bytecode)
{
	struct r600_context *r600_ctx = (struct r600_context *)ctx;
	unsigned char * bytes;          /* serialized bytecode from the LLVM backend */
	unsigned byte_count;
	struct r600_shader_ctx shader_ctx;
	unsigned dump = 0;

	if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
		dump = 1;
	}

	r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
	shader_ctx.bc = bytecode;
	r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family);
	shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
	r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
	r600_bytecode_build(shader_ctx.bc);
	if (dump) {
		r600_bytecode_dump(shader_ctx.bc);
	}
	return 1;
}
#endif /* HAVE_OPENCL */
static unsigned r600_src_from_byte_stream(unsigned char * bytes,
unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
{

View file

@ -916,6 +916,10 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
unsigned offset = 0;
char *map;
if ((transfer->resource->bind & PIPE_BIND_GLOBAL) && transfer->resource->target == PIPE_BUFFER) {
return r600_compute_global_transfer_map(ctx, transfer);
}
if (rtransfer->staging) {
buf = ((struct r600_resource *)rtransfer->staging)->cs_buf;
} else {
@ -945,6 +949,10 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct r600_context *rctx = (struct r600_context*)ctx;
struct radeon_winsys_cs_handle *buf;
if ((transfer->resource->bind & PIPE_BIND_GLOBAL) && transfer->resource->target == PIPE_BUFFER) {
return r600_compute_global_transfer_unmap(ctx, transfer);
}
if (rtransfer->staging) {
buf = ((struct r600_resource *)rtransfer->staging)->cs_buf;
} else {