mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 06:50:11 +01:00
r600g: add async for staging buffer upload v2
v2: Add virtual address to dma src/dst offset for cayman Signed-off-by: Jerome Glisse <jglisse@redhat.com>
This commit is contained in:
parent
bff07638a8
commit
325422c494
12 changed files with 595 additions and 17 deletions
|
|
@ -26,6 +26,7 @@
|
|||
#include "r600_hw_context_priv.h"
|
||||
#include "evergreend.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
static const struct r600_reg cayman_config_reg_list[] = {
|
||||
{R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0},
|
||||
|
|
@ -238,3 +239,48 @@ void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_en
|
|||
r600_write_context_reg(cs, R_028B94_VGT_STRMOUT_CONFIG, S_028B94_STREAMOUT_0_EN(0));
|
||||
}
|
||||
}
|
||||
|
||||
void evergreen_dma_copy(struct r600_context *rctx,
|
||||
struct pipe_resource *dst,
|
||||
struct pipe_resource *src,
|
||||
unsigned long dst_offset,
|
||||
unsigned long src_offset,
|
||||
unsigned long size)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
|
||||
unsigned i, ncopy, csize, sub_cmd, shift;
|
||||
struct r600_resource *rdst = (struct r600_resource*)dst;
|
||||
struct r600_resource *rsrc = (struct r600_resource*)src;
|
||||
|
||||
/* make sure that the dma ring is only one active */
|
||||
rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC);
|
||||
dst_offset += r600_resource_va(&rctx->screen->screen, dst);
|
||||
src_offset += r600_resource_va(&rctx->screen->screen, src);
|
||||
|
||||
/* see if we use dword or byte copy */
|
||||
if (!(dst_offset & 0x3) && !(src_offset & 0x3) && !(size & 0x3)) {
|
||||
size >>= 2;
|
||||
sub_cmd = 0x00;
|
||||
shift = 2;
|
||||
} else {
|
||||
sub_cmd = 0x40;
|
||||
shift = 0;
|
||||
}
|
||||
ncopy = (size / 0x000fffff) + !!(size % 0x000fffff);
|
||||
|
||||
r600_need_dma_space(rctx, ncopy * 5);
|
||||
for (i = 0; i < ncopy; i++) {
|
||||
csize = size < 0x000fffff ? size : 0x000fffff;
|
||||
/* emit reloc before writting cs so that cs is always in consistent state */
|
||||
r600_context_bo_reloc(rctx, &rctx->rings.dma, rsrc, RADEON_USAGE_READ);
|
||||
r600_context_bo_reloc(rctx, &rctx->rings.dma, rdst, RADEON_USAGE_WRITE);
|
||||
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize);
|
||||
cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
|
||||
cs->buf[cs->cdw++] = src_offset & 0xffffffff;
|
||||
cs->buf[cs->cdw++] = (dst_offset >> 32UL) & 0xff;
|
||||
cs->buf[cs->cdw++] = (src_offset >> 32UL) & 0xff;
|
||||
dst_offset += csize << shift;
|
||||
src_offset += csize << shift;
|
||||
size -= csize;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,20 @@
|
|||
#include "util/u_framebuffer.h"
|
||||
#include "util/u_dual_blend.h"
|
||||
#include "evergreen_compute.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
static INLINE unsigned evergreen_array_mode(unsigned mode)
|
||||
{
|
||||
switch (mode) {
|
||||
case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_028C70_ARRAY_LINEAR_ALIGNED;
|
||||
break;
|
||||
case RADEON_SURF_MODE_1D: return V_028C70_ARRAY_1D_TILED_THIN1;
|
||||
break;
|
||||
case RADEON_SURF_MODE_2D: return V_028C70_ARRAY_2D_TILED_THIN1;
|
||||
default:
|
||||
case RADEON_SURF_MODE_LINEAR: return V_028C70_ARRAY_LINEAR_GENERAL;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t eg_num_banks(uint32_t nbanks)
|
||||
{
|
||||
|
|
@ -3445,3 +3459,190 @@ void evergreen_update_db_shader_control(struct r600_context * rctx)
|
|||
rctx->db_misc_state.atom.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
static void evergreen_dma_copy_tile(struct r600_context *rctx,
|
||||
struct pipe_resource *dst,
|
||||
unsigned dst_level,
|
||||
unsigned dst_x,
|
||||
unsigned dst_y,
|
||||
unsigned dst_z,
|
||||
struct pipe_resource *src,
|
||||
unsigned src_level,
|
||||
unsigned src_x,
|
||||
unsigned src_y,
|
||||
unsigned src_z,
|
||||
unsigned copy_height,
|
||||
unsigned pitch,
|
||||
unsigned bpp)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
|
||||
struct r600_texture *rsrc = (struct r600_texture*)src;
|
||||
struct r600_texture *rdst = (struct r600_texture*)dst;
|
||||
unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
|
||||
unsigned ncopy, height, cheight, detile, i, x, y, z, src_mode, dst_mode;
|
||||
unsigned sub_cmd, bank_h, bank_w, mt_aspect, nbanks, tile_split;
|
||||
unsigned long base, addr;
|
||||
|
||||
/* make sure that the dma ring is only one active */
|
||||
rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC);
|
||||
|
||||
dst_mode = rdst->surface.level[dst_level].mode;
|
||||
src_mode = rsrc->surface.level[src_level].mode;
|
||||
/* downcast linear aligned to linear to simplify test */
|
||||
src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode;
|
||||
dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode;
|
||||
assert(dst_mode != src_mode);
|
||||
|
||||
y = 0;
|
||||
sub_cmd = 0x8;
|
||||
lbpp = util_logbase2(bpp);
|
||||
pitch_tile_max = ((pitch / bpp) >> 3) - 1;
|
||||
nbanks = eg_num_banks(rctx->screen->tiling_info.num_banks);
|
||||
|
||||
if (dst_mode == RADEON_SURF_MODE_LINEAR) {
|
||||
/* T2L */
|
||||
array_mode = evergreen_array_mode(src_mode);
|
||||
slice_tile_max = (((pitch * rsrc->surface.level[src_level].npix_y) >> 6) / bpp) - 1;
|
||||
/* linear height must be the same as the slice tile max height, it's ok even
|
||||
* if the linear destination/source have smaller heigh as the size of the
|
||||
* dma packet will be using the copy_height which is always smaller or equal
|
||||
* to the linear height
|
||||
*/
|
||||
height = rsrc->surface.level[src_level].npix_y;
|
||||
detile = 1;
|
||||
x = src_x;
|
||||
y = src_y;
|
||||
z = src_z;
|
||||
base = rsrc->surface.level[src_level].offset;
|
||||
addr = rdst->surface.level[dst_level].offset;
|
||||
addr += rdst->surface.level[dst_level].slice_size * dst_z;
|
||||
addr += dst_y * pitch + dst_x * bpp;
|
||||
bank_h = eg_bank_wh(rsrc->surface.bankh);
|
||||
bank_w = eg_bank_wh(rsrc->surface.bankw);
|
||||
mt_aspect = eg_macro_tile_aspect(rsrc->surface.mtilea);
|
||||
tile_split = eg_tile_split(rsrc->surface.tile_split);
|
||||
base += r600_resource_va(&rctx->screen->screen, src);
|
||||
addr += r600_resource_va(&rctx->screen->screen, dst);
|
||||
} else {
|
||||
/* L2T */
|
||||
array_mode = evergreen_array_mode(dst_mode);
|
||||
slice_tile_max = (((pitch * rdst->surface.level[dst_level].npix_y) >> 6) / bpp) - 1;
|
||||
/* linear height must be the same as the slice tile max height, it's ok even
|
||||
* if the linear destination/source have smaller heigh as the size of the
|
||||
* dma packet will be using the copy_height which is always smaller or equal
|
||||
* to the linear height
|
||||
*/
|
||||
height = rdst->surface.level[dst_level].npix_y;
|
||||
detile = 0;
|
||||
x = dst_x;
|
||||
y = dst_y;
|
||||
z = dst_z;
|
||||
base = rdst->surface.level[dst_level].offset;
|
||||
addr = rsrc->surface.level[src_level].offset;
|
||||
addr += rsrc->surface.level[src_level].slice_size * src_z;
|
||||
addr += src_y * pitch + src_x * bpp;
|
||||
bank_h = eg_bank_wh(rdst->surface.bankh);
|
||||
bank_w = eg_bank_wh(rdst->surface.bankw);
|
||||
mt_aspect = eg_macro_tile_aspect(rdst->surface.mtilea);
|
||||
tile_split = eg_tile_split(rdst->surface.tile_split);
|
||||
base += r600_resource_va(&rctx->screen->screen, dst);
|
||||
addr += r600_resource_va(&rctx->screen->screen, src);
|
||||
}
|
||||
|
||||
size = (copy_height * pitch) >> 2;
|
||||
ncopy = (size / 0x000fffff) + !!(size % 0x000fffff);
|
||||
r600_need_dma_space(rctx, ncopy * 9);
|
||||
|
||||
for (i = 0; i < ncopy; i++) {
|
||||
cheight = copy_height;
|
||||
if (((cheight * pitch) >> 2) > 0x000fffff) {
|
||||
cheight = (0x000fffff << 2) / pitch;
|
||||
}
|
||||
size = (cheight * pitch) >> 2;
|
||||
/* emit reloc before writting cs so that cs is always in consistent state */
|
||||
r600_context_bo_reloc(rctx, &rctx->rings.dma, &rsrc->resource, RADEON_USAGE_READ);
|
||||
r600_context_bo_reloc(rctx, &rctx->rings.dma, &rdst->resource, RADEON_USAGE_WRITE);
|
||||
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size);
|
||||
cs->buf[cs->cdw++] = base >> 8;
|
||||
cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) |
|
||||
(lbpp << 24) | (bank_h << 21) |
|
||||
(bank_w << 18) | (mt_aspect << 16);
|
||||
cs->buf[cs->cdw++] = (pitch_tile_max << 0) | ((height - 1) << 16);
|
||||
cs->buf[cs->cdw++] = (slice_tile_max << 0);
|
||||
cs->buf[cs->cdw++] = (x << 0) | (z << 18);
|
||||
cs->buf[cs->cdw++] = (y << 0) | (tile_split << 21) | (nbanks << 25);
|
||||
cs->buf[cs->cdw++] = addr & 0xfffffffc;
|
||||
cs->buf[cs->cdw++] = (addr >> 32UL) & 0xff;
|
||||
copy_height -= cheight;
|
||||
addr += cheight * pitch;
|
||||
y += cheight;
|
||||
}
|
||||
}
|
||||
|
||||
boolean evergreen_dma_blit(struct pipe_context *ctx,
|
||||
struct pipe_resource *dst,
|
||||
unsigned dst_level,
|
||||
unsigned dst_x, unsigned dst_y, unsigned dst_z,
|
||||
struct pipe_resource *src,
|
||||
unsigned src_level,
|
||||
const struct pipe_box *src_box)
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
struct r600_texture *rsrc = (struct r600_texture*)src;
|
||||
struct r600_texture *rdst = (struct r600_texture*)dst;
|
||||
unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height;
|
||||
unsigned src_w, dst_w;
|
||||
|
||||
if (rctx->rings.dma.cs == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
if (src->format != dst->format) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
bpp = rdst->surface.bpe;
|
||||
dst_pitch = rdst->surface.level[dst_level].pitch_bytes;
|
||||
src_pitch = rsrc->surface.level[src_level].pitch_bytes;
|
||||
src_w = rsrc->surface.level[src_level].npix_x;
|
||||
dst_w = rdst->surface.level[dst_level].npix_x;
|
||||
copy_height = src_box->height / rsrc->surface.blk_h;
|
||||
|
||||
dst_mode = rdst->surface.level[dst_level].mode;
|
||||
src_mode = rsrc->surface.level[src_level].mode;
|
||||
/* downcast linear aligned to linear to simplify test */
|
||||
src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode;
|
||||
dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode;
|
||||
|
||||
if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
|
||||
/* FIXME evergreen can do partial blit */
|
||||
return FALSE;
|
||||
}
|
||||
/* the x test here are currently useless (because we don't support partial blit)
|
||||
* but keep them around so we don't forget about those
|
||||
*/
|
||||
if ((src_pitch & 0x7) || (src_box->x & 0x7) || (dst_x & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (src_mode == dst_mode) {
|
||||
unsigned long dst_offset, src_offset;
|
||||
/* simple dma blit would do NOTE code here assume :
|
||||
* src_box.x/y == 0
|
||||
* dst_x/y == 0
|
||||
* dst_pitch == src_pitch
|
||||
*/
|
||||
src_offset= rsrc->surface.level[src_level].offset;
|
||||
src_offset += rsrc->surface.level[src_level].slice_size * src_box->z;
|
||||
src_offset += src_box->y * src_pitch + src_box->x * bpp;
|
||||
dst_offset = rdst->surface.level[dst_level].offset;
|
||||
dst_offset += rdst->surface.level[dst_level].slice_size * dst_z;
|
||||
dst_offset += dst_y * dst_pitch + dst_x * bpp;
|
||||
evergreen_dma_copy(rctx, dst, src, dst_offset, src_offset,
|
||||
src_box->height * src_pitch);
|
||||
} else {
|
||||
evergreen_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z,
|
||||
src, src_level, src_box->x, src_box->y, src_box->z,
|
||||
copy_height, dst_pitch, bpp);
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2317,4 +2317,19 @@
|
|||
#define G_028AA8_SWITCH_ON_EOP(x) (((x) >> 17) & 0x1)
|
||||
#define C_028AA8_SWITCH_ON_EOP 0xFFFDFFFF
|
||||
|
||||
/* async DMA packets
 *
 * Header dword layout: cmd in bits 31:28, sub_cmd in bits 27:20 and the
 * count n in bits 19:0. The masks are unsigned so that commands with bit 3
 * set (e.g. DMA_PACKET_NOP) do not overflow a signed int when shifted into
 * the top nibble.
 */
#define DMA_PACKET(cmd, sub_cmd, n)	((((cmd) & 0xFu) << 28) |	\
					 (((sub_cmd) & 0xFFu) << 20) |	\
					 (((n) & 0xFFFFFu) << 0))
/* async DMA Packet types */
#define DMA_PACKET_WRITE			0x2
#define DMA_PACKET_COPY				0x3
#define DMA_PACKET_INDIRECT_BUFFER		0x4
#define DMA_PACKET_SEMAPHORE			0x5
#define DMA_PACKET_FENCE			0x6
#define DMA_PACKET_TRAP				0x7
#define DMA_PACKET_SRBM_WRITE			0x9
#define DMA_PACKET_CONSTANT_FILL		0xd
#define DMA_PACKET_NOP				0xf
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -170,6 +170,33 @@ void r600_flush_emit(struct r600_context *ctx);
|
|||
void r600_context_streamout_begin(struct r600_context *ctx);
|
||||
void r600_context_streamout_end(struct r600_context *ctx);
|
||||
void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
|
||||
void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw);
|
||||
void r600_dma_copy(struct r600_context *rctx,
|
||||
struct pipe_resource *dst,
|
||||
struct pipe_resource *src,
|
||||
unsigned long dst_offset,
|
||||
unsigned long src_offset,
|
||||
unsigned long size);
|
||||
boolean r600_dma_blit(struct pipe_context *ctx,
|
||||
struct pipe_resource *dst,
|
||||
unsigned dst_level,
|
||||
unsigned dst_x, unsigned dst_y, unsigned dst_z,
|
||||
struct pipe_resource *src,
|
||||
unsigned src_level,
|
||||
const struct pipe_box *src_box);
|
||||
void evergreen_dma_copy(struct r600_context *rctx,
|
||||
struct pipe_resource *dst,
|
||||
struct pipe_resource *src,
|
||||
unsigned long dst_offset,
|
||||
unsigned long src_offset,
|
||||
unsigned long size);
|
||||
boolean evergreen_dma_blit(struct pipe_context *ctx,
|
||||
struct pipe_resource *dst,
|
||||
unsigned dst_level,
|
||||
unsigned dst_x, unsigned dst_y, unsigned dst_z,
|
||||
struct pipe_resource *src,
|
||||
unsigned src_level,
|
||||
const struct pipe_box *src_box);
|
||||
void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block, unsigned pkt_flags);
|
||||
void r600_cp_dma_copy_buffer(struct r600_context *rctx,
|
||||
struct pipe_resource *dst, uint64_t dst_offset,
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
#include "r600_pipe.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_surface.h"
|
||||
|
||||
static void r600_buffer_destroy(struct pipe_screen *screen,
|
||||
struct pipe_resource *buf)
|
||||
|
|
@ -179,13 +180,27 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
|
|||
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
|
||||
|
||||
if (rtransfer->staging) {
|
||||
struct pipe_box box;
|
||||
u_box_1d(rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT,
|
||||
transfer->box.width, &box);
|
||||
struct pipe_resource *dst, *src;
|
||||
unsigned soffset, doffset, size;
|
||||
|
||||
dst = transfer->resource;
|
||||
src = &rtransfer->staging->b.b;
|
||||
size = transfer->box.width;
|
||||
doffset = transfer->box.x;
|
||||
soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT;
|
||||
/* Copy the staging buffer into the original one. */
|
||||
r600_copy_buffer(pipe, transfer->resource, transfer->box.x,
|
||||
&rtransfer->staging->b.b, &box);
|
||||
if (rctx->rings.dma.cs && !(size % 4) && !(doffset % 4) && !(soffset)) {
|
||||
if (rctx->screen->chip_class >= EVERGREEN) {
|
||||
evergreen_dma_copy(rctx, dst, src, doffset, soffset, size);
|
||||
} else {
|
||||
r600_dma_copy(rctx, dst, src, doffset, soffset, size);
|
||||
}
|
||||
} else {
|
||||
struct pipe_box box;
|
||||
|
||||
u_box_1d(soffset, size, &box);
|
||||
r600_copy_buffer(pipe, dst, doffset, src, &box);
|
||||
}
|
||||
pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
|
||||
}
|
||||
util_slab_free(&rctx->pool_transfers, transfer);
|
||||
|
|
|
|||
|
|
@ -762,8 +762,6 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
|
|||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
r600_begin_new_cs(ctx);
|
||||
}
|
||||
|
||||
void r600_begin_new_cs(struct r600_context *ctx)
|
||||
|
|
@ -1129,3 +1127,49 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
|
|||
dst_offset += byte_count;
|
||||
}
|
||||
}
|
||||
|
||||
void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw)
|
||||
{
|
||||
/* The number of dwords we already used in the DMA so far. */
|
||||
num_dw += ctx->rings.dma.cs->cdw;
|
||||
/* Flush if there's not enough space. */
|
||||
if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
|
||||
ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
|
||||
}
|
||||
}
|
||||
|
||||
void r600_dma_copy(struct r600_context *rctx,
|
||||
struct pipe_resource *dst,
|
||||
struct pipe_resource *src,
|
||||
unsigned long dst_offset,
|
||||
unsigned long src_offset,
|
||||
unsigned long size)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
|
||||
unsigned i, ncopy, csize, shift;
|
||||
struct r600_resource *rdst = (struct r600_resource*)dst;
|
||||
struct r600_resource *rsrc = (struct r600_resource*)src;
|
||||
|
||||
/* make sure that the dma ring is only one active */
|
||||
rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC);
|
||||
|
||||
size >>= 2;
|
||||
shift = 2;
|
||||
ncopy = (size / 0xffff) + !!(size % 0xffff);
|
||||
|
||||
r600_need_dma_space(rctx, ncopy * 5);
|
||||
for (i = 0; i < ncopy; i++) {
|
||||
csize = size < 0xffff ? size : 0xffff;
|
||||
/* emit reloc before writting cs so that cs is always in consistent state */
|
||||
r600_context_bo_reloc(rctx, &rctx->rings.dma, rsrc, RADEON_USAGE_READ);
|
||||
r600_context_bo_reloc(rctx, &rctx->rings.dma, rdst, RADEON_USAGE_WRITE);
|
||||
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize);
|
||||
cs->buf[cs->cdw++] = dst_offset & 0xfffffffc;
|
||||
cs->buf[cs->cdw++] = src_offset & 0xfffffffc;
|
||||
cs->buf[cs->cdw++] = (dst_offset >> 32UL) & 0xff;
|
||||
cs->buf[cs->cdw++] = (src_offset >> 32UL) & 0xff;
|
||||
dst_offset += csize << shift;
|
||||
src_offset += csize << shift;
|
||||
size -= csize;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@
|
|||
#include "util/u_memory.h"
|
||||
#include "util/u_simple_shaders.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "util/u_math.h"
|
||||
#include "vl/vl_decoder.h"
|
||||
#include "vl/vl_video_buffer.h"
|
||||
#include "os/os_time.h"
|
||||
|
|
@ -128,12 +129,13 @@ static void r600_flush(struct pipe_context *ctx, unsigned flags)
|
|||
}
|
||||
|
||||
r600_context_flush(rctx, flags);
|
||||
rctx->rings.gfx.flushing = false;
|
||||
r600_begin_new_cs(rctx);
|
||||
|
||||
/* Re-enable render condition. */
|
||||
if (render_cond) {
|
||||
ctx->render_condition(ctx, render_cond, render_cond_mode);
|
||||
}
|
||||
rctx->rings.gfx.flushing = false;
|
||||
}
|
||||
|
||||
static void r600_flush_from_st(struct pipe_context *ctx,
|
||||
|
|
@ -1111,8 +1113,10 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
|
|||
|
||||
if (rscreen->chip_class >= EVERGREEN) {
|
||||
rscreen->screen.is_format_supported = evergreen_is_format_supported;
|
||||
rscreen->dma_blit = &evergreen_dma_blit;
|
||||
} else {
|
||||
rscreen->screen.is_format_supported = r600_is_format_supported;
|
||||
rscreen->dma_blit = &r600_dma_blit;
|
||||
}
|
||||
rscreen->screen.is_video_format_supported = vl_video_buffer_is_format_supported;
|
||||
rscreen->screen.context_create = r600_create_context;
|
||||
|
|
|
|||
|
|
@ -220,6 +220,14 @@ enum r600_msaa_texture_mode {
|
|||
MSAA_TEXTURE_COMPRESSED
|
||||
};
|
||||
|
||||
/* Signature of the per-chip async DMA texture blit hook stored in
 * r600_screen::dma_blit. */
typedef boolean (*r600g_dma_blit_t)(struct pipe_context *ctx,
				    struct pipe_resource *dst, unsigned dst_level,
				    unsigned dst_x, unsigned dst_y, unsigned dst_z,
				    struct pipe_resource *src, unsigned src_level,
				    const struct pipe_box *src_box);
|
||||
|
||||
struct r600_screen {
|
||||
struct pipe_screen screen;
|
||||
struct radeon_winsys *ws;
|
||||
|
|
@ -243,6 +251,7 @@ struct r600_screen {
|
|||
uint32_t *trace_ptr;
|
||||
unsigned cs_count;
|
||||
#endif
|
||||
r600g_dma_blit_t dma_blit;
|
||||
};
|
||||
|
||||
struct r600_pipe_sampler_view {
|
||||
|
|
|
|||
|
|
@ -2945,3 +2945,193 @@ void r600_update_db_shader_control(struct r600_context * rctx)
|
|||
rctx->db_misc_state.atom.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE unsigned r600_array_mode(unsigned mode)
|
||||
{
|
||||
switch (mode) {
|
||||
case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_0280A0_ARRAY_LINEAR_ALIGNED;
|
||||
break;
|
||||
case RADEON_SURF_MODE_1D: return V_0280A0_ARRAY_1D_TILED_THIN1;
|
||||
break;
|
||||
case RADEON_SURF_MODE_2D: return V_0280A0_ARRAY_2D_TILED_THIN1;
|
||||
default:
|
||||
case RADEON_SURF_MODE_LINEAR: return V_0280A0_ARRAY_LINEAR_GENERAL;
|
||||
}
|
||||
}
|
||||
|
||||
static boolean r600_dma_copy_tile(struct r600_context *rctx,
|
||||
struct pipe_resource *dst,
|
||||
unsigned dst_level,
|
||||
unsigned dst_x,
|
||||
unsigned dst_y,
|
||||
unsigned dst_z,
|
||||
struct pipe_resource *src,
|
||||
unsigned src_level,
|
||||
unsigned src_x,
|
||||
unsigned src_y,
|
||||
unsigned src_z,
|
||||
unsigned copy_height,
|
||||
unsigned pitch,
|
||||
unsigned bpp)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
|
||||
struct r600_texture *rsrc = (struct r600_texture*)src;
|
||||
struct r600_texture *rdst = (struct r600_texture*)dst;
|
||||
unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
|
||||
unsigned ncopy, height, cheight, detile, i, x, y, z, src_mode, dst_mode;
|
||||
unsigned long base, addr;
|
||||
|
||||
/* make sure that the dma ring is only one active */
|
||||
rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC);
|
||||
|
||||
dst_mode = rdst->surface.level[dst_level].mode;
|
||||
src_mode = rsrc->surface.level[src_level].mode;
|
||||
/* downcast linear aligned to linear to simplify test */
|
||||
src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode;
|
||||
dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode;
|
||||
assert(dst_mode != src_mode);
|
||||
|
||||
y = 0;
|
||||
lbpp = util_logbase2(bpp);
|
||||
pitch_tile_max = ((pitch / bpp) >> 3) - 1;
|
||||
|
||||
if (dst_mode == RADEON_SURF_MODE_LINEAR) {
|
||||
/* T2L */
|
||||
array_mode = r600_array_mode(src_mode);
|
||||
slice_tile_max = (((pitch * rsrc->surface.level[src_level].npix_y) >> 6) / bpp) - 1;
|
||||
/* linear height must be the same as the slice tile max height, it's ok even
|
||||
* if the linear destination/source have smaller heigh as the size of the
|
||||
* dma packet will be using the copy_height which is always smaller or equal
|
||||
* to the linear height
|
||||
*/
|
||||
height = rsrc->surface.level[src_level].npix_y;
|
||||
detile = 1;
|
||||
x = src_x;
|
||||
y = src_y;
|
||||
z = src_z;
|
||||
base = rsrc->surface.level[src_level].offset;
|
||||
addr = rdst->surface.level[dst_level].offset;
|
||||
addr += rdst->surface.level[dst_level].slice_size * dst_z;
|
||||
addr += dst_y * pitch + dst_x * bpp;
|
||||
} else {
|
||||
/* L2T */
|
||||
array_mode = r600_array_mode(dst_mode);
|
||||
slice_tile_max = (((pitch * rdst->surface.level[dst_level].npix_y) >> 6) / bpp) - 1;
|
||||
/* linear height must be the same as the slice tile max height, it's ok even
|
||||
* if the linear destination/source have smaller heigh as the size of the
|
||||
* dma packet will be using the copy_height which is always smaller or equal
|
||||
* to the linear height
|
||||
*/
|
||||
height = rdst->surface.level[dst_level].npix_y;
|
||||
detile = 0;
|
||||
x = dst_x;
|
||||
y = dst_y;
|
||||
z = dst_z;
|
||||
base = rdst->surface.level[dst_level].offset;
|
||||
addr = rsrc->surface.level[src_level].offset;
|
||||
addr += rsrc->surface.level[src_level].slice_size * src_z;
|
||||
addr += src_y * pitch + src_x * bpp;
|
||||
}
|
||||
/* check that we are in dw/base alignment constraint */
|
||||
if ((addr & 0x3) || (base & 0xff)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
size = (copy_height * pitch) >> 2;
|
||||
ncopy = (size / 0x0000ffff) + !!(size % 0x0000ffff);
|
||||
r600_need_dma_space(rctx, ncopy * 7);
|
||||
for (i = 0; i < ncopy; i++) {
|
||||
cheight = copy_height;
|
||||
if (((cheight * pitch) >> 2) > 0x0000ffff) {
|
||||
cheight = (0x0000ffff << 2) / pitch;
|
||||
}
|
||||
size = (cheight * pitch) >> 2;
|
||||
/* emit reloc before writting cs so that cs is always in consistent state */
|
||||
r600_context_bo_reloc(rctx, &rctx->rings.dma, &rsrc->resource, RADEON_USAGE_READ);
|
||||
r600_context_bo_reloc(rctx, &rctx->rings.dma, &rdst->resource, RADEON_USAGE_WRITE);
|
||||
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size);
|
||||
cs->buf[cs->cdw++] = base >> 8;
|
||||
cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) |
|
||||
(lbpp << 24) | ((height - 1) << 10) |
|
||||
pitch_tile_max;
|
||||
cs->buf[cs->cdw++] = (slice_tile_max << 12) | (z << 0);
|
||||
cs->buf[cs->cdw++] = (x << 3) | (y << 17);
|
||||
cs->buf[cs->cdw++] = addr & 0xfffffffc;
|
||||
cs->buf[cs->cdw++] = (addr >> 32UL) & 0xff;
|
||||
copy_height -= cheight;
|
||||
addr += cheight * pitch;
|
||||
y += cheight;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
boolean r600_dma_blit(struct pipe_context *ctx,
|
||||
struct pipe_resource *dst,
|
||||
unsigned dst_level,
|
||||
unsigned dst_x, unsigned dst_y, unsigned dst_z,
|
||||
struct pipe_resource *src,
|
||||
unsigned src_level,
|
||||
const struct pipe_box *src_box)
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
struct r600_texture *rsrc = (struct r600_texture*)src;
|
||||
struct r600_texture *rdst = (struct r600_texture*)dst;
|
||||
unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height;
|
||||
unsigned src_w, dst_w;
|
||||
|
||||
if (rctx->rings.dma.cs == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
if (src->format != dst->format) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
bpp = rdst->surface.bpe;
|
||||
dst_pitch = rdst->surface.level[dst_level].pitch_bytes;
|
||||
src_pitch = rsrc->surface.level[src_level].pitch_bytes;
|
||||
src_w = rsrc->surface.level[src_level].npix_x;
|
||||
dst_w = rdst->surface.level[dst_level].npix_x;
|
||||
copy_height = src_box->height / rsrc->surface.blk_h;
|
||||
|
||||
dst_mode = rdst->surface.level[dst_level].mode;
|
||||
src_mode = rsrc->surface.level[src_level].mode;
|
||||
/* downcast linear aligned to linear to simplify test */
|
||||
src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode;
|
||||
dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode;
|
||||
|
||||
if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
|
||||
/* strick requirement on r6xx/r7xx */
|
||||
return FALSE;
|
||||
}
|
||||
/* lot of constraint on alignment this should capture them all */
|
||||
if ((src_pitch & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (src_mode == dst_mode) {
|
||||
unsigned long dst_offset, src_offset, size;
|
||||
|
||||
/* simple dma blit would do NOTE code here assume :
|
||||
* src_box.x/y == 0
|
||||
* dst_x/y == 0
|
||||
* dst_pitch == src_pitch
|
||||
*/
|
||||
src_offset= rsrc->surface.level[src_level].offset;
|
||||
src_offset += rsrc->surface.level[src_level].slice_size * src_box->z;
|
||||
src_offset += src_box->y * src_pitch + src_box->x * bpp;
|
||||
dst_offset = rdst->surface.level[dst_level].offset;
|
||||
dst_offset += rdst->surface.level[dst_level].slice_size * dst_z;
|
||||
dst_offset += dst_y * dst_pitch + dst_x * bpp;
|
||||
size = src_box->height * src_pitch;
|
||||
/* must be dw aligned */
|
||||
if ((dst_offset & 0x3) || (src_offset & 0x3) || (size & 0x3)) {
|
||||
return FALSE;
|
||||
}
|
||||
r600_dma_copy(rctx, dst, src, dst_offset, src_offset, size);
|
||||
} else {
|
||||
return r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z,
|
||||
src, src_level, src_box->x, src_box->y, src_box->z,
|
||||
copy_height, dst_pitch, bpp);
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1273,6 +1273,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
|||
return;
|
||||
}
|
||||
|
||||
/* make sure that the gfx ring is only one active */
|
||||
rctx->rings.dma.flush(rctx, RADEON_FLUSH_ASYNC);
|
||||
|
||||
if (!r600_update_derived_state(rctx)) {
|
||||
/* useless to render because current rendering command
|
||||
* can't be achieved
|
||||
|
|
@ -1280,9 +1283,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
|||
return;
|
||||
}
|
||||
|
||||
/* make sure that the gfx ring is only one active */
|
||||
rctx->rings.dma.flush(rctx, RADEON_FLUSH_ASYNC);
|
||||
|
||||
if (info.indexed) {
|
||||
/* Initialize the index buffer struct. */
|
||||
pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer);
|
||||
|
|
|
|||
|
|
@ -35,13 +35,19 @@
|
|||
/* Copy from a full GPU texture to a transfer's staging one. */
|
||||
static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context*)ctx;
|
||||
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
|
||||
struct pipe_resource *dst = &rtransfer->staging->b.b;
|
||||
struct pipe_resource *src = transfer->resource;
|
||||
|
||||
if (src->nr_samples <= 1) {
|
||||
ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0,
|
||||
src, transfer->level, &transfer->box);
|
||||
if (!rctx->screen->dma_blit(ctx, dst, 0, 0, 0, 0,
|
||||
src, transfer->level,
|
||||
&transfer->box)) {
|
||||
/* async dma could not be use */
|
||||
ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0,
|
||||
src, transfer->level, &transfer->box);
|
||||
}
|
||||
} else {
|
||||
/* Resolve the resource. */
|
||||
struct pipe_blit_info blit;
|
||||
|
|
@ -66,16 +72,22 @@ static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_t
|
|||
/* Copy from a transfer's staging texture to a full GPU one. */
|
||||
static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context*)ctx;
|
||||
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
|
||||
struct pipe_resource *texture = transfer->resource;
|
||||
struct pipe_box sbox;
|
||||
|
||||
u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);
|
||||
|
||||
ctx->resource_copy_region(ctx, texture, transfer->level,
|
||||
transfer->box.x, transfer->box.y, transfer->box.z,
|
||||
&rtransfer->staging->b.b,
|
||||
0, &sbox);
|
||||
if (!rctx->screen->dma_blit(ctx, texture, transfer->level,
|
||||
transfer->box.x, transfer->box.y, transfer->box.z,
|
||||
&rtransfer->staging->b.b, 0, &sbox)) {
|
||||
/* async dma could not be use */
|
||||
ctx->resource_copy_region(ctx, texture, transfer->level,
|
||||
transfer->box.x, transfer->box.y, transfer->box.z,
|
||||
&rtransfer->staging->b.b,
|
||||
0, &sbox);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned r600_texture_get_offset(struct r600_texture *rtex,
|
||||
|
|
|
|||
|
|
@ -3681,4 +3681,19 @@
|
|||
#define SQ_TEX_INST_SAMPLE_C_G_LB 0x1E
|
||||
#define SQ_TEX_INST_SAMPLE_C_G_LZ 0x1F
|
||||
|
||||
/* async DMA packets
 *
 * Header dword layout: cmd in bits 31:28, t in bit 23, s in bit 22 and the
 * count n in bits 15:0. The masks are unsigned so that commands with bit 3
 * set (e.g. DMA_PACKET_NOP) do not overflow a signed int when shifted into
 * the top nibble.
 */
#define DMA_PACKET(cmd, t, s, n)	((((cmd) & 0xFu) << 28) |	\
					 (((t) & 0x1u) << 23) |		\
					 (((s) & 0x1u) << 22) |		\
					 (((n) & 0xFFFFu) << 0))
/* async DMA Packet types */
#define DMA_PACKET_WRITE			0x2
#define DMA_PACKET_COPY				0x3
#define DMA_PACKET_INDIRECT_BUFFER		0x4
#define DMA_PACKET_SEMAPHORE			0x5
#define DMA_PACKET_FENCE			0x6
#define DMA_PACKET_TRAP				0x7
#define DMA_PACKET_CONSTANT_FILL		0xd /* 7xx only */
#define DMA_PACKET_NOP				0xf
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue