mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 11:58:10 +02:00
radeonsi: merge SI and CI dma_clear_buffer and remove the callback
Also use assertions for the requirement that offset and size are multiples of 4. Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
This commit is contained in:
parent
be0bd95abf
commit
1119fe5c25
8 changed files with 66 additions and 90 deletions
|
|
@ -120,8 +120,7 @@ void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffe
|
|||
{
|
||||
struct si_context *sctx = (struct si_context*)context;
|
||||
|
||||
sctx->dma_clear_buffer(sctx, &buffer->res->b.b, 0,
|
||||
buffer->res->buf->size, 0);
|
||||
si_sdma_clear_buffer(sctx, &buffer->res->b.b, 0, buffer->res->buf->size, 0);
|
||||
context->flush(context, NULL, 0);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -67,46 +67,6 @@ static void cik_sdma_copy_buffer(struct si_context *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
static void cik_sdma_clear_buffer(struct si_context *sctx,
|
||||
struct pipe_resource *dst,
|
||||
uint64_t offset,
|
||||
uint64_t size,
|
||||
unsigned clear_value)
|
||||
{
|
||||
struct radeon_cmdbuf *cs = sctx->dma_cs;
|
||||
unsigned i, ncopy, csize;
|
||||
struct r600_resource *rdst = r600_resource(dst);
|
||||
|
||||
if (!cs || offset % 4 != 0 || size % 4 != 0 ||
|
||||
dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
|
||||
sctx->b.clear_buffer(&sctx->b, dst, offset, size, &clear_value, 4);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Mark the buffer range of destination as valid (initialized),
|
||||
* so that transfer_map knows it should wait for the GPU when mapping
|
||||
* that range. */
|
||||
util_range_add(&rdst->valid_buffer_range, offset, offset + size);
|
||||
|
||||
offset += rdst->gpu_address;
|
||||
|
||||
/* the same maximum size as for copying */
|
||||
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
|
||||
si_need_dma_space(sctx, ncopy * 5, rdst, NULL);
|
||||
|
||||
for (i = 0; i < ncopy; i++) {
|
||||
csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
|
||||
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_PACKET_CONSTANT_FILL, 0,
|
||||
0x8000 /* dword copy */));
|
||||
radeon_emit(cs, offset);
|
||||
radeon_emit(cs, offset >> 32);
|
||||
radeon_emit(cs, clear_value);
|
||||
radeon_emit(cs, sctx->chip_class >= GFX9 ? csize - 1 : csize);
|
||||
offset += csize;
|
||||
size -= csize;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w)
|
||||
{
|
||||
width = u_minify(width, level);
|
||||
|
|
@ -554,5 +514,4 @@ fallback:
|
|||
void cik_init_sdma_functions(struct si_context *sctx)
|
||||
{
|
||||
sctx->dma_copy = cik_sdma_copy;
|
||||
sctx->dma_clear_buffer = cik_sdma_clear_buffer;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -301,7 +301,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
|||
* of them are moved to SDMA thanks to this. */
|
||||
!ws->cs_is_buffer_referenced(sctx->gfx_cs, rdst->buf,
|
||||
RADEON_USAGE_READWRITE))) {
|
||||
sctx->dma_clear_buffer(sctx, dst, offset, dma_clear_size, value);
|
||||
si_sdma_clear_buffer(sctx, dst, offset, dma_clear_size, value);
|
||||
|
||||
offset += dma_clear_size;
|
||||
size -= dma_clear_size;
|
||||
|
|
|
|||
|
|
@ -77,45 +77,6 @@ static void si_dma_copy_buffer(struct si_context *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
static void si_dma_clear_buffer(struct si_context *sctx,
|
||||
struct pipe_resource *dst,
|
||||
uint64_t offset,
|
||||
uint64_t size,
|
||||
unsigned clear_value)
|
||||
{
|
||||
struct radeon_cmdbuf *cs = sctx->dma_cs;
|
||||
unsigned i, ncopy, csize;
|
||||
struct r600_resource *rdst = r600_resource(dst);
|
||||
|
||||
if (!cs || offset % 4 != 0 || size % 4 != 0 ||
|
||||
dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
|
||||
sctx->b.clear_buffer(&sctx->b, dst, offset, size, &clear_value, 4);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Mark the buffer range of destination as valid (initialized),
|
||||
* so that transfer_map knows it should wait for the GPU when mapping
|
||||
* that range. */
|
||||
util_range_add(&rdst->valid_buffer_range, offset, offset + size);
|
||||
|
||||
offset += rdst->gpu_address;
|
||||
|
||||
/* the same maximum size as for copying */
|
||||
ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
|
||||
si_need_dma_space(sctx, ncopy * 4, rdst, NULL);
|
||||
|
||||
for (i = 0; i < ncopy; i++) {
|
||||
csize = MIN2(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
|
||||
radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_CONSTANT_FILL, 0,
|
||||
csize / 4));
|
||||
radeon_emit(cs, offset);
|
||||
radeon_emit(cs, clear_value);
|
||||
radeon_emit(cs, (offset >> 32) << 16);
|
||||
offset += csize;
|
||||
size -= csize;
|
||||
}
|
||||
}
|
||||
|
||||
static void si_dma_copy_tile(struct si_context *ctx,
|
||||
struct pipe_resource *dst,
|
||||
unsigned dst_level,
|
||||
|
|
@ -325,5 +286,4 @@ fallback:
|
|||
void si_init_dma_functions(struct si_context *sctx)
|
||||
{
|
||||
sctx->dma_copy = si_dma_copy;
|
||||
sctx->dma_clear_buffer = si_dma_clear_buffer;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,6 +64,65 @@ void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst,
|
|||
radeon_emit(cs, va >> 32);
|
||||
}
|
||||
|
||||
/*
 * Clear [offset, offset+size) of dst to a 32-bit value on the DMA ring.
 * Merged implementation for all chips; the packet encoding differs between
 * SI and CIK+ ASICs.
 *
 * Requirements (asserted): offset and size are multiples of 4 and size is
 * non-zero. Falls back to the generic pipe clear_buffer when there is no
 * DMA CS or the buffer is sparse.
 */
void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
			  uint64_t offset, uint64_t size, unsigned clear_value)
{
	struct radeon_cmdbuf *cs = sctx->dma_cs;
	struct r600_resource *rdst = r600_resource(dst);
	unsigned ncopy;

	assert(offset % 4 == 0);
	assert(size);
	assert(size % 4 == 0);

	if (!cs || dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
		sctx->b.clear_buffer(&sctx->b, dst, offset, size, &clear_value, 4);
		return;
	}

	/* Mark the destination range as valid (initialized) so that
	 * transfer_map knows it should wait for the GPU when mapping it. */
	util_range_add(&rdst->valid_buffer_range, offset, offset + size);

	/* Switch to the buffer's GPU virtual address. */
	offset += rdst->gpu_address;

	if (sctx->chip_class == SI) {
		/* SI encoding; the same maximum chunk size as for copying. */
		ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
		si_need_dma_space(sctx, ncopy * 4, rdst, NULL);

		while (size) {
			unsigned csize = MIN2(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);

			/* The packet header carries the fill length in dwords. */
			radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_CONSTANT_FILL, 0,
						      csize / 4));
			radeon_emit(cs, offset);
			radeon_emit(cs, clear_value);
			radeon_emit(cs, (offset >> 32) << 16);
			offset += csize;
			size -= csize;
		}
	} else {
		/* CI, VI, Vega/Raven, etc.; the same maximum chunk size as
		 * for copying. */
		ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
		si_need_dma_space(sctx, ncopy * 5, rdst, NULL);

		while (size) {
			unsigned csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);

			radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_PACKET_CONSTANT_FILL, 0,
							0x8000 /* dword copy */));
			radeon_emit(cs, offset);
			radeon_emit(cs, offset >> 32);
			radeon_emit(cs, clear_value);
			/* GFX9 uses a size-minus-one encoding for the count. */
			radeon_emit(cs, sctx->chip_class >= GFX9 ? csize - 1 : csize);
			offset += csize;
			size -= csize;
		}
	}
}
|
||||
|
||||
void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
|
||||
struct r600_resource *dst, struct r600_resource *src)
|
||||
{
|
||||
|
|
@ -170,7 +229,7 @@ void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst
|
|||
struct si_context *ctx = (struct si_context*)sscreen->aux_context;
|
||||
|
||||
mtx_lock(&sscreen->aux_context_lock);
|
||||
ctx->dma_clear_buffer(ctx, dst, offset, size, value);
|
||||
si_sdma_clear_buffer(ctx, dst, offset, size, value);
|
||||
sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
|
||||
mtx_unlock(&sscreen->aux_context_lock);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -750,7 +750,7 @@ static void si_test_vmfault(struct si_screen *sscreen)
|
|||
puts("VM fault test: CP - done.");
|
||||
}
|
||||
if (sscreen->debug_flags & DBG(TEST_VMFAULT_SDMA)) {
|
||||
sctx->dma_clear_buffer(sctx, buf, 0, 4, 0);
|
||||
si_sdma_clear_buffer(sctx, buf, 0, 4, 0);
|
||||
ctx->flush(ctx, NULL, 0);
|
||||
puts("VM fault test: SDMA - done.");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1029,9 +1029,6 @@ struct si_context {
|
|||
unsigned src_level,
|
||||
const struct pipe_box *src_box);
|
||||
|
||||
void (*dma_clear_buffer)(struct si_context *sctx, struct pipe_resource *dst,
|
||||
uint64_t offset, uint64_t size, unsigned value);
|
||||
|
||||
struct si_tracked_regs tracked_regs;
|
||||
};
|
||||
|
||||
|
|
@ -1159,6 +1156,8 @@ void si_init_dma_functions(struct si_context *sctx);
|
|||
/* si_dma_cs.c */
|
||||
void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst,
|
||||
uint64_t offset);
|
||||
void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
||||
uint64_t offset, uint64_t size, unsigned clear_value);
|
||||
void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
|
||||
struct r600_resource *dst, struct r600_resource *src);
|
||||
void si_flush_dma_cs(struct si_context *ctx, unsigned flags,
|
||||
|
|
|
|||
|
|
@ -191,7 +191,7 @@ void si_test_dma_perf(struct si_screen *sscreen)
|
|||
u_box_1d(0, size, &box);
|
||||
sctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, 0, &box);
|
||||
} else {
|
||||
sctx->dma_clear_buffer(sctx, dst, 0, size, clear_value);
|
||||
si_sdma_clear_buffer(sctx, dst, 0, size, clear_value);
|
||||
}
|
||||
} else {
|
||||
/* Compute */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue