radeonsi: completely rework updating descriptors without CP DMA

The patch has a better explanation. Just a summary here:
- The CPU always uploads a whole descriptor array to previously-unused memory.
- CP DMA isn't used.
- No caches need to be flushed.
- All descriptors are always up-to-date in memory even after a hang, because
  CP DMA doesn't serve as a middle man to update them.

This should bring:
- better hang recovery (descriptors are always up-to-date)
- better GPU performance (no KCACHE and TC flushes)
- worse CPU performance for partial updates (only whole arrays are uploaded)
- less used IB space (no CP_DMA and WRITE_DATA packets)
- simpler code
- hopefully, some of the corruption issues with SI cards will go away.
  If not, we'll know the issue is not here.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
This commit is contained in:
Marek Olšák 2015-07-25 00:53:16 +02:00
parent 781dc7c0e1
commit b0528118df
4 changed files with 129 additions and 272 deletions

View file

@ -24,14 +24,23 @@
* Marek Olšák <marek.olsak@amd.com>
*/
/* Resource binding slots and sampler states (each described with 8 or 4 dwords)
* live in memory on SI.
/* Resource binding slots and sampler states (each described with 8 or
* 4 dwords) are stored in lists in memory which is accessed by shaders
* using scalar load instructions.
*
* This file is responsible for managing lists of resources and sampler states
* in memory and binding them, which means updating those structures in memory.
* This file is responsible for managing such lists. It keeps a copy of all
* descriptors in CPU memory and re-uploads a whole list if some slots have
* been changed.
*
* There is also code for updating shader pointers to resources and sampler
* states. CP DMA functions are here too.
* This code is also reponsible for updating shader pointers to those lists.
*
* Note that CP DMA can't be used for updating the lists, because a GPU hang
* could leave the list in a mid-IB state and the next IB would get wrong
* descriptors and the whole context would be unusable at that point.
* (Note: The register shadowing can't be used due to the same reason)
*
* Also, uploading descriptors to newly allocated memory doesn't require
* a KCACHE flush.
*/
#include "radeon/r600_cs.h"
@ -42,7 +51,6 @@
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#define SI_NUM_CONTEXTS 16
/* NULL image and buffer descriptor.
*
@ -139,159 +147,62 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
}
}
static void si_init_descriptors(struct si_context *sctx,
struct si_descriptors *desc,
static void si_init_descriptors(struct si_descriptors *desc,
unsigned shader_userdata_index,
unsigned element_dw_size,
unsigned num_elements,
void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
unsigned num_elements)
{
assert(num_elements <= sizeof(desc->enabled_mask)*8);
assert(num_elements <= sizeof(desc->dirty_mask)*8);
int i;
desc->atom.emit = (void*)emit_func;
desc->shader_userdata_offset = shader_userdata_index * 4;
assert(num_elements <= sizeof(desc->enabled_mask)*8);
desc->list = CALLOC(num_elements, element_dw_size * 4);
desc->element_dw_size = element_dw_size;
desc->num_elements = num_elements;
desc->context_size = num_elements * element_dw_size * 4;
desc->list_dirty = true; /* upload the list before the next draw */
desc->shader_userdata_offset = shader_userdata_index * 4;
desc->buffer = (struct r600_resource*)
pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
PIPE_USAGE_DEFAULT,
SI_NUM_CONTEXTS * desc->context_size);
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
/* We don't check for CS space here, because this should be called
* only once at context initialization. */
si_emit_cp_dma_clear_buffer(sctx, desc->buffer->gpu_address,
desc->buffer->b.b.width0, 0,
R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2);
/* Initialize the array to NULL descriptors if the element size is 8. */
if (element_dw_size == 8)
for (i = 0; i < num_elements; i++)
memcpy(desc->list + i*element_dw_size, null_descriptor,
sizeof(null_descriptor));
}
static void si_release_descriptors(struct si_descriptors *desc)
{
pipe_resource_reference((struct pipe_resource**)&desc->buffer, NULL);
FREE(desc->list);
}
static void si_update_descriptors(struct si_context *sctx,
static bool si_upload_descriptors(struct si_context *sctx,
struct si_descriptors *desc)
{
if (desc->dirty_mask) {
desc->atom.num_dw =
7 + /* copy */
(4 + desc->element_dw_size) * util_bitcount(desc->dirty_mask); /* update */
unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
void *ptr;
desc->atom.dirty = true;
desc->pointer_dirty = true;
sctx->shader_userdata.atom.dirty = true;
if (!desc->list_dirty)
return true;
/* TODO: Investigate if these flushes can be removed after
* adding CE support. */
u_upload_alloc(sctx->b.uploader, 0, list_size,
&desc->buffer_offset,
(struct pipe_resource**)&desc->buffer, &ptr);
if (!desc->buffer)
return false; /* skip the draw call */
/* The descriptors are read with the K cache. */
sctx->b.flags |= SI_CONTEXT_INV_KCACHE;
util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
/* Since SI uses uncached CP DMA to update descriptors,
* we have to flush TC L2, which is used to fetch constants
* along with KCACHE. */
if (sctx->b.chip_class == SI)
sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
} else {
desc->atom.dirty = false;
}
}
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
static void si_emit_descriptors(struct si_context *sctx,
struct si_descriptors *desc,
uint32_t **descriptors)
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
uint64_t va_base;
int packet_start = 0;
int packet_size = 0;
int last_index = desc->num_elements; /* point to a non-existing element */
uint64_t dirty_mask = desc->dirty_mask;
unsigned new_context_id = (desc->current_context_id + 1) % SI_NUM_CONTEXTS;
assert(dirty_mask);
va_base = desc->buffer->gpu_address;
/* Copy the descriptors to a new context slot. */
si_emit_cp_dma_copy_buffer(sctx,
va_base + new_context_id * desc->context_size,
va_base + desc->current_context_id * desc->context_size,
desc->context_size, R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2);
va_base += new_context_id * desc->context_size;
/* Update the descriptors.
* Updates of consecutive descriptors are merged to one WRITE_DATA packet.
*
* XXX When unbinding lots of resources, consider clearing the memory
* with CP DMA instead of emitting zeros.
*/
while (dirty_mask) {
int i = u_bit_scan64(&dirty_mask);
assert(i < desc->num_elements);
if (last_index+1 == i && packet_size) {
/* Append new data at the end of the last packet. */
packet_size += desc->element_dw_size;
cs->buf[packet_start] = PKT3(PKT3_WRITE_DATA, packet_size, 0);
} else {
/* Start a new packet. */
uint64_t va = va_base + i * desc->element_dw_size * 4;
packet_start = cs->cdw;
packet_size = 2 + desc->element_dw_size;
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, packet_size, 0));
radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(sctx->b.chip_class == SI ?
PKT3_WRITE_DATA_DST_SEL_MEM_SYNC :
PKT3_WRITE_DATA_DST_SEL_TC_L2) |
PKT3_WRITE_DATA_WR_CONFIRM |
PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME));
radeon_emit(cs, va & 0xFFFFFFFFUL);
radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL);
}
radeon_emit_array(cs, descriptors[i], desc->element_dw_size);
last_index = i;
}
desc->dirty_mask = 0;
desc->current_context_id = new_context_id;
desc->list_dirty = false;
desc->pointer_dirty = true;
sctx->shader_userdata.atom.dirty = true;
return true;
}
/* SAMPLER VIEWS */
static void si_emit_sampler_views(struct si_context *sctx, struct r600_atom *atom)
{
struct si_sampler_views *views = (struct si_sampler_views*)atom;
si_emit_descriptors(sctx, &views->desc, views->desc_data);
}
static void si_init_sampler_views(struct si_context *sctx,
struct si_sampler_views *views)
{
int i;
si_init_descriptors(sctx, &views->desc, SI_SGPR_RESOURCE,
8, SI_NUM_SAMPLER_VIEWS, si_emit_sampler_views);
for (i = 0; i < views->desc.num_elements; i++) {
views->desc_data[i] = null_descriptor;
views->desc.dirty_mask |= 1llu << i;
}
si_update_descriptors(sctx, &views->desc);
}
static void si_release_sampler_views(struct si_sampler_views *views)
{
int i;
@ -332,6 +243,8 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
si_get_resource_ro_priority(rview->resource));
}
if (!views->desc.buffer)
return;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
}
@ -354,17 +267,16 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
rview->resource, RADEON_USAGE_READ,
si_get_resource_ro_priority(rview->resource));
pipe_sampler_view_reference(&views->views[slot], view);
views->desc_data[slot] = view_desc;
memcpy(views->desc.list + slot*8, view_desc, 8*4);
views->desc.enabled_mask |= 1llu << slot;
} else {
pipe_sampler_view_reference(&views->views[slot], NULL);
views->desc_data[slot] = null_descriptor;
memcpy(views->desc.list + slot*8, null_descriptor, 8*4);
views->desc.enabled_mask &= ~(1llu << slot);
}
views->desc.dirty_mask |= 1llu << slot;
views->desc.list_dirty = true;
}
static void si_set_sampler_views(struct pipe_context *ctx,
@ -423,22 +335,15 @@ static void si_set_sampler_views(struct pipe_context *ctx,
NULL, NULL);
}
}
si_update_descriptors(sctx, &samplers->views.desc);
}
/* SAMPLER STATES */
static void si_emit_sampler_states(struct si_context *sctx, struct r600_atom *atom)
{
struct si_sampler_states *states = (struct si_sampler_states*)atom;
si_emit_descriptors(sctx, &states->desc, states->desc_data);
}
static void si_sampler_states_begin_new_cs(struct si_context *sctx,
struct si_sampler_states *states)
{
if (!states->desc.buffer)
return;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
}
@ -460,64 +365,39 @@ void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
for (i = 0; i < count; i++) {
unsigned slot = start + i;
if (!sstates[i]) {
samplers->desc.dirty_mask &= ~(1llu << slot);
if (!sstates[i])
continue;
}
samplers->desc_data[slot] = sstates[i]->val;
samplers->desc.dirty_mask |= 1llu << slot;
memcpy(samplers->desc.list + slot*4, sstates[i]->val, 4*4);
samplers->desc.list_dirty = true;
}
si_update_descriptors(sctx, &samplers->desc);
}
/* BUFFER RESOURCES */
static void si_emit_buffer_resources(struct si_context *sctx, struct r600_atom *atom)
{
struct si_buffer_resources *buffers = (struct si_buffer_resources*)atom;
si_emit_descriptors(sctx, &buffers->desc, buffers->desc_data);
}
static void si_init_buffer_resources(struct si_context *sctx,
struct si_buffer_resources *buffers,
static void si_init_buffer_resources(struct si_buffer_resources *buffers,
unsigned num_buffers,
unsigned shader_userdata_index,
enum radeon_bo_usage shader_usage,
enum radeon_bo_priority priority)
{
int i;
buffers->num_buffers = num_buffers;
buffers->shader_usage = shader_usage;
buffers->priority = priority;
buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
buffers->desc_storage = CALLOC(num_buffers, sizeof(uint32_t) * 4);
/* si_emit_descriptors only accepts an array of arrays.
* This adds such an array. */
buffers->desc_data = CALLOC(num_buffers, sizeof(uint32_t*));
for (i = 0; i < num_buffers; i++) {
buffers->desc_data[i] = &buffers->desc_storage[i*4];
}
si_init_descriptors(sctx, &buffers->desc, shader_userdata_index, 4,
num_buffers, si_emit_buffer_resources);
si_init_descriptors(&buffers->desc, shader_userdata_index, 4,
num_buffers);
}
static void si_release_buffer_resources(struct si_buffer_resources *buffers)
{
int i;
for (i = 0; i < buffers->num_buffers; i++) {
for (i = 0; i < buffers->desc.num_elements; i++) {
pipe_resource_reference(&buffers->buffers[i], NULL);
}
FREE(buffers->buffers);
FREE(buffers->desc_storage);
FREE(buffers->desc_data);
si_release_descriptors(&buffers->desc);
}
@ -535,6 +415,8 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
buffers->shader_usage, buffers->priority);
}
if (!buffers->desc.buffer)
return;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
buffers->desc.buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_SHADER_DATA);
@ -560,12 +442,15 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
(struct r600_resource*)sctx->vertex_buffer[vb].buffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
}
if (!desc->buffer)
return;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
desc->buffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA);
}
void si_update_vertex_buffers(struct si_context *sctx)
static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
{
struct si_descriptors *desc = &sctx->vertex_buffers;
bool bound[SI_NUM_VERTEX_BUFFERS] = {};
@ -573,8 +458,10 @@ void si_update_vertex_buffers(struct si_context *sctx)
uint64_t va;
uint32_t *ptr;
if (!sctx->vertex_buffers_dirty)
return true;
if (!count || !sctx->vertex_elements)
return;
return true;
/* Vertex buffer descriptors are the only ones which are uploaded
* directly through a staging buffer and don't go through
@ -582,13 +469,14 @@ void si_update_vertex_buffers(struct si_context *sctx)
*/
u_upload_alloc(sctx->b.uploader, 0, count * 16, &desc->buffer_offset,
(struct pipe_resource**)&desc->buffer, (void**)&ptr);
if (!desc->buffer)
return false;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
desc->buffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA);
assert(count <= SI_NUM_VERTEX_BUFFERS);
assert(desc->current_context_id == 0);
for (i = 0; i < count; i++) {
struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
@ -640,6 +528,8 @@ void si_update_vertex_buffers(struct si_context *sctx)
* cache is needed. */
desc->pointer_dirty = true;
sctx->shader_userdata.atom.dirty = true;
sctx->vertex_buffers_dirty = false;
return true;
}
@ -664,7 +554,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
if (shader >= SI_NUM_SHADERS)
return;
assert(slot < buffers->num_buffers);
assert(slot < buffers->desc.num_elements);
pipe_resource_reference(&buffers->buffers[slot], NULL);
/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
@ -691,7 +581,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
}
/* Set the descriptor. */
uint32_t *desc = buffers->desc_data[slot];
uint32_t *desc = buffers->desc.list + slot*4;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(0);
@ -710,12 +600,11 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
buffers->desc.enabled_mask |= 1llu << slot;
} else {
/* Clear the descriptor. */
memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4);
memset(buffers->desc.list + slot*4, 0, sizeof(uint32_t) * 4);
buffers->desc.enabled_mask &= ~(1llu << slot);
}
buffers->desc.dirty_mask |= 1llu << slot;
si_update_descriptors(sctx, &buffers->desc);
buffers->desc.list_dirty = true;
}
/* RING BUFFERS */
@ -735,7 +624,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
/* The stride field in the resource descriptor has 14 bits */
assert(stride < (1 << 14));
assert(slot < buffers->num_buffers);
assert(slot < buffers->desc.num_elements);
pipe_resource_reference(&buffers->buffers[slot], NULL);
if (buffer) {
@ -780,7 +669,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
}
/* Set the descriptor. */
uint32_t *desc = buffers->desc_data[slot];
uint32_t *desc = buffers->desc.list + slot*4;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(stride) |
@ -803,12 +692,11 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
buffers->desc.enabled_mask |= 1llu << slot;
} else {
/* Clear the descriptor. */
memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4);
memset(buffers->desc.list + slot*4, 0, sizeof(uint32_t) * 4);
buffers->desc.enabled_mask &= ~(1llu << slot);
}
buffers->desc.dirty_mask |= 1llu << slot;
si_update_descriptors(sctx, &buffers->desc);
buffers->desc.list_dirty = true;
}
/* STREAMOUT BUFFERS */
@ -870,7 +758,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
uint64_t va = r600_resource(buffer)->gpu_address;
/* Set the descriptor. */
uint32_t *desc = buffers->desc_data[bufidx];
uint32_t *desc = buffers->desc.list + bufidx*4;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
desc[2] = 0xffffffff;
@ -888,24 +776,22 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
buffers->desc.enabled_mask |= 1llu << bufidx;
} else {
/* Clear the descriptor and unset the resource. */
memset(buffers->desc_data[bufidx], 0,
memset(buffers->desc.list + bufidx*4, 0,
sizeof(uint32_t) * 4);
pipe_resource_reference(&buffers->buffers[bufidx],
NULL);
buffers->desc.enabled_mask &= ~(1llu << bufidx);
}
buffers->desc.dirty_mask |= 1llu << bufidx;
}
for (; i < old_num_targets; i++) {
bufidx = SI_SO_BUF_OFFSET + i;
/* Clear the descriptor and unset the resource. */
memset(buffers->desc_data[bufidx], 0, sizeof(uint32_t) * 4);
memset(buffers->desc.list + bufidx*4, 0, sizeof(uint32_t) * 4);
pipe_resource_reference(&buffers->buffers[bufidx], NULL);
buffers->desc.enabled_mask &= ~(1llu << bufidx);
buffers->desc.dirty_mask |= 1llu << bufidx;
}
si_update_descriptors(sctx, &buffers->desc);
buffers->desc.list_dirty = true;
}
static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
@ -974,22 +860,19 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
/* Read/Write buffers. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
bool found = false;
uint64_t mask = buffers->desc.enabled_mask;
while (mask) {
i = u_bit_scan64(&mask);
if (buffers->buffers[i] == buf) {
si_desc_reset_buffer_offset(ctx, buffers->desc_data[i],
si_desc_reset_buffer_offset(ctx, buffers->desc.list + i*4,
old_va, buf);
buffers->desc.list_dirty = true;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
rbuffer, buffers->shader_usage,
buffers->priority);
buffers->desc.dirty_mask |= 1llu << i;
found = true;
if (i >= SI_SO_BUF_OFFSET && shader == PIPE_SHADER_VERTEX) {
/* Update the streamout state. */
if (sctx->b.streamout.begin_emitted) {
@ -1001,34 +884,25 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
}
}
}
if (found) {
si_update_descriptors(sctx, &buffers->desc);
}
}
/* Constant buffers. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
struct si_buffer_resources *buffers = &sctx->const_buffers[shader];
bool found = false;
uint64_t mask = buffers->desc.enabled_mask;
while (mask) {
unsigned i = u_bit_scan64(&mask);
if (buffers->buffers[i] == buf) {
si_desc_reset_buffer_offset(ctx, buffers->desc_data[i],
si_desc_reset_buffer_offset(ctx, buffers->desc.list + i*4,
old_va, buf);
buffers->desc.list_dirty = true;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
rbuffer, buffers->shader_usage,
buffers->priority);
buffers->desc.dirty_mask |= 1llu << i;
found = true;
}
}
if (found) {
si_update_descriptors(sctx, &buffers->desc);
}
}
/* Texture buffers - update virtual addresses in sampler view descriptors. */
@ -1040,23 +914,20 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
/* Texture buffers - update bindings. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
struct si_sampler_views *views = &sctx->samplers[shader].views;
bool found = false;
uint64_t mask = views->desc.enabled_mask;
while (mask) {
unsigned i = u_bit_scan64(&mask);
if (views->views[i]->texture == buf) {
si_desc_reset_buffer_offset(ctx, views->desc.list + i*8+4,
old_va, buf);
views->desc.list_dirty = true;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
rbuffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_BUFFER_RO);
views->desc.dirty_mask |= 1llu << i;
found = true;
}
}
if (found) {
si_update_descriptors(sctx, &views->desc);
}
}
}
@ -1297,11 +1168,10 @@ static void si_emit_shader_pointer(struct si_context *sctx,
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
uint64_t va;
if (!desc->pointer_dirty)
if (!desc->pointer_dirty || !desc->buffer)
return;
va = desc->buffer->gpu_address +
desc->current_context_id * desc->context_size +
desc->buffer_offset;
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
@ -1351,34 +1221,28 @@ static void si_emit_shader_userdata(struct si_context *sctx,
si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
}
/* INIT/DEINIT */
/* INIT/DEINIT/UPLOAD */
void si_init_all_descriptors(struct si_context *sctx)
{
int i;
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_init_buffer_resources(sctx, &sctx->const_buffers[i],
si_init_buffer_resources(&sctx->const_buffers[i],
SI_NUM_CONST_BUFFERS, SI_SGPR_CONST,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
si_init_buffer_resources(sctx, &sctx->rw_buffers[i],
si_init_buffer_resources(&sctx->rw_buffers[i],
SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
si_init_sampler_views(sctx, &sctx->samplers[i].views);
si_init_descriptors(sctx, &sctx->samplers[i].states.desc,
SI_SGPR_SAMPLER, 4, SI_NUM_SAMPLER_STATES,
si_emit_sampler_states);
sctx->atoms.s.const_buffers[i] = &sctx->const_buffers[i].desc.atom;
sctx->atoms.s.rw_buffers[i] = &sctx->rw_buffers[i].desc.atom;
sctx->atoms.s.sampler_views[i] = &sctx->samplers[i].views.desc.atom;
sctx->atoms.s.sampler_states[i] = &sctx->samplers[i].states.desc.atom;
si_init_descriptors(&sctx->samplers[i].views.desc,
SI_SGPR_RESOURCE, 8, SI_NUM_SAMPLER_VIEWS);
si_init_descriptors(&sctx->samplers[i].states.desc,
SI_SGPR_SAMPLER, 4, SI_NUM_SAMPLER_STATES);
}
si_init_descriptors(sctx, &sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFER,
4, SI_NUM_VERTEX_BUFFERS, NULL);
si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFER,
4, SI_NUM_VERTEX_BUFFERS);
/* Set pipe_context functions. */
sctx->b.b.set_constant_buffer = si_set_constant_buffer;
@ -1401,6 +1265,20 @@ void si_init_all_descriptors(struct si_context *sctx)
si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
}
bool si_upload_shader_descriptors(struct si_context *sctx)
{
int i;
for (i = 0; i < SI_NUM_SHADERS; i++) {
if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) ||
!si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) ||
!si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) ||
!si_upload_descriptors(sctx, &sctx->samplers[i].states.desc))
return false;
}
return si_upload_vertex_buffer_descriptors(sctx);
}
void si_release_all_descriptors(struct si_context *sctx)
{
int i;

View file

@ -142,12 +142,6 @@ struct si_context {
union {
struct {
/* The order matters. */
struct r600_atom *const_buffers[SI_NUM_SHADERS];
struct r600_atom *rw_buffers[SI_NUM_SHADERS];
struct r600_atom *sampler_views[SI_NUM_SHADERS];
struct r600_atom *sampler_states[SI_NUM_SHADERS];
/* Caches must be flushed after resource descriptors are
* updated in memory. */
struct r600_atom *cache_flush;
struct r600_atom *streamout_begin;
struct r600_atom *streamout_enable; /* must be after streamout_begin */

View file

@ -158,60 +158,48 @@ struct si_shader_data {
#define SI_NUM_VERTEX_BUFFERS 16
/* This represents resource descriptors in memory, such as buffer resources,
/* This represents descriptors in memory, such as buffer resources,
* image resources, and sampler states.
*/
struct si_descriptors {
struct r600_atom atom;
/* The size of one resource descriptor. */
/* The list of descriptors in malloc'd memory. */
uint32_t *list;
/* The size of one descriptor. */
unsigned element_dw_size;
/* The maximum number of resource descriptors. */
/* The maximum number of descriptors. */
unsigned num_elements;
/* Whether the list has been changed and should be re-uploaded. */
bool list_dirty;
/* The buffer where resource descriptors are stored. */
/* The buffer where the descriptors have been uploaded. */
struct r600_resource *buffer;
unsigned buffer_offset;
/* The i-th bit is set if that element is dirty (changed but not emitted). */
uint64_t dirty_mask;
/* The i-th bit is set if that element is enabled (non-NULL resource). */
uint64_t enabled_mask;
/* We can't update descriptors directly because the GPU might be
* reading them at the same time, so we have to update them
* in a copy-on-write manner. Each such copy is called a context,
* which is just another array descriptors in the same buffer. */
unsigned current_context_id;
/* The size of a context, should be equal to 4*element_dw_size*num_elements. */
unsigned context_size;
/* The shader userdata offset within a shader where the 64-bit pointer to the descriptor
* array will be stored. */
unsigned shader_userdata_offset;
/* Whether the pointer should be re-emitted. */
bool pointer_dirty;
};
struct si_sampler_views {
struct si_descriptors desc;
struct pipe_sampler_view *views[SI_NUM_SAMPLER_VIEWS];
uint32_t *desc_data[SI_NUM_SAMPLER_VIEWS];
};
struct si_sampler_states {
struct si_descriptors desc;
uint32_t *desc_data[SI_NUM_SAMPLER_STATES];
void *saved_states[2]; /* saved for u_blitter */
};
struct si_buffer_resources {
struct si_descriptors desc;
unsigned num_buffers;
enum radeon_bo_usage shader_usage; /* READ, WRITE, or READWRITE */
enum radeon_bo_priority priority;
struct pipe_resource **buffers; /* this has num_buffers elements */
uint32_t *desc_storage; /* this has num_buffers*4 elements */
uint32_t **desc_data; /* an array of pointers pointing to desc_storage */
};
#define si_pm4_block_idx(member) \
@ -247,13 +235,13 @@ struct si_buffer_resources {
/* si_descriptors.c */
void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
unsigned start, unsigned count, void **states);
void si_update_vertex_buffers(struct si_context *sctx);
void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
struct pipe_resource *buffer,
unsigned stride, unsigned num_records,
bool add_tid, bool swizzle,
unsigned element_size, unsigned index_stride, uint64_t offset);
void si_init_all_descriptors(struct si_context *sctx);
bool si_upload_shader_descriptors(struct si_context *sctx);
void si_release_all_descriptors(struct si_context *sctx);
void si_all_descriptors_begin_new_cs(struct si_context *sctx);
void si_copy_buffer(struct si_context *sctx,

View file

@ -743,11 +743,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
sctx->current_rast_prim = info->mode;
si_update_shaders(sctx);
if (sctx->vertex_buffers_dirty) {
si_update_vertex_buffers(sctx);
sctx->vertex_buffers_dirty = false;
}
if (!si_upload_shader_descriptors(sctx))
return;
if (info->indexed) {
/* Initialize the index buffer struct. */