mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-22 09:40:40 +02:00
radeonsi: move vertex buffer descriptors from IB to memory
This removes the intermediate storage (pm4 state) and generates descriptors directly in a staging buffer. It also reduces the number of flushes, because the descriptors no longer take CS space. Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
This commit is contained in:
parent
1635ded828
commit
0ed0bf0696
7 changed files with 133 additions and 106 deletions
|
|
@ -166,11 +166,13 @@ static void si_update_descriptors(struct si_context *sctx,
|
|||
}
|
||||
|
||||
static void si_emit_shader_pointer(struct si_context *sctx,
|
||||
struct si_descriptors *desc)
|
||||
struct r600_atom *atom)
|
||||
{
|
||||
struct si_descriptors *desc = (struct si_descriptors*)atom;
|
||||
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
|
||||
uint64_t va = r600_resource_va(sctx->b.b.screen, &desc->buffer->b.b) +
|
||||
desc->current_context_id * desc->context_size;
|
||||
desc->current_context_id * desc->context_size +
|
||||
desc->buffer_offset;
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
|
||||
radeon_emit(cs, (desc->shader_userdata_reg - SI_SH_REG_OFFSET) >> 2);
|
||||
|
|
@ -253,7 +255,7 @@ static void si_emit_descriptors(struct si_context *sctx,
|
|||
desc->current_context_id = new_context_id;
|
||||
|
||||
/* Now update the shader userdata pointer. */
|
||||
si_emit_shader_pointer(sctx, desc);
|
||||
si_emit_shader_pointer(sctx, &desc->atom);
|
||||
}
|
||||
|
||||
static unsigned si_get_shader_user_data_base(unsigned shader)
|
||||
|
|
@ -330,7 +332,7 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
|
|||
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
|
||||
|
||||
si_emit_shader_pointer(sctx, &views->desc);
|
||||
si_emit_shader_pointer(sctx, &views->desc.atom);
|
||||
}
|
||||
|
||||
static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
|
||||
|
|
@ -432,7 +434,7 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx,
|
|||
{
|
||||
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
|
||||
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
|
||||
si_emit_shader_pointer(sctx, &states->desc);
|
||||
si_emit_shader_pointer(sctx, &states->desc.atom);
|
||||
}
|
||||
|
||||
void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
|
||||
|
|
@ -533,9 +535,119 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
|
|||
buffers->desc.buffer, RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_SHADER_DATA);
|
||||
|
||||
si_emit_shader_pointer(sctx, &buffers->desc);
|
||||
si_emit_shader_pointer(sctx, &buffers->desc.atom);
|
||||
}
|
||||
|
||||
/* VERTEX BUFFERS */
|
||||
|
||||
static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
|
||||
{
|
||||
struct si_descriptors *desc = &sctx->vertex_buffers;
|
||||
int count = sctx->vertex_elements ? sctx->vertex_elements->count : 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
int vb = sctx->vertex_elements->elements[i].vertex_buffer_index;
|
||||
|
||||
if (vb >= sctx->nr_vertex_buffers)
|
||||
continue;
|
||||
if (!sctx->vertex_buffer[vb].buffer)
|
||||
continue;
|
||||
|
||||
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
|
||||
(struct r600_resource*)sctx->vertex_buffer[vb].buffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
|
||||
}
|
||||
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
|
||||
desc->buffer, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_SHADER_DATA);
|
||||
|
||||
si_emit_shader_pointer(sctx, &desc->atom);
|
||||
}
|
||||
|
||||
void si_update_vertex_buffers(struct si_context *sctx)
|
||||
{
|
||||
struct pipe_context *ctx = &sctx->b.b;
|
||||
struct si_descriptors *desc = &sctx->vertex_buffers;
|
||||
bool bound[SI_NUM_VERTEX_BUFFERS] = {};
|
||||
unsigned i, count = sctx->vertex_elements->count;
|
||||
uint64_t va;
|
||||
uint32_t *ptr;
|
||||
|
||||
if (!count || !sctx->vertex_elements)
|
||||
return;
|
||||
|
||||
/* Vertex buffer descriptors are the only ones which are uploaded
|
||||
* directly through a staging buffer and don't go through
|
||||
* the fine-grained upload path.
|
||||
*/
|
||||
u_upload_alloc(sctx->b.uploader, 0, count * 16, &desc->buffer_offset,
|
||||
(struct pipe_resource**)&desc->buffer, (void**)&ptr);
|
||||
|
||||
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
|
||||
desc->buffer, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_SHADER_DATA);
|
||||
|
||||
assert(count <= SI_NUM_VERTEX_BUFFERS);
|
||||
assert(desc->current_context_id == 0);
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
|
||||
struct pipe_vertex_buffer *vb;
|
||||
struct r600_resource *rbuffer;
|
||||
unsigned offset;
|
||||
uint32_t *desc = &ptr[i*4];
|
||||
|
||||
if (ve->vertex_buffer_index >= sctx->nr_vertex_buffers) {
|
||||
memset(desc, 0, 16);
|
||||
continue;
|
||||
}
|
||||
|
||||
vb = &sctx->vertex_buffer[ve->vertex_buffer_index];
|
||||
rbuffer = (struct r600_resource*)vb->buffer;
|
||||
if (rbuffer == NULL) {
|
||||
memset(desc, 0, 16);
|
||||
continue;
|
||||
}
|
||||
|
||||
offset = vb->buffer_offset + ve->src_offset;
|
||||
|
||||
va = r600_resource_va(ctx->screen, (void*)rbuffer);
|
||||
va += offset;
|
||||
|
||||
/* Fill in T# buffer resource description */
|
||||
desc[0] = va & 0xFFFFFFFF;
|
||||
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
|
||||
S_008F04_STRIDE(vb->stride);
|
||||
if (vb->stride)
|
||||
/* Round up by rounding down and adding 1 */
|
||||
desc[2] = (vb->buffer->width0 - offset -
|
||||
sctx->vertex_elements->format_size[i]) /
|
||||
vb->stride + 1;
|
||||
else
|
||||
desc[2] = vb->buffer->width0 - offset;
|
||||
|
||||
desc[3] = sctx->vertex_elements->rsrc_word3[i];
|
||||
|
||||
if (!bound[ve->vertex_buffer_index]) {
|
||||
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
|
||||
(struct r600_resource*)vb->buffer,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
|
||||
bound[ve->vertex_buffer_index] = true;
|
||||
}
|
||||
}
|
||||
|
||||
desc->atom.num_dw = 8; /* update 2 shader pointers (VS+ES) */
|
||||
desc->atom.dirty = true;
|
||||
|
||||
/* Don't flush the const cache. It would have a very negative effect
|
||||
* on performance (confirmed by testing). New descriptors are always
|
||||
* uploaded to a fresh new buffer, so I don't think flushing the const
|
||||
* cache is needed. */
|
||||
sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
|
||||
}
|
||||
|
||||
|
||||
/* CONSTANT BUFFERS */
|
||||
|
||||
void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
|
||||
|
|
@ -1096,6 +1208,11 @@ void si_init_all_descriptors(struct si_context *sctx)
|
|||
sctx->atoms.s.sampler_states[i] = &sctx->samplers[i].states.desc.atom;
|
||||
}
|
||||
|
||||
si_init_descriptors(sctx, &sctx->vertex_buffers,
|
||||
si_get_shader_user_data_base(PIPE_SHADER_VERTEX) +
|
||||
SI_SGPR_VERTEX_BUFFER*4, 4, SI_NUM_VERTEX_BUFFERS,
|
||||
si_emit_shader_pointer);
|
||||
sctx->atoms.s.vertex_buffers = &sctx->vertex_buffers.atom;
|
||||
|
||||
/* Set pipe_context functions. */
|
||||
sctx->b.b.set_constant_buffer = si_set_constant_buffer;
|
||||
|
|
@ -1115,6 +1232,7 @@ void si_release_all_descriptors(struct si_context *sctx)
|
|||
si_release_sampler_views(&sctx->samplers[i].views);
|
||||
si_release_descriptors(&sctx->samplers[i].states.desc);
|
||||
}
|
||||
si_release_descriptors(&sctx->vertex_buffers);
|
||||
}
|
||||
|
||||
void si_all_descriptors_begin_new_cs(struct si_context *sctx)
|
||||
|
|
@ -1127,4 +1245,5 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
|
|||
si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
|
||||
si_sampler_states_begin_new_cs(sctx, &sctx->samplers[i].states);
|
||||
}
|
||||
si_vertex_buffers_begin_new_cs(sctx);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -97,6 +97,7 @@ struct si_context {
|
|||
union {
|
||||
struct {
|
||||
/* The order matters. */
|
||||
struct r600_atom *vertex_buffers;
|
||||
struct r600_atom *const_buffers[SI_NUM_SHADERS];
|
||||
struct r600_atom *rw_buffers[SI_NUM_SHADERS];
|
||||
struct r600_atom *sampler_views[SI_NUM_SHADERS];
|
||||
|
|
@ -124,9 +125,10 @@ struct si_context {
|
|||
struct si_cs_shader_state cs_shader_state;
|
||||
/* shader information */
|
||||
unsigned sprite_coord_enable;
|
||||
struct si_descriptors vertex_buffers;
|
||||
struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
|
||||
struct si_buffer_resources rw_buffers[SI_NUM_SHADERS];
|
||||
struct si_textures_info samplers[SI_NUM_SHADERS];
|
||||
struct si_textures_info samplers[SI_NUM_SHADERS];
|
||||
struct r600_resource *border_color_table;
|
||||
unsigned border_color_offset;
|
||||
|
||||
|
|
|
|||
|
|
@ -103,37 +103,6 @@ void si_pm4_add_bo(struct si_pm4_state *state,
|
|||
state->bo_priority[idx] = priority;
|
||||
}
|
||||
|
||||
void si_pm4_sh_data_begin(struct si_pm4_state *state)
|
||||
{
|
||||
si_pm4_cmd_begin(state, PKT3_NOP);
|
||||
}
|
||||
|
||||
/**
 * Append one dword of shader data to the command currently being built.
 *
 * \param state  PM4 state to which the data is added
 * \param dw     dword to append
 */
void si_pm4_sh_data_add(struct si_pm4_state *state, uint32_t dw)
{
	si_pm4_cmd_add(state, dw);
}
|
||||
|
||||
void si_pm4_sh_data_end(struct si_pm4_state *state, unsigned base, unsigned idx)
|
||||
{
|
||||
unsigned offs = state->last_pm4 + 1;
|
||||
unsigned reg = base + idx * 4;
|
||||
|
||||
/* Bail if no data was added */
|
||||
if (state->ndw == offs) {
|
||||
state->ndw--;
|
||||
return;
|
||||
}
|
||||
|
||||
si_pm4_cmd_end(state, false);
|
||||
|
||||
si_pm4_cmd_begin(state, PKT3_SET_SH_REG_OFFSET);
|
||||
si_pm4_cmd_add(state, (reg - SI_SH_REG_OFFSET) >> 2);
|
||||
state->relocs[state->nrelocs++] = state->ndw;
|
||||
si_pm4_cmd_add(state, offs << 2);
|
||||
si_pm4_cmd_add(state, 0);
|
||||
si_pm4_cmd_end(state, false);
|
||||
}
|
||||
|
||||
void si_pm4_inval_shader_cache(struct si_pm4_state *state)
|
||||
{
|
||||
state->cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
|
||||
|
|
|
|||
|
|
@ -76,10 +76,6 @@ void si_pm4_add_bo(struct si_pm4_state *state,
|
|||
enum radeon_bo_usage usage,
|
||||
enum radeon_bo_priority priority);
|
||||
|
||||
void si_pm4_sh_data_begin(struct si_pm4_state *state);
|
||||
void si_pm4_sh_data_add(struct si_pm4_state *state, uint32_t dw);
|
||||
void si_pm4_sh_data_end(struct si_pm4_state *state, unsigned base, unsigned idx);
|
||||
|
||||
void si_pm4_inval_shader_cache(struct si_pm4_state *state);
|
||||
void si_pm4_inval_texture_cache(struct si_pm4_state *state);
|
||||
|
||||
|
|
|
|||
|
|
@ -2800,6 +2800,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
|
|||
S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
|
||||
S_008F0C_NUM_FORMAT(num_format) |
|
||||
S_008F0C_DATA_FORMAT(data_format);
|
||||
v->format_size[i] = desc->block.bits / 8;
|
||||
}
|
||||
memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
|
||||
|
||||
|
|
|
|||
|
|
@ -72,6 +72,7 @@ struct si_vertex_element
|
|||
{
|
||||
unsigned count;
|
||||
uint32_t rsrc_word3[PIPE_MAX_ATTRIBS];
|
||||
uint32_t format_size[PIPE_MAX_ATTRIBS];
|
||||
struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS];
|
||||
};
|
||||
|
||||
|
|
@ -97,7 +98,6 @@ union si_state {
|
|||
struct si_pm4_state *vs;
|
||||
struct si_pm4_state *ps;
|
||||
struct si_pm4_state *spi;
|
||||
struct si_pm4_state *vertex_buffers;
|
||||
struct si_pm4_state *draw_info;
|
||||
struct si_pm4_state *draw;
|
||||
} named;
|
||||
|
|
@ -147,6 +147,7 @@ struct si_descriptors {
|
|||
|
||||
/* The buffer where resource descriptors are stored. */
|
||||
struct r600_resource *buffer;
|
||||
unsigned buffer_offset;
|
||||
|
||||
/* The i-th bit is set if that element is dirty (changed but not emitted). */
|
||||
unsigned dirty_mask;
|
||||
|
|
@ -221,6 +222,7 @@ struct si_buffer_resources {
|
|||
/* si_descriptors.c */
|
||||
void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
|
||||
unsigned start, unsigned count, void **states);
|
||||
void si_update_vertex_buffers(struct si_context *sctx);
|
||||
void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
|
||||
struct pipe_constant_buffer *input,
|
||||
unsigned stride, unsigned num_records,
|
||||
|
|
|
|||
|
|
@ -658,68 +658,6 @@ static void si_update_derived_state(struct si_context *sctx)
|
|||
}
|
||||
}
|
||||
|
||||
static void si_vertex_buffer_update(struct si_context *sctx)
|
||||
{
|
||||
struct pipe_context *ctx = &sctx->b.b;
|
||||
struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx);
|
||||
bool bound[PIPE_MAX_ATTRIBS] = {};
|
||||
unsigned i, count;
|
||||
uint64_t va;
|
||||
|
||||
sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
|
||||
|
||||
count = sctx->vertex_elements->count;
|
||||
assert(count <= 256 / 4);
|
||||
|
||||
si_pm4_sh_data_begin(pm4);
|
||||
for (i = 0 ; i < count; i++) {
|
||||
struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
|
||||
struct pipe_vertex_buffer *vb;
|
||||
struct r600_resource *rbuffer;
|
||||
unsigned offset;
|
||||
|
||||
if (ve->vertex_buffer_index >= sctx->nr_vertex_buffers)
|
||||
continue;
|
||||
|
||||
vb = &sctx->vertex_buffer[ve->vertex_buffer_index];
|
||||
rbuffer = (struct r600_resource*)vb->buffer;
|
||||
if (rbuffer == NULL)
|
||||
continue;
|
||||
|
||||
offset = 0;
|
||||
offset += vb->buffer_offset;
|
||||
offset += ve->src_offset;
|
||||
|
||||
va = r600_resource_va(ctx->screen, (void*)rbuffer);
|
||||
va += offset;
|
||||
|
||||
/* Fill in T# buffer resource description */
|
||||
si_pm4_sh_data_add(pm4, va & 0xFFFFFFFF);
|
||||
si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) |
|
||||
S_008F04_STRIDE(vb->stride)));
|
||||
if (vb->stride)
|
||||
/* Round up by rounding down and adding 1 */
|
||||
si_pm4_sh_data_add(pm4,
|
||||
(vb->buffer->width0 - offset -
|
||||
util_format_get_blocksize(ve->src_format)) /
|
||||
vb->stride + 1);
|
||||
else
|
||||
si_pm4_sh_data_add(pm4, vb->buffer->width0 - offset);
|
||||
si_pm4_sh_data_add(pm4, sctx->vertex_elements->rsrc_word3[i]);
|
||||
|
||||
if (!bound[ve->vertex_buffer_index]) {
|
||||
si_pm4_add_bo(pm4, rbuffer, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_SHADER_BUFFER_RO);
|
||||
bound[ve->vertex_buffer_index] = true;
|
||||
}
|
||||
}
|
||||
si_pm4_sh_data_end(pm4, sctx->gs_shader ?
|
||||
R_00B330_SPI_SHADER_USER_DATA_ES_0 :
|
||||
R_00B130_SPI_SHADER_USER_DATA_VS_0,
|
||||
SI_SGPR_VERTEX_BUFFER);
|
||||
si_pm4_set_state(sctx, vertex_buffers, pm4);
|
||||
}
|
||||
|
||||
static void si_state_draw(struct si_context *sctx,
|
||||
const struct pipe_draw_info *info,
|
||||
const struct pipe_index_buffer *ib)
|
||||
|
|
@ -954,7 +892,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
|||
return;
|
||||
|
||||
si_update_derived_state(sctx);
|
||||
si_vertex_buffer_update(sctx);
|
||||
si_update_vertex_buffers(sctx);
|
||||
|
||||
if (info->indexed) {
|
||||
/* Initialize the index buffer struct. */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue