mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 04:50:11 +01:00
vbo/dlist: use a single buffer object
Instead of using 2 bo, 1 for the indices, one for the vertices store everything in the same buffer. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12646>
This commit is contained in:
parent
edd7733826
commit
f9d12bf50e
3 changed files with 152 additions and 105 deletions
|
|
@ -176,8 +176,8 @@ struct vbo_save_context {
|
||||||
|
|
||||||
struct vbo_save_vertex_store *vertex_store;
|
struct vbo_save_vertex_store *vertex_store;
|
||||||
struct vbo_save_primitive_store *prim_store;
|
struct vbo_save_primitive_store *prim_store;
|
||||||
struct gl_buffer_object *previous_ib;
|
struct gl_buffer_object *current_bo;
|
||||||
unsigned ib_first_free_index;
|
unsigned current_bo_bytes_used;
|
||||||
|
|
||||||
fi_type *buffer_map; /**< Mapping of vertex_store's buffer */
|
fi_type *buffer_map; /**< Mapping of vertex_store's buffer */
|
||||||
fi_type *buffer_ptr; /**< cursor, points into buffer_map */
|
fi_type *buffer_ptr; /**< cursor, points into buffer_map */
|
||||||
|
|
|
||||||
|
|
@ -73,5 +73,5 @@ void vbo_save_destroy( struct gl_context *ctx )
|
||||||
save->vertex_store = NULL;
|
save->vertex_store = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
_mesa_reference_buffer_object(ctx, &save->previous_ib, NULL);
|
_mesa_reference_buffer_object(ctx, &save->current_bo, NULL);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -601,6 +601,9 @@ compile_vertex_list(struct gl_context *ctx)
|
||||||
|
|
||||||
merge_prims(ctx, node->cold->prims, &node->cold->prim_count);
|
merge_prims(ctx, node->cold->prims, &node->cold->prim_count);
|
||||||
|
|
||||||
|
GLintptr buffer_offset = 0;
|
||||||
|
GLuint start_offset = 0;
|
||||||
|
|
||||||
/* Create an index buffer. */
|
/* Create an index buffer. */
|
||||||
node->cold->min_index = node->cold->max_index = 0;
|
node->cold->min_index = node->cold->max_index = 0;
|
||||||
if (save->vert_count == 0 || save->prim_count == 0)
|
if (save->vert_count == 0 || save->prim_count == 0)
|
||||||
|
|
@ -622,11 +625,6 @@ compile_vertex_list(struct gl_context *ctx)
|
||||||
int max_indices_count = MAX2(total_vert_count * 2 - (node->cold->prim_count * 2) + 1,
|
int max_indices_count = MAX2(total_vert_count * 2 - (node->cold->prim_count * 2) + 1,
|
||||||
total_vert_count);
|
total_vert_count);
|
||||||
|
|
||||||
int indices_offset = 0;
|
|
||||||
int available = save->previous_ib ? (save->previous_ib->Size / 4 - save->ib_first_free_index) : 0;
|
|
||||||
if (available >= max_indices_count) {
|
|
||||||
indices_offset = save->ib_first_free_index;
|
|
||||||
}
|
|
||||||
int size = max_indices_count * sizeof(uint32_t);
|
int size = max_indices_count * sizeof(uint32_t);
|
||||||
uint32_t* indices = (uint32_t*) malloc(size);
|
uint32_t* indices = (uint32_t*) malloc(size);
|
||||||
struct _mesa_prim *merged_prims = NULL;
|
struct _mesa_prim *merged_prims = NULL;
|
||||||
|
|
@ -729,7 +727,7 @@ compile_vertex_list(struct gl_context *ctx)
|
||||||
assert(last_valid_prim <= i);
|
assert(last_valid_prim <= i);
|
||||||
merged_prims = realloc(merged_prims, (1 + last_valid_prim) * sizeof(struct _mesa_prim));
|
merged_prims = realloc(merged_prims, (1 + last_valid_prim) * sizeof(struct _mesa_prim));
|
||||||
merged_prims[last_valid_prim] = original_prims[i];
|
merged_prims[last_valid_prim] = original_prims[i];
|
||||||
merged_prims[last_valid_prim].start = indices_offset + start;
|
merged_prims[last_valid_prim].start = start;
|
||||||
merged_prims[last_valid_prim].count = idx - start;
|
merged_prims[last_valid_prim].count = idx - start;
|
||||||
}
|
}
|
||||||
merged_prims[last_valid_prim].mode = mode;
|
merged_prims[last_valid_prim].mode = mode;
|
||||||
|
|
@ -742,115 +740,116 @@ compile_vertex_list(struct gl_context *ctx)
|
||||||
node->cold->ib.count = idx;
|
node->cold->ib.count = idx;
|
||||||
node->cold->ib.index_size_shift = (GL_UNSIGNED_INT - GL_UNSIGNED_BYTE) >> 1;
|
node->cold->ib.index_size_shift = (GL_UNSIGNED_INT - GL_UNSIGNED_BYTE) >> 1;
|
||||||
|
|
||||||
GLintptr old_offset = 0;
|
/* How many bytes do we need to store the indices and the vertices */
|
||||||
if (save->VAO[0]) {
|
total_vert_count = vertex_to_index ? (max_index + 1) : idx;
|
||||||
old_offset = save->VAO[0]->BufferBinding[0].Offset
|
unsigned total_bytes_needed = idx * sizeof(uint32_t) +
|
||||||
+ save->VAO[0]->VertexAttrib[VERT_ATTRIB_POS].RelativeOffset;
|
total_vert_count * save->vertex_size * sizeof(fi_type);
|
||||||
}
|
|
||||||
GLintptr buffer_offset =
|
|
||||||
(save->buffer_map - save->vertex_store->buffer_in_ram) * sizeof(GLfloat);
|
|
||||||
const GLintptr original_buffer_offset = buffer_offset;
|
|
||||||
assert(old_offset <= buffer_offset);
|
|
||||||
const GLintptr offset_diff = buffer_offset - old_offset;
|
|
||||||
GLuint start_offset = 0;
|
|
||||||
if (offset_diff > 0 && stride > 0 && offset_diff % stride == 0) {
|
|
||||||
/* The vertex size is an exact multiple of the buffer offset.
|
|
||||||
* This means that we can use zero-based vertex attribute pointers
|
|
||||||
* and specify the start of the primitive with the _mesa_prim::start
|
|
||||||
* field. This results in issuing several draw calls with identical
|
|
||||||
* vertex attribute information. This can result in fewer state
|
|
||||||
* changes in drivers. In particular, the Gallium CSO module will
|
|
||||||
* filter out redundant vertex buffer changes.
|
|
||||||
*/
|
|
||||||
/* We cannot immediately update the primitives as some methods below
|
|
||||||
* still need the uncorrected start vertices
|
|
||||||
*/
|
|
||||||
start_offset = offset_diff/stride;
|
|
||||||
assert(old_offset == buffer_offset - offset_diff);
|
|
||||||
buffer_offset = old_offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Correct the primitive starts, we can only do this here as copy_vertices
|
const GLintptr old_offset = save->VAO[0] ?
|
||||||
* and convert_line_loop_to_strip above consume the uncorrected starts.
|
save->VAO[0]->BufferBinding[0].Offset + save->VAO[0]->VertexAttrib[VERT_ATTRIB_POS].RelativeOffset : 0;
|
||||||
* On the other hand the _vbo_loopback_vertex_list call below needs the
|
if (old_offset != save->current_bo_bytes_used && stride > 0) {
|
||||||
* primitives to be corrected already.
|
GLintptr offset_diff = save->current_bo_bytes_used - old_offset;
|
||||||
*/
|
while (offset_diff > 0 &&
|
||||||
for (unsigned i = 0; i < node->cold->prim_count; i++) {
|
save->current_bo_bytes_used < save->current_bo->Size &&
|
||||||
node->cold->prims[i].start += start_offset;
|
offset_diff % stride != 0) {
|
||||||
}
|
save->current_bo_bytes_used++;
|
||||||
/* start_offset shifts vertices (so v[0] becomes v[start_offset]), so we have
|
offset_diff = save->current_bo_bytes_used - old_offset;
|
||||||
* to apply this transformation to all indices and max_index.
|
|
||||||
*/
|
|
||||||
for (unsigned i = 0; i < idx; i++)
|
|
||||||
indices[i] += start_offset;
|
|
||||||
max_index += start_offset;
|
|
||||||
|
|
||||||
if (!indices_offset) {
|
|
||||||
/* Allocate a new index buffer */
|
|
||||||
_mesa_reference_buffer_object(ctx, &save->previous_ib, NULL);
|
|
||||||
save->previous_ib = ctx->Driver.NewBufferObject(ctx, VBO_BUF_ID + 1);
|
|
||||||
bool success = ctx->Driver.BufferData(ctx,
|
|
||||||
GL_ELEMENT_ARRAY_BUFFER_ARB,
|
|
||||||
MAX2(VBO_SAVE_INDEX_SIZE, idx) * sizeof(uint32_t),
|
|
||||||
NULL,
|
|
||||||
GL_STATIC_DRAW_ARB, GL_MAP_WRITE_BIT,
|
|
||||||
save->previous_ib);
|
|
||||||
if (!success) {
|
|
||||||
_mesa_reference_buffer_object(ctx, &save->previous_ib, NULL);
|
|
||||||
_mesa_error(ctx, GL_OUT_OF_MEMORY, "IB allocation");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
buffer_offset = save->current_bo_bytes_used;
|
||||||
|
|
||||||
_mesa_reference_buffer_object(ctx, &node->cold->ib.obj, save->previous_ib);
|
/* Can we reuse the previous bo or should we allocate a new one? */
|
||||||
|
int available_bytes = save->current_bo ? save->current_bo->Size - save->current_bo_bytes_used : 0;
|
||||||
|
if (total_bytes_needed > available_bytes) {
|
||||||
|
if (save->current_bo)
|
||||||
|
_mesa_reference_buffer_object(ctx, &save->current_bo, NULL);
|
||||||
|
save->current_bo = ctx->Driver.NewBufferObject(ctx, VBO_BUF_ID + 1);
|
||||||
|
bool success = ctx->Driver.BufferData(ctx,
|
||||||
|
GL_ELEMENT_ARRAY_BUFFER_ARB,
|
||||||
|
MAX2(total_bytes_needed, VBO_SAVE_BUFFER_SIZE * sizeof(uint32_t)),
|
||||||
|
NULL,
|
||||||
|
GL_STATIC_DRAW_ARB, GL_MAP_WRITE_BIT,
|
||||||
|
save->current_bo);
|
||||||
|
if (!success) {
|
||||||
|
_mesa_reference_buffer_object(ctx, &save->current_bo, NULL);
|
||||||
|
_mesa_error(ctx, GL_OUT_OF_MEMORY, "IB allocation");
|
||||||
|
} else {
|
||||||
|
save->current_bo_bytes_used = 0;
|
||||||
|
available_bytes = save->current_bo->Size;
|
||||||
|
}
|
||||||
|
buffer_offset = 0;
|
||||||
|
} else {
|
||||||
|
assert(old_offset <= buffer_offset);
|
||||||
|
const GLintptr offset_diff = buffer_offset - old_offset;
|
||||||
|
if (offset_diff > 0 && stride > 0 && offset_diff % stride == 0) {
|
||||||
|
/* The vertex size is an exact multiple of the buffer offset.
|
||||||
|
* This means that we can use zero-based vertex attribute pointers
|
||||||
|
* and specify the start of the primitive with the _mesa_prim::start
|
||||||
|
* field. This results in issuing several draw calls with identical
|
||||||
|
* vertex attribute information. This can result in fewer state
|
||||||
|
* changes in drivers. In particular, the Gallium CSO module will
|
||||||
|
* filter out redundant vertex buffer changes.
|
||||||
|
*/
|
||||||
|
/* We cannot immediately update the primitives as some methods below
|
||||||
|
* still need the uncorrected start vertices
|
||||||
|
*/
|
||||||
|
start_offset = offset_diff/stride;
|
||||||
|
assert(old_offset == buffer_offset - offset_diff);
|
||||||
|
buffer_offset = old_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Correct the primitive starts, we can only do this here as copy_vertices
|
||||||
|
* and convert_line_loop_to_strip above consume the uncorrected starts.
|
||||||
|
* On the other hand the _vbo_loopback_vertex_list call below needs the
|
||||||
|
* primitives to be corrected already.
|
||||||
|
*/
|
||||||
|
for (unsigned i = 0; i < node->cold->prim_count; i++) {
|
||||||
|
node->cold->prims[i].start += start_offset;
|
||||||
|
}
|
||||||
|
/* start_offset shifts vertices (so v[0] becomes v[start_offset]), so we have
|
||||||
|
* to apply this transformation to all indices and max_index.
|
||||||
|
*/
|
||||||
|
for (unsigned i = 0; i < idx; i++)
|
||||||
|
indices[i] += start_offset;
|
||||||
|
max_index += start_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
_mesa_reference_buffer_object(ctx, &node->cold->ib.obj, save->current_bo);
|
||||||
|
|
||||||
|
/* Upload the vertices first (see buffer_offset) */
|
||||||
|
ctx->Driver.BufferSubData(ctx,
|
||||||
|
save->current_bo_bytes_used,
|
||||||
|
total_vert_count * save->vertex_size * sizeof(fi_type),
|
||||||
|
vertex_to_index ? temp_vertices_buffer : save->buffer_map,
|
||||||
|
node->cold->ib.obj);
|
||||||
|
save->current_bo_bytes_used += total_vert_count * save->vertex_size * sizeof(fi_type);
|
||||||
|
|
||||||
|
if (vertex_to_index) {
|
||||||
|
_mesa_hash_table_destroy(vertex_to_index, _free_entry);
|
||||||
|
free(temp_vertices_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Since we're append the indices to an existing buffer, we need to adjust the start value of each
|
||||||
|
* primitive (not the indices themselves). */
|
||||||
|
save->current_bo_bytes_used += align(save->current_bo_bytes_used, 4) - save->current_bo_bytes_used;
|
||||||
|
int indices_offset = save->current_bo_bytes_used / 4;
|
||||||
|
for (int i = 0; i < merged_prim_count; i++) {
|
||||||
|
merged_prims[i].start += indices_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Then upload the indices. */
|
||||||
if (node->cold->ib.obj) {
|
if (node->cold->ib.obj) {
|
||||||
ctx->Driver.BufferSubData(ctx,
|
ctx->Driver.BufferSubData(ctx,
|
||||||
indices_offset * sizeof(uint32_t),
|
save->current_bo_bytes_used,
|
||||||
idx * sizeof(uint32_t),
|
idx * sizeof(uint32_t),
|
||||||
indices,
|
indices,
|
||||||
node->cold->ib.obj);
|
node->cold->ib.obj);
|
||||||
save->ib_first_free_index = indices_offset + idx;
|
save->current_bo_bytes_used += idx * sizeof(uint32_t);
|
||||||
} else {
|
} else {
|
||||||
node->cold->vertex_count = 0;
|
node->cold->vertex_count = 0;
|
||||||
node->cold->prim_count = 0;
|
node->cold->prim_count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (vertex_to_index) {
|
|
||||||
ctx->Driver.BufferSubData(ctx,
|
|
||||||
original_buffer_offset,
|
|
||||||
(max_index - start_offset + 1) * save->vertex_size * sizeof(fi_type),
|
|
||||||
temp_vertices_buffer,
|
|
||||||
save->vertex_store->bufferobj);
|
|
||||||
|
|
||||||
_mesa_hash_table_destroy(vertex_to_index, _free_entry);
|
|
||||||
free(temp_vertices_buffer);
|
|
||||||
} else {
|
|
||||||
ctx->Driver.BufferSubData(ctx,
|
|
||||||
original_buffer_offset,
|
|
||||||
idx * save->vertex_size * sizeof(fi_type),
|
|
||||||
&save->vertex_store->buffer_in_ram[original_buffer_offset / sizeof(float)],
|
|
||||||
save->vertex_store->bufferobj);
|
|
||||||
}
|
|
||||||
|
|
||||||
GLuint offsets[VBO_ATTRIB_MAX];
|
|
||||||
for (unsigned i = 0, offset = 0; i < VBO_ATTRIB_MAX; ++i) {
|
|
||||||
offsets[i] = offset;
|
|
||||||
offset += save->attrsz[i] * sizeof(GLfloat);
|
|
||||||
}
|
|
||||||
/* Create a pair of VAOs for the possible VERTEX_PROCESSING_MODEs
|
|
||||||
* Note that this may reuse the previous one of possible.
|
|
||||||
*/
|
|
||||||
for (gl_vertex_processing_mode vpm = VP_MODE_FF; vpm < VP_MODE_MAX; ++vpm) {
|
|
||||||
/* create or reuse the vao */
|
|
||||||
update_vao(ctx, vpm, &save->VAO[vpm],
|
|
||||||
save->vertex_store->bufferobj, buffer_offset, stride,
|
|
||||||
save->enabled, save->attrsz, save->attrtype, offsets);
|
|
||||||
/* Reference the vao in the dlist */
|
|
||||||
node->VAO[vpm] = NULL;
|
|
||||||
_mesa_reference_vao(ctx, &node->VAO[vpm], save->VAO[vpm]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Prepare for DrawGallium */
|
/* Prepare for DrawGallium */
|
||||||
memset(&node->merged.info, 0, sizeof(struct pipe_draw_info));
|
memset(&node->merged.info, 0, sizeof(struct pipe_draw_info));
|
||||||
/* The other info fields will be updated in vbo_save_playback_vertex_list */
|
/* The other info fields will be updated in vbo_save_playback_vertex_list */
|
||||||
|
|
@ -891,6 +890,36 @@ compile_vertex_list(struct gl_context *ctx)
|
||||||
free(merged_prims);
|
free(merged_prims);
|
||||||
|
|
||||||
end:
|
end:
|
||||||
|
|
||||||
|
if (!save->current_bo) {
|
||||||
|
save->current_bo = ctx->Driver.NewBufferObject(ctx, VBO_BUF_ID + 1);
|
||||||
|
bool success = ctx->Driver.BufferData(ctx,
|
||||||
|
GL_ELEMENT_ARRAY_BUFFER_ARB,
|
||||||
|
VBO_SAVE_BUFFER_SIZE * sizeof(uint32_t),
|
||||||
|
NULL,
|
||||||
|
GL_STATIC_DRAW_ARB, GL_MAP_WRITE_BIT,
|
||||||
|
save->current_bo);
|
||||||
|
}
|
||||||
|
|
||||||
|
GLuint offsets[VBO_ATTRIB_MAX];
|
||||||
|
for (unsigned i = 0, offset = 0; i < VBO_ATTRIB_MAX; ++i) {
|
||||||
|
offsets[i] = offset;
|
||||||
|
offset += save->attrsz[i] * sizeof(GLfloat);
|
||||||
|
}
|
||||||
|
/* Create a pair of VAOs for the possible VERTEX_PROCESSING_MODEs
|
||||||
|
* Note that this may reuse the previous one of possible.
|
||||||
|
*/
|
||||||
|
for (gl_vertex_processing_mode vpm = VP_MODE_FF; vpm < VP_MODE_MAX; ++vpm) {
|
||||||
|
/* create or reuse the vao */
|
||||||
|
update_vao(ctx, vpm, &save->VAO[vpm],
|
||||||
|
save->current_bo, buffer_offset, stride,
|
||||||
|
save->enabled, save->attrsz, save->attrtype, offsets);
|
||||||
|
/* Reference the vao in the dlist */
|
||||||
|
node->VAO[vpm] = NULL;
|
||||||
|
_mesa_reference_vao(ctx, &node->VAO[vpm], save->VAO[vpm]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Deal with GL_COMPILE_AND_EXECUTE:
|
/* Deal with GL_COMPILE_AND_EXECUTE:
|
||||||
*/
|
*/
|
||||||
if (ctx->ExecuteFlag) {
|
if (ctx->ExecuteFlag) {
|
||||||
|
|
@ -898,8 +927,24 @@ end:
|
||||||
|
|
||||||
_glapi_set_dispatch(ctx->Exec);
|
_glapi_set_dispatch(ctx->Exec);
|
||||||
|
|
||||||
/* Note that the range of referenced vertices must be mapped already */
|
/* _vbo_loopback_vertex_list doesn't use the index buffer, so we have to
|
||||||
|
* use buffer_in_ram instead of current_bo which contains all vertices instead
|
||||||
|
* of the deduplicated vertices only in the !UseLoopback case.
|
||||||
|
*
|
||||||
|
* The problem is that the VAO offset is based on current_bo's layout,
|
||||||
|
* so we have to use a temp value.
|
||||||
|
*/
|
||||||
|
struct gl_vertex_array_object *vao = node->VAO[VP_MODE_SHADER];
|
||||||
|
GLintptr original = vao->BufferBinding[0].Offset;
|
||||||
|
if (!ctx->ListState.Current.UseLoopback) {
|
||||||
|
GLintptr new_offset = (save->buffer_map - save->vertex_store->buffer_in_ram) *
|
||||||
|
sizeof(GLfloat);
|
||||||
|
/* 'start_offset' has been added to all primitives 'start', so undo it here. */
|
||||||
|
new_offset -= start_offset * stride;
|
||||||
|
vao->BufferBinding[0].Offset = new_offset;
|
||||||
|
}
|
||||||
_vbo_loopback_vertex_list(ctx, node, save->vertex_store->buffer_in_ram);
|
_vbo_loopback_vertex_list(ctx, node, save->vertex_store->buffer_in_ram);
|
||||||
|
vao->BufferBinding[0].Offset = original;
|
||||||
|
|
||||||
_glapi_set_dispatch(dispatch);
|
_glapi_set_dispatch(dispatch);
|
||||||
}
|
}
|
||||||
|
|
@ -1587,8 +1632,10 @@ _ensure_draws_fits_in_storage(struct gl_context *ctx, int primcount, int vertcou
|
||||||
bool realloc_vert = save->vertex_size && (save->vert_count + vertcount >= save->max_vert);
|
bool realloc_vert = save->vertex_size && (save->vert_count + vertcount >= save->max_vert);
|
||||||
|
|
||||||
if (realloc_prim || realloc_vert) {
|
if (realloc_prim || realloc_vert) {
|
||||||
if (save->vert_count || save->prim_count)
|
if (save->vert_count || save->prim_count) {
|
||||||
|
/* TODO: this really isn't needed. We should realloc only the CPU-side memory. */
|
||||||
compile_vertex_list(ctx);
|
compile_vertex_list(ctx);
|
||||||
|
}
|
||||||
realloc_storage(ctx, realloc_prim ? primcount : -1, realloc_vert ? vertcount : -1);
|
realloc_storage(ctx, realloc_prim ? primcount : -1, realloc_vert ? vertcount : -1);
|
||||||
reset_counters(ctx);
|
reset_counters(ctx);
|
||||||
assert(save->prim_max);
|
assert(save->prim_max);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue