gallium: lower raw 64-bit vertex formats in cso/vbuf instead of st/mesa

This lowers each 64-bit vertex element into 1 or 2 32-bit vertex elements.
The motivation is to simplify st/mesa, which uses a lot more complicated
code to do the same thing, and do the lowering only on a cso_cache miss.

Reviewed-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11370>
This commit is contained in:
Marek Olšák 2021-05-26 23:45:45 -04:00 committed by Marge Bot
parent 7b23b7c88e
commit 8b6fd3f786
4 changed files with 103 additions and 3 deletions

View file

@ -38,6 +38,7 @@
#include "pipe/p_state.h"
#include "util/u_draw.h"
#include "util/u_framebuffer.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@ -1026,9 +1027,15 @@ cso_set_vertex_elements_direct(struct cso_context *ctx,
return;
memcpy(&cso->state, velems, key_size);
cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe,
velems->count,
&cso->state.velems[0]);
/* Lower 64-bit vertex attributes. */
unsigned new_count = velems->count;
const struct pipe_vertex_element *new_elems = velems->velems;
struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS];
util_lower_uint64_vertex_elements(&new_elems, &new_count, tmp);
cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, new_count,
new_elems);
iter = cso_insert_state(&ctx->cache, hash_key, CSO_VELEMENTS, cso);
if (cso_hash_iter_is_null(iter)) {

View file

@ -166,6 +166,90 @@ util_upload_index_buffer(struct pipe_context *pipe,
return *out_buffer != NULL;
}
/**
* Lower each UINT64 vertex element to 1 or 2 UINT32 vertex elements.
* 3 and 4 component formats are expanded into 2 slots.
*
* @param velems Original vertex elements, will be updated to contain
* the lowered vertex elements.
* @param velem_count Original count, will be updated to contain the count
* after lowering.
* @param tmp Temporary array of PIPE_MAX_ATTRIBS vertex elements.
*/
void
util_lower_uint64_vertex_elements(const struct pipe_vertex_element **velems,
unsigned *velem_count,
struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS])
{
const struct pipe_vertex_element *input = *velems;
unsigned count = *velem_count;
bool has_64bit = false;
for (unsigned i = 0; i < count; i++) {
has_64bit |= input[i].src_format >= PIPE_FORMAT_R64_UINT &&
input[i].src_format <= PIPE_FORMAT_R64G64B64A64_UINT;
}
/* Return the original vertex elements if there is nothing to do. */
if (!has_64bit)
return;
/* Lower 64_UINT to 32_UINT. */
unsigned new_count = 0;
for (unsigned i = 0; i < count; i++) {
enum pipe_format format = input[i].src_format;
/* If the shader input is dvec2 or smaller, reduce the number of
* components to 2 at most. If the shader input is dvec3 or larger,
* expand the number of components to 3 at least. If the 3rd component
* is out of bounds, the hardware shouldn't skip loading the first
* 2 components.
*/
if (format >= PIPE_FORMAT_R64_UINT &&
format <= PIPE_FORMAT_R64G64B64A64_UINT) {
if (input[i].dual_slot)
format = MAX2(format, PIPE_FORMAT_R64G64B64_UINT);
else
format = MIN2(format, PIPE_FORMAT_R64G64_UINT);
}
switch (format) {
case PIPE_FORMAT_R64_UINT:
tmp[new_count] = input[i];
tmp[new_count].src_format = PIPE_FORMAT_R32G32_UINT;
new_count++;
break;
case PIPE_FORMAT_R64G64_UINT:
tmp[new_count] = input[i];
tmp[new_count].src_format = PIPE_FORMAT_R32G32B32A32_UINT;
new_count++;
break;
case PIPE_FORMAT_R64G64B64_UINT:
case PIPE_FORMAT_R64G64B64A64_UINT:
assert(new_count + 2 <= PIPE_MAX_ATTRIBS);
tmp[new_count] = tmp[new_count + 1] = input[i];
tmp[new_count].src_format = PIPE_FORMAT_R32G32B32A32_UINT;
tmp[new_count + 1].src_format =
format == PIPE_FORMAT_R64G64B64_UINT ?
PIPE_FORMAT_R32G32_UINT :
PIPE_FORMAT_R32G32B32A32_UINT;
tmp[new_count + 1].src_offset += 16;
new_count += 2;
break;
default:
tmp[new_count++] = input[i];
break;
}
}
*velem_count = new_count;
*velems = tmp;
}
/* This is a helper for hardware bring-up. Don't remove. */
struct pipe_query *
util_begin_pipestat_query(struct pipe_context *ctx)

View file

@ -62,6 +62,11 @@ bool util_upload_index_buffer(struct pipe_context *pipe,
struct pipe_resource **out_buffer,
unsigned *out_offset, unsigned alignment);
/* Lower 64-bit UINT vertex elements to 1 or 2 32-bit UINT vertex elements.
 *
 * On return, *velems and *velem_count describe the lowered elements, which
 * are stored in the caller-provided "tmp" array. If none of the input
 * elements has a 64-bit UINT format, *velems and *velem_count are left
 * unchanged and "tmp" is not written.
 */
void
util_lower_uint64_vertex_elements(const struct pipe_vertex_element **velems,
unsigned *velem_count,
struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS]);
/* Helper function to determine if the varying should contain the point
* coordinates, given the sprite_coord_enable mask. Requires
* PIPE_CAP_TGSI_TEXCOORD to be enabled.

View file

@ -89,6 +89,7 @@
#include "util/u_dump.h"
#include "util/format/u_format.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "indices/u_primconvert.h"
@ -841,6 +842,9 @@ static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
const struct pipe_vertex_element *attribs)
{
struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS];
util_lower_uint64_vertex_elements(&attribs, &count, tmp);
struct pipe_context *pipe = mgr->pipe;
unsigned i;
struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];