zink: decompose vertex attribs into single components when not supported

This avoids vbuf in a lot more cases on radv, where 3-component attribs aren't supported.

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12771>
This commit is contained in:
Mike Blumenkrantz 2021-08-24 16:01:56 -04:00 committed by Marge Bot
parent c106c45a99
commit 1542f3eb47
7 changed files with 196 additions and 21 deletions

View file

@ -564,6 +564,79 @@ update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_
zs->streamout.have_xfb = !!zs->streamout.so_info.num_outputs;
}
/* State handed to lower_attrib() while one vertex input is being decomposed. */
struct decompose_state {
/* split[0] is the original input variable; split[1..] are the replacement
 * variables that lower_attrib() loads from instead
 */
nir_variable **split;
/* when set, the first replacement keeps the original vector type and
 * lower_attrib() takes both the x and the w channel from it
 */
bool needs_w;
};
/* Instruction callback used by decompose_attribs().
 *
 * Rewrites every load_deref of the original attribute variable
 * (state->split[0]) into a vector assembled from loads of the replacement
 * variables (state->split[1..]).
 *
 * Returns true if the instruction was replaced, false if it was left alone.
 */
static bool
lower_attrib(nir_builder *b, nir_instr *instr, void *data)
{
   struct decompose_state *state = data;
   nir_variable **split = state->split;

   /* only load_deref intrinsics reading the original variable are of interest */
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
   if (load->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_variable *orig = split[0];
   nir_deref_instr *src_deref = nir_src_as_deref(load->src[0]);
   if (nir_deref_instr_get_variable(src_deref) != orig)
      return false;

   const unsigned num_components = glsl_get_vector_elements(orig->type);
   /* with needs_w, the last component is not loaded from its own variable */
   const unsigned num_loads = state->needs_w ? num_components - 1 : num_components;

   nir_ssa_def *comps[4];
   b->cursor = nir_after_instr(instr);
   for (unsigned c = 0; c < num_loads; c++) {
      nir_deref_instr *split_deref = nir_build_deref_var(b, split[c + 1]);
      comps[c] = nir_load_deref(b, split_deref);
   }

   if (state->needs_w) {
      /* oob load w component to get correct value for int/float:
       * the first replacement was loaded with the full vector type, so take
       * w from its channel 3 and then narrow it down to x
       */
      nir_ssa_def *first = comps[0];
      comps[3] = nir_channel(b, first, 3);
      comps[0] = nir_channel(b, first, 0);
   }

   nir_ssa_def *combined = nir_vec(b, comps, num_components);
   nir_ssa_def_rewrite_uses(&load->dest.ssa, combined);
   nir_instr_remove_v(instr);
   return true;
}
/* Splits the vertex inputs selected by the two location masks into several
 * input variables and rewrites all loads of the originals.
 *
 * For every selected driver_location the original variable is cloned once
 * per component (one clone fewer when the location is in
 * decomposed_attrs_without_w and the variable has 4 components; in that case
 * the first clone keeps the full vector type). Each clone is given the lowest
 * driver_location not already taken, starting with the slot of the original.
 * The original is demoted to a shader temp and removed afterwards.
 *
 * Always returns true.
 */
static bool
decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
{
   /* gather the occupied input locations, then flip to get the free ones */
   uint32_t avail = 0;
   nir_foreach_variable_with_modes(in_var, nir, nir_var_shader_in)
      avail |= BITFIELD_BIT(in_var->data.driver_location);
   avail = ~avail;

   u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
      nir_variable *split[5];
      struct decompose_state state;
      state.split = split;

      nir_variable *attr = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
      assert(attr);
      split[0] = attr;
      /* the original's slot becomes available for its first replacement */
      avail |= BITFIELD_BIT(attr->data.driver_location);

      const struct glsl_type *elem_type = glsl_type_is_scalar(attr->type) ?
                                          attr->type : glsl_get_array_element(attr->type);
      const unsigned num_components = glsl_get_vector_elements(attr->type);
      const bool without_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0;
      state.needs_w = without_w && num_components == 4;

      const unsigned num_splits = state.needs_w ? num_components - 1 : num_components;
      for (unsigned i = 0; i < num_splits; i++) {
         nir_variable *piece = nir_variable_clone(attr, nir);
         piece->name = ralloc_asprintf(nir, "%s_split%u", attr->name, i);
         /* only the first piece of a needs_w attribute stays a full vector */
         piece->type = (state.needs_w && i == 0) ? attr->type : elem_type;
         piece->data.driver_location = ffs(avail) - 1;
         avail &= ~BITFIELD_BIT(piece->data.driver_location);
         nir_shader_add_variable(nir, piece);
         split[i + 1] = piece;
      }

      attr->data.mode = nir_var_shader_temp;
      nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
   }

   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir);
   return true;
}
static void
assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
@ -731,17 +804,25 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad
/* TODO: use a separate mem ctx here for ralloc */
switch (zs->nir->info.stage) {
case MESA_SHADER_VERTEX:
case MESA_SHADER_VERTEX: {
uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
const struct zink_vs_key *vs_key = zink_vs_key(key);
decomposed_attrs = vs_key->decomposed_attrs;
decomposed_attrs_without_w = vs_key->decomposed_attrs_without_w;
if (decomposed_attrs || decomposed_attrs_without_w)
NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
FALLTHROUGH;
}
case MESA_SHADER_TESS_EVAL:
case MESA_SHADER_GEOMETRY:
if (zink_vs_key(key)->last_vertex_stage) {
if (zink_vs_key_base(key)->last_vertex_stage) {
if (zs->streamout.have_xfb)
streamout = &zs->streamout;
if (!zink_vs_key(key)->clip_halfz) {
if (!zink_vs_key_base(key)->clip_halfz) {
NIR_PASS_V(nir, nir_lower_clip_halfz);
}
if (zink_vs_key(key)->push_drawid) {
if (zink_vs_key_base(key)->push_drawid) {
NIR_PASS_V(nir, lower_drawid);
}
}

View file

@ -78,6 +78,8 @@ struct zink_gfx_pipeline_state {
uint8_t coord_replace_bits;
bool coord_replace_yinvert;
bool drawid_broken;
uint32_t decomposed_attrs;
uint32_t decomposed_attrs_without_w;
struct zink_blend_state *blend_state;
struct zink_render_pass *render_pass;
VkPipeline pipeline;

View file

@ -105,11 +105,11 @@ keybox_equals(const void *void_a, const void *void_b)
}
static void
shader_key_vs_gen(struct zink_context *ctx, struct zink_shader *zs,
struct zink_shader *shaders[ZINK_SHADER_COUNT], struct zink_shader_key *key)
shader_key_vs_base_gen(struct zink_context *ctx, struct zink_shader *zs,
struct zink_shader *shaders[ZINK_SHADER_COUNT], struct zink_shader_key *key)
{
struct zink_vs_key *vs_key = &key->key.vs;
key->size = sizeof(struct zink_vs_key);
struct zink_vs_key_base *vs_key = &key->key.vs_base;
key->size = sizeof(struct zink_vs_key_base);
vs_key->clip_halfz = ctx->rast_state && ctx->rast_state->base.clip_halfz;
switch (zs->nir->info.stage) {
@ -128,6 +128,17 @@ shader_key_vs_gen(struct zink_context *ctx, struct zink_shader *zs,
}
}
static void
shader_key_vs_gen(struct zink_context *ctx, struct zink_shader *zs,
struct zink_shader *shaders[ZINK_SHADER_COUNT], struct zink_shader_key *key)
{
struct zink_vs_key *vs_key = &key->key.vs;
shader_key_vs_base_gen(ctx, zs, shaders, key);
vs_key->decomposed_attrs = ctx->element_state->decomposed_attrs;
vs_key->decomposed_attrs_without_w = ctx->element_state->decomposed_attrs_without_w;
key->size += 2 * 4;
}
static void
shader_key_fs_gen(struct zink_context *ctx, struct zink_shader *zs,
struct zink_shader *shaders[ZINK_SHADER_COUNT], struct zink_shader_key *key)
@ -167,8 +178,8 @@ static zink_shader_key_gen shader_key_vtbl[] =
[MESA_SHADER_VERTEX] = shader_key_vs_gen,
[MESA_SHADER_TESS_CTRL] = shader_key_tcs_gen,
/* reusing vs key for now since we're only using clip_halfz */
[MESA_SHADER_TESS_EVAL] = shader_key_vs_gen,
[MESA_SHADER_GEOMETRY] = shader_key_vs_gen,
[MESA_SHADER_TESS_EVAL] = shader_key_vs_base_gen,
[MESA_SHADER_GEOMETRY] = shader_key_vs_base_gen,
[MESA_SHADER_FRAGMENT] = shader_key_fs_gen,
};
@ -179,7 +190,7 @@ get_default_shader_module_ptr(struct zink_gfx_program *prog, struct zink_shader
if (zs->nir->info.stage == MESA_SHADER_VERTEX ||
zs->nir->info.stage == MESA_SHADER_TESS_EVAL) {
/* no streamout or halfz */
if (!zink_vs_key(key)->last_vertex_stage)
if (!zink_vs_key_base(key)->last_vertex_stage)
return &prog->default_variants[zs->nir->info.stage][1];
}
return &prog->default_variants[zs->nir->info.stage][0];

View file

@ -994,9 +994,15 @@ zink_is_format_supported(struct pipe_screen *pscreen,
VkFormatProperties props = screen->format_props[format];
if (target == PIPE_BUFFER) {
if (bind & PIPE_BIND_VERTEX_BUFFER &&
!(props.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT))
return false;
if (bind & PIPE_BIND_VERTEX_BUFFER) {
if (!(props.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT)) {
enum pipe_format new_format = zink_decompose_vertex_format(format);
if (!new_format)
return false;
if (!(screen->format_props[new_format].bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT))
return false;
}
}
if (bind & PIPE_BIND_SAMPLER_VIEW &&
!(props.bufferFeatures & VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT))

View file

@ -26,12 +26,19 @@
#ifndef ZINK_SHADER_KEYS_H
# define ZINK_SHADER_KEYS_H
struct zink_vs_key {
/* Shader key fields common to the vertex, tess eval and geometry stages. */
struct zink_vs_key_base {
/* taken from the rasterizer state; nir_lower_clip_halfz is applied when unset */
bool clip_halfz;
/* when set, lower_drawid is applied to the shader */
bool push_drawid;
/* gates the streamout, clip_halfz and drawid handling at compile time */
bool last_vertex_stage;
};
/* Shader key for the vertex stage: the common base plus the masks that select
 * which vertex inputs get decomposed by the compiler.
 */
struct zink_vs_key {
struct zink_vs_key_base base;
/* explicit filler between the three flags of base and the 32-bit masks;
 * NOTE(review): presumably there so the struct has no implicit padding — confirm
 */
uint8_t pad;
/* one bit per input location; copied from the bound vertex element state */
uint32_t decomposed_attrs;
/* like decomposed_attrs, but 4-component inputs in this mask take their
 * w channel from the first split variable instead of a separate one
 */
uint32_t decomposed_attrs_without_w;
};
struct zink_fs_key {
uint8_t coord_replace_bits;
bool coord_replace_yinvert;
@ -52,6 +59,7 @@ struct zink_shader_key {
union {
/* reuse vs key for now with tes/gs since we only use clip_halfz */
struct zink_vs_key vs;
struct zink_vs_key_base vs_base;
struct zink_fs_key fs;
} key;
struct zink_shader_key_base base;
@ -67,6 +75,12 @@ zink_fs_key(const struct zink_shader_key *key)
return &key->key.fs;
}
/* Returns a read-only pointer to the vs_base member of the shader key union. */
static inline const struct zink_vs_key_base *
zink_vs_key_base(const struct zink_shader_key *key)
{
   const struct zink_vs_key_base *base = &key->key.vs_base;
   return base;
}
static inline const struct zink_vs_key *
zink_vs_key(const struct zink_shader_key *key)
{

View file

@ -24,6 +24,7 @@
#include "zink_state.h"
#include "zink_context.h"
#include "zink_format.h"
#include "zink_screen.h"
#include "compiler/shader_enums.h"
@ -49,6 +50,10 @@ zink_create_vertex_elements_state(struct pipe_context *pctx,
buffer_map[i] = -1;
int num_bindings = 0;
unsigned num_decomposed = 0;
uint32_t size8 = 0;
uint32_t size16 = 0;
uint32_t size32 = 0;
for (i = 0; i < num_elements; ++i) {
const struct pipe_vertex_element *elem = elements + i;
@ -59,7 +64,6 @@ zink_create_vertex_elements_state(struct pipe_context *pctx,
}
binding = buffer_map[binding];
ves->bindings[binding].binding = binding;
ves->bindings[binding].inputRate = elem->instance_divisor ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
@ -68,24 +72,73 @@ zink_create_vertex_elements_state(struct pipe_context *pctx,
debug_printf("zink: clamping instance divisor %u to %u\n", elem->instance_divisor, screen->info.vdiv_props.maxVertexAttribDivisor);
ves->divisor[binding] = MIN2(elem->instance_divisor, screen->info.vdiv_props.maxVertexAttribDivisor);
VkFormat format;
if (screen->format_props[elem->src_format].bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT)
format = zink_get_format(screen, elem->src_format);
else {
enum pipe_format new_format = zink_decompose_vertex_format(elem->src_format);
assert(new_format);
num_decomposed++;
assert(screen->format_props[new_format].bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT);
if (util_format_get_blocksize(new_format) == 4)
size32 |= BITFIELD_BIT(i);
else if (util_format_get_blocksize(new_format) == 2)
size16 |= BITFIELD_BIT(i);
else
size8 |= BITFIELD_BIT(i);
format = zink_get_format(screen, new_format);
unsigned size;
if (i < 8)
size = 1;
else if (i < 16)
size = 2;
else
size = 4;
if (util_format_get_nr_components(elem->src_format) == 4) {
ves->decomposed_attrs |= BITFIELD_BIT(i);
ves->decomposed_attrs_size = size;
} else {
ves->decomposed_attrs_without_w |= BITFIELD_BIT(i);
}
}
if (screen->info.have_EXT_vertex_input_dynamic_state) {
ves->hw_state.dynattribs[i].sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT;
ves->hw_state.dynattribs[i].binding = binding;
ves->hw_state.dynattribs[i].location = i;
ves->hw_state.dynattribs[i].format = zink_get_format(screen,
elem->src_format);
ves->hw_state.dynattribs[i].format = format;
assert(ves->hw_state.dynattribs[i].format != VK_FORMAT_UNDEFINED);
ves->hw_state.dynattribs[i].offset = elem->src_offset;
} else {
ves->hw_state.attribs[i].binding = binding;
ves->hw_state.attribs[i].location = i;
ves->hw_state.attribs[i].format = zink_get_format(screen,
elem->src_format);
ves->hw_state.attribs[i].format = format;
assert(ves->hw_state.attribs[i].format != VK_FORMAT_UNDEFINED);
ves->hw_state.attribs[i].offset = elem->src_offset;
}
}
assert(num_decomposed + num_elements <= PIPE_MAX_ATTRIBS);
u_foreach_bit(i, ves->decomposed_attrs | ves->decomposed_attrs_without_w) {
const struct pipe_vertex_element *elem = elements + i;
const struct util_format_description *desc = util_format_description(elem->src_format);
unsigned size = 1;
if (size32 & BITFIELD_BIT(i))
size = 4;
else if (size16 & BITFIELD_BIT(i))
size = 2;
for (unsigned j = 1; j < desc->nr_channels; j++) {
if (screen->info.have_EXT_vertex_input_dynamic_state) {
memcpy(&ves->hw_state.dynattribs[num_elements], &ves->hw_state.dynattribs[i], sizeof(VkVertexInputAttributeDescription2EXT));
ves->hw_state.dynattribs[num_elements].location = num_elements;
ves->hw_state.dynattribs[num_elements].offset += j * size;
} else {
memcpy(&ves->hw_state.attribs[num_elements], &ves->hw_state.attribs[i], sizeof(VkVertexInputAttributeDescription));
ves->hw_state.attribs[num_elements].location = num_elements;
ves->hw_state.attribs[num_elements].offset += j * size;
}
num_elements++;
}
}
ves->hw_state.num_bindings = num_bindings;
ves->hw_state.num_attribs = num_elements;
if (screen->info.have_EXT_vertex_input_dynamic_state) {
@ -124,6 +177,11 @@ zink_bind_vertex_elements_state(struct pipe_context *pctx,
ctx->vertex_state_changed = !zink_screen(pctx->screen)->info.have_EXT_vertex_input_dynamic_state;
ctx->vertex_buffers_dirty = ctx->element_state->hw_state.num_bindings > 0;
}
if (ctx->element_state->decomposed_attrs != state->decomposed_attrs ||
ctx->element_state->decomposed_attrs_without_w != state->decomposed_attrs_without_w)
ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_VERTEX);
state->decomposed_attrs = ctx->element_state->decomposed_attrs;
state->decomposed_attrs_without_w = ctx->element_state->decomposed_attrs_without_w;
state->element_state = &ctx->element_state->hw_state;
} else {
state->element_state = NULL;

View file

@ -52,6 +52,9 @@ struct zink_vertex_elements_state {
} bindings[PIPE_MAX_ATTRIBS];
uint32_t divisor[PIPE_MAX_ATTRIBS];
uint8_t binding_map[PIPE_MAX_ATTRIBS];
uint32_t decomposed_attrs;
unsigned decomposed_attrs_size;
uint32_t decomposed_attrs_without_w;
struct zink_vertex_elements_hw_state hw_state;
};