mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 22:00:13 +01:00
radv: handle unaligned vertex fetches on GFX6/GFX10
The Vulkan spec says nothing about vertex attribute alignment.

Fixes a test failure on GFX6 and a GPU hang on GFX10 with:
dEQP-VK.spirv_assembly.instruction.spirv1p4.entrypoint.tess_con_pc_entry_point

vkpipeline-db results on GFX10:

Totals from affected shaders:
SGPRS: 463772 -> 472972 (1.98 %)
VGPRS: 343208 -> 343752 (0.16 %)
Spilled SGPRs: 323 -> 336 (4.02 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 13806200 -> 14164472 (2.60 %) bytes
Max Waves: 84021 -> 83755 (-0.32 %)

Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/2161
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
parent
bd888bc1d6
commit
b37c91c12e
1 changed file with 87 additions and 48 deletions
|
|
@ -1280,34 +1280,28 @@ adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
|
||||||
return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
|
return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned
|
static const struct vertex_format_info {
|
||||||
get_num_channels_from_data_format(unsigned data_format)
|
uint8_t vertex_byte_size;
|
||||||
{
|
uint8_t num_channels;
|
||||||
switch (data_format) {
|
uint8_t chan_byte_size;
|
||||||
case V_008F0C_BUF_DATA_FORMAT_8:
|
uint8_t chan_format;
|
||||||
case V_008F0C_BUF_DATA_FORMAT_16:
|
} vertex_format_table[] = {
|
||||||
case V_008F0C_BUF_DATA_FORMAT_32:
|
{ 0, 4, 0, V_008F0C_BUF_DATA_FORMAT_INVALID }, /* BUF_DATA_FORMAT_INVALID */
|
||||||
return 1;
|
{ 1, 1, 1, V_008F0C_BUF_DATA_FORMAT_8 }, /* BUF_DATA_FORMAT_8 */
|
||||||
case V_008F0C_BUF_DATA_FORMAT_8_8:
|
{ 2, 1, 2, V_008F0C_BUF_DATA_FORMAT_16 }, /* BUF_DATA_FORMAT_16 */
|
||||||
case V_008F0C_BUF_DATA_FORMAT_16_16:
|
{ 2, 2, 1, V_008F0C_BUF_DATA_FORMAT_8 }, /* BUF_DATA_FORMAT_8_8 */
|
||||||
case V_008F0C_BUF_DATA_FORMAT_32_32:
|
{ 4, 1, 4, V_008F0C_BUF_DATA_FORMAT_32 }, /* BUF_DATA_FORMAT_32 */
|
||||||
return 2;
|
{ 4, 2, 2, V_008F0C_BUF_DATA_FORMAT_16 }, /* BUF_DATA_FORMAT_16_16 */
|
||||||
case V_008F0C_BUF_DATA_FORMAT_10_11_11:
|
{ 4, 3, 0, V_008F0C_BUF_DATA_FORMAT_10_11_11 }, /* BUF_DATA_FORMAT_10_11_11 */
|
||||||
case V_008F0C_BUF_DATA_FORMAT_11_11_10:
|
{ 4, 3, 0, V_008F0C_BUF_DATA_FORMAT_11_11_10 }, /* BUF_DATA_FORMAT_11_11_10 */
|
||||||
case V_008F0C_BUF_DATA_FORMAT_32_32_32:
|
{ 4, 4, 0, V_008F0C_BUF_DATA_FORMAT_10_10_10_2 }, /* BUF_DATA_FORMAT_10_10_10_2 */
|
||||||
return 3;
|
{ 4, 4, 0, V_008F0C_BUF_DATA_FORMAT_2_10_10_10 }, /* BUF_DATA_FORMAT_2_10_10_10 */
|
||||||
case V_008F0C_BUF_DATA_FORMAT_8_8_8_8:
|
{ 4, 4, 1, V_008F0C_BUF_DATA_FORMAT_8 }, /* BUF_DATA_FORMAT_8_8_8_8 */
|
||||||
case V_008F0C_BUF_DATA_FORMAT_10_10_10_2:
|
{ 8, 2, 4, V_008F0C_BUF_DATA_FORMAT_32 }, /* BUF_DATA_FORMAT_32_32 */
|
||||||
case V_008F0C_BUF_DATA_FORMAT_2_10_10_10:
|
{ 8, 4, 2, V_008F0C_BUF_DATA_FORMAT_16 }, /* BUF_DATA_FORMAT_16_16_16_16 */
|
||||||
case V_008F0C_BUF_DATA_FORMAT_16_16_16_16:
|
{ 12, 3, 4, V_008F0C_BUF_DATA_FORMAT_32 }, /* BUF_DATA_FORMAT_32_32_32 */
|
||||||
case V_008F0C_BUF_DATA_FORMAT_32_32_32_32:
|
{ 16, 4, 4, V_008F0C_BUF_DATA_FORMAT_32 }, /* BUF_DATA_FORMAT_32_32_32_32 */
|
||||||
return 4;
|
};
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
static LLVMValueRef
|
static LLVMValueRef
|
||||||
radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
|
radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
|
||||||
|
|
@ -1393,11 +1387,13 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
|
||||||
ctx->args->ac.base_vertex), "");
|
ctx->args->ac.base_vertex), "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(data_format < ARRAY_SIZE(vertex_format_table));
|
||||||
|
const struct vertex_format_info *vtx_info = &vertex_format_table[data_format];
|
||||||
|
|
||||||
/* Adjust the number of channels to load based on the vertex
|
/* Adjust the number of channels to load based on the vertex
|
||||||
* attribute format.
|
* attribute format.
|
||||||
*/
|
*/
|
||||||
unsigned num_format_channels = get_num_channels_from_data_format(data_format);
|
unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels);
|
||||||
unsigned num_channels = MIN2(num_input_channels, num_format_channels);
|
|
||||||
unsigned attrib_binding = ctx->args->options->key.vs.vertex_attribute_bindings[attrib_index];
|
unsigned attrib_binding = ctx->args->options->key.vs.vertex_attribute_bindings[attrib_index];
|
||||||
unsigned attrib_offset = ctx->args->options->key.vs.vertex_attribute_offsets[attrib_index];
|
unsigned attrib_offset = ctx->args->options->key.vs.vertex_attribute_offsets[attrib_index];
|
||||||
unsigned attrib_stride = ctx->args->options->key.vs.vertex_attribute_strides[attrib_index];
|
unsigned attrib_stride = ctx->args->options->key.vs.vertex_attribute_strides[attrib_index];
|
||||||
|
|
@ -1409,27 +1405,70 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
|
||||||
num_channels = MAX2(num_channels, 3);
|
num_channels = MAX2(num_channels, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (attrib_stride != 0 && attrib_offset > attrib_stride) {
|
|
||||||
LLVMValueRef buffer_offset =
|
|
||||||
LLVMConstInt(ctx->ac.i32,
|
|
||||||
attrib_offset / attrib_stride, false);
|
|
||||||
|
|
||||||
buffer_index = LLVMBuildAdd(ctx->ac.builder,
|
|
||||||
buffer_index,
|
|
||||||
buffer_offset, "");
|
|
||||||
|
|
||||||
attrib_offset = attrib_offset % attrib_stride;
|
|
||||||
}
|
|
||||||
|
|
||||||
t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
|
t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
|
||||||
t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
|
t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
|
||||||
|
|
||||||
input = ac_build_struct_tbuffer_load(&ctx->ac, t_list,
|
/* Perform per-channel vertex fetch operations if unaligned
|
||||||
buffer_index,
|
* accesses are detected. Only GFX6 and GFX10 are affected.
|
||||||
LLVMConstInt(ctx->ac.i32, attrib_offset, false),
|
*/
|
||||||
ctx->ac.i32_0, ctx->ac.i32_0,
|
bool unaligned_vertex_fetches = false;
|
||||||
num_channels,
|
if ((ctx->ac.chip_class == GFX6 || ctx->ac.chip_class == GFX10) &&
|
||||||
data_format, num_format, 0, true);
|
vtx_info->chan_format != data_format &&
|
||||||
|
((attrib_offset % vtx_info->vertex_byte_size) ||
|
||||||
|
(attrib_stride % vtx_info->vertex_byte_size)))
|
||||||
|
unaligned_vertex_fetches = true;
|
||||||
|
|
||||||
|
if (unaligned_vertex_fetches) {
|
||||||
|
unsigned chan_format = vtx_info->chan_format;
|
||||||
|
LLVMValueRef values[4];
|
||||||
|
|
||||||
|
assert(ctx->ac.chip_class == GFX6 ||
|
||||||
|
ctx->ac.chip_class == GFX10);
|
||||||
|
|
||||||
|
for (unsigned chan = 0; chan < num_channels; chan++) {
|
||||||
|
unsigned chan_offset = attrib_offset + chan * vtx_info->chan_byte_size;
|
||||||
|
LLVMValueRef chan_index = buffer_index;
|
||||||
|
|
||||||
|
if (attrib_stride != 0 && chan_offset > attrib_stride) {
|
||||||
|
LLVMValueRef buffer_offset =
|
||||||
|
LLVMConstInt(ctx->ac.i32,
|
||||||
|
chan_offset / attrib_stride, false);
|
||||||
|
|
||||||
|
chan_index = LLVMBuildAdd(ctx->ac.builder,
|
||||||
|
buffer_index,
|
||||||
|
buffer_offset, "");
|
||||||
|
|
||||||
|
chan_offset = chan_offset % attrib_stride;
|
||||||
|
}
|
||||||
|
|
||||||
|
values[chan] = ac_build_struct_tbuffer_load(&ctx->ac, t_list,
|
||||||
|
chan_index,
|
||||||
|
LLVMConstInt(ctx->ac.i32, chan_offset, false),
|
||||||
|
ctx->ac.i32_0, ctx->ac.i32_0, 1,
|
||||||
|
chan_format, num_format, 0, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
input = ac_build_gather_values(&ctx->ac, values, num_channels);
|
||||||
|
} else {
|
||||||
|
if (attrib_stride != 0 && attrib_offset > attrib_stride) {
|
||||||
|
LLVMValueRef buffer_offset =
|
||||||
|
LLVMConstInt(ctx->ac.i32,
|
||||||
|
attrib_offset / attrib_stride, false);
|
||||||
|
|
||||||
|
buffer_index = LLVMBuildAdd(ctx->ac.builder,
|
||||||
|
buffer_index,
|
||||||
|
buffer_offset, "");
|
||||||
|
|
||||||
|
attrib_offset = attrib_offset % attrib_stride;
|
||||||
|
}
|
||||||
|
|
||||||
|
input = ac_build_struct_tbuffer_load(&ctx->ac, t_list,
|
||||||
|
buffer_index,
|
||||||
|
LLVMConstInt(ctx->ac.i32, attrib_offset, false),
|
||||||
|
ctx->ac.i32_0, ctx->ac.i32_0,
|
||||||
|
num_channels,
|
||||||
|
data_format, num_format, 0, true);
|
||||||
|
}
|
||||||
|
|
||||||
if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
|
if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
|
||||||
LLVMValueRef c[4];
|
LLVMValueRef c[4];
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue