diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 4e1e29be812..2547833aea5 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5386,11 +5386,10 @@ visit_load_interpolated_input(isel_context* ctx, nir_intrinsic_instr* instr) } bool -check_vertex_fetch_size(isel_context* ctx, const ac_data_format_info* vtx_info, unsigned offset, +check_vertex_fetch_size(isel_context* ctx, const ac_vtx_format_info* vtx_info, unsigned offset, unsigned binding_align, unsigned channels) { - unsigned vertex_byte_size = vtx_info->chan_byte_size * channels; - if (vtx_info->chan_byte_size != 4 && channels == 3) + if (!(vtx_info->has_hw_format & BITFIELD_BIT(channels - 1))) return false; /* Split typed vertex buffer loads on GFX6 and GFX10+ to avoid any @@ -5399,17 +5398,18 @@ check_vertex_fetch_size(isel_context* ctx, const ac_data_format_info* vtx_info, * also if the VBO offset is aligned to a scalar (eg. stride is 8 and VBO * offset is 2 for R16G16B16A16_SNORM). */ + unsigned vertex_byte_size = vtx_info->chan_byte_size * channels; return (ctx->options->gfx_level >= GFX7 && ctx->options->gfx_level <= GFX9) || (offset % vertex_byte_size == 0 && MAX2(binding_align, 1) % vertex_byte_size == 0); } uint8_t -get_fetch_data_format(isel_context* ctx, const ac_data_format_info* vtx_info, unsigned offset, - unsigned* channels, unsigned max_channels, unsigned binding_align) +get_fetch_format(isel_context* ctx, const ac_vtx_format_info* vtx_info, unsigned offset, + unsigned* channels, unsigned max_channels, unsigned binding_align) { if (!vtx_info->chan_byte_size) { *channels = vtx_info->num_channels; - return vtx_info->chan_format; + return vtx_info->hw_format[0]; } unsigned num_channels = *channels; @@ -5434,22 +5434,7 @@ get_fetch_data_format(isel_context* ctx, const ac_data_format_info* vtx_info, un num_channels = new_channels; } - switch (vtx_info->chan_format) { - case V_008F0C_BUF_DATA_FORMAT_8: - return std::array{V_008F0C_BUF_DATA_FORMAT_8, V_008F0C_BUF_DATA_FORMAT_8_8, - V_008F0C_BUF_DATA_FORMAT_INVALID, - V_008F0C_BUF_DATA_FORMAT_8_8_8_8}[num_channels - 1]; - case V_008F0C_BUF_DATA_FORMAT_16: - return std::array{V_008F0C_BUF_DATA_FORMAT_16, V_008F0C_BUF_DATA_FORMAT_16_16, - V_008F0C_BUF_DATA_FORMAT_INVALID, - V_008F0C_BUF_DATA_FORMAT_16_16_16_16}[num_channels - 1]; - case V_008F0C_BUF_DATA_FORMAT_32: - return std::array{V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_DATA_FORMAT_32_32, - V_008F0C_BUF_DATA_FORMAT_32_32_32, - V_008F0C_BUF_DATA_FORMAT_32_32_32_32}[num_channels - 1]; - } - unreachable("shouldn't reach here"); - return V_008F0C_BUF_DATA_FORMAT_INVALID; + return vtx_info->hw_format[num_channels - 1]; } void @@ -5503,12 +5488,12 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr) unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[location]; uint32_t attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[location]; uint32_t attrib_stride = ctx->options->key.vs.vertex_attribute_strides[location]; - unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[location]; + enum pipe_format attrib_format = + (enum pipe_format)ctx->options->key.vs.vertex_attribute_formats[location]; unsigned binding_align = ctx->options->key.vs.vertex_binding_align[attrib_binding]; - unsigned dfmt = attrib_format & 0xf; - unsigned nfmt = (attrib_format >> 4) & 0x7; - const struct ac_data_format_info* vtx_info = ac_get_data_format_info(dfmt); + const struct ac_vtx_format_info* vtx_info = + ac_get_vtx_format_info(GFX8, CHIP_POLARIS10, attrib_format); unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa) << component; unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels); @@ -5559,15 +5544,11 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr) /* use MUBUF when possible to avoid possible alignment issues */ /* TODO: we could use SDWA to unpack 8/16-bit attributes without extra instructions */ - bool use_mubuf = - (nfmt == V_008F0C_BUF_NUM_FORMAT_FLOAT || nfmt == V_008F0C_BUF_NUM_FORMAT_UINT || - nfmt == V_008F0C_BUF_NUM_FORMAT_SINT) && - vtx_info->chan_byte_size == 4 && bitsize != 16; - unsigned fetch_dfmt = V_008F0C_BUF_DATA_FORMAT_INVALID; + bool use_mubuf = vtx_info->chan_byte_size == 4 && bitsize != 16; + unsigned fetch_fmt = V_008F0C_BUF_DATA_FORMAT_INVALID; if (!use_mubuf) { - fetch_dfmt = - get_fetch_data_format(ctx, vtx_info, fetch_offset, &fetch_component, - vtx_info->num_channels - channel_start, binding_align); + fetch_fmt = get_fetch_format(ctx, vtx_info, fetch_offset, &fetch_component, + vtx_info->num_channels - channel_start, binding_align); } else { /* GFX6 only supports loading vec3 with MTBUF, split to vec2,scalar. */ if (fetch_component == 3 && ctx->options->gfx_level == GFX6) @@ -5644,8 +5625,10 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr) .instr; mubuf->mubuf().vtx_binding = attrib_binding + 1; } else { + unsigned dfmt = fetch_fmt & 0xf; + unsigned nfmt = fetch_fmt >> 4; Instruction* mtbuf = bld.mtbuf(opcode, Definition(fetch_dst), list, fetch_index, - soffset, fetch_dfmt, nfmt, fetch_offset, false, true) + soffset, dfmt, nfmt, fetch_offset, false, true) .instr; mtbuf->mtbuf().vtx_binding = attrib_binding + 1; } @@ -5665,7 +5648,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr) if (!direct_fetch) { bool is_float = - nfmt != V_008F0C_BUF_NUM_FORMAT_UINT && nfmt != V_008F0C_BUF_NUM_FORMAT_SINT; + nir_alu_type_get_base_type(nir_intrinsic_dest_type(instr)) == nir_type_float; unsigned num_components = instr->dest.ssa.num_components; diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 702a5831713..68639c21ec6 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -416,30 +416,19 @@ init_context(isel_context* ctx, nir_shader* shader) ctx->ub_config.max_workgroup_size[1] = 2048; ctx->ub_config.max_workgroup_size[2] = 2048; for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; i++) { - unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[i]; - unsigned dfmt = attrib_format & 0xf; - unsigned nfmt = (attrib_format >> 4) & 0x7; + pipe_format format = (pipe_format)ctx->options->key.vs.vertex_attribute_formats[i]; + const struct util_format_description* desc = util_format_description(format); - uint32_t max = UINT32_MAX; - if (nfmt == V_008F0C_BUF_NUM_FORMAT_UNORM) { + uint32_t max; + if (desc->channel[0].type != UTIL_FORMAT_TYPE_UNSIGNED) { + max = UINT32_MAX; + } else if (desc->channel[0].normalized) { max = 0x3f800000u; - } else if (nfmt == V_008F0C_BUF_NUM_FORMAT_UINT || nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED) { - bool uscaled = nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED; - switch (dfmt) { - case V_008F0C_BUF_DATA_FORMAT_8: - case V_008F0C_BUF_DATA_FORMAT_8_8: - case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: max = uscaled ? 0x437f0000u : UINT8_MAX; break; - case V_008F0C_BUF_DATA_FORMAT_10_10_10_2: - case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: max = uscaled ? 0x447fc000u : 1023; break; - case V_008F0C_BUF_DATA_FORMAT_10_11_11: - case V_008F0C_BUF_DATA_FORMAT_11_11_10: max = uscaled ? 0x44ffe000u : 2047; break; - case V_008F0C_BUF_DATA_FORMAT_16: - case V_008F0C_BUF_DATA_FORMAT_16_16: - case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: max = uscaled ? 0x477fff00u : UINT16_MAX; break; - case V_008F0C_BUF_DATA_FORMAT_32: - case V_008F0C_BUF_DATA_FORMAT_32_32: - case V_008F0C_BUF_DATA_FORMAT_32_32_32: - case V_008F0C_BUF_DATA_FORMAT_32_32_32_32: max = uscaled ? 0x4f800000u : UINT32_MAX; break; + } else { + max = 0; + for (unsigned j = 0; j < desc->nr_channels; j++) { + uint32_t chan_max = u_uintN_max(desc->channel[0].size); + max = MAX2(max, desc->channel[j].pure_integer ? chan_max : fui(chan_max)); } } ctx->ub_config.vertex_attrib_max[i] = max; diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 34ee6bfc48c..a79c25e5e13 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -394,11 +394,9 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp LLVMValueRef input; LLVMValueRef buffer_index; unsigned attrib_index = driver_location - VERT_ATTRIB_GENERIC0; - unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[attrib_index]; - unsigned data_format = attrib_format & 0x0f; - unsigned num_format = (attrib_format >> 4) & 0x07; - bool is_float = - num_format != V_008F0C_BUF_NUM_FORMAT_UINT && num_format != V_008F0C_BUF_NUM_FORMAT_SINT; + enum pipe_format attrib_format = ctx->options->key.vs.vertex_attribute_formats[attrib_index]; + const struct util_format_description *desc = util_format_description(attrib_format); + bool is_float = !desc->channel[0].pure_integer; uint8_t input_usage_mask = ctx->shader_info->vs.input_usage_mask[driver_location]; unsigned num_input_channels = util_last_bit(input_usage_mask); @@ -424,7 +422,8 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex), ""); } - const struct ac_data_format_info *vtx_info = ac_get_data_format_info(data_format); + const struct ac_vtx_format_info *vtx_info = + ac_get_vtx_format_info(GFX8, CHIP_POLARIS10, attrib_format); /* Adjust the number of channels to load based on the vertex attribute format. */ unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels); @@ -432,6 +431,9 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp unsigned attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[attrib_index]; unsigned attrib_stride = ctx->options->key.vs.vertex_attribute_strides[attrib_index]; + unsigned data_format = vtx_info->hw_format[num_channels - 1] & 0xf; + unsigned num_format = vtx_info->hw_format[0] >> 4; + unsigned desc_index = ctx->shader_info->vs.use_per_attribute_vb_descs ? attrib_index : attrib_binding; desc_index = util_bitcount(ctx->shader_info->vs.vb_desc_usage_mask & @@ -444,8 +446,9 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp * dynamic) is unaligned and also if the VBO offset is aligned to a scalar (eg. stride is 8 and * VBO offset is 2 for R16G16B16A16_SNORM). */ - if ((ctx->ac.gfx_level == GFX6 || ctx->ac.gfx_level >= GFX10) && vtx_info->chan_byte_size) { - unsigned chan_format = vtx_info->chan_format; + if (((ctx->ac.gfx_level == GFX6 || ctx->ac.gfx_level >= GFX10) && vtx_info->chan_byte_size) || + !(vtx_info->has_hw_format & BITFIELD_BIT(vtx_info->num_channels - 1))) { + unsigned chan_format = vtx_info->hw_format[0] & 0xf; LLVMValueRef values[4]; for (unsigned chan = 0; chan < num_channels; chan++) { diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index d0d153e6472..45e9e65fff3 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3010,6 +3010,7 @@ radv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipelin const struct radv_blend_state *blend) { struct radv_device *device = pipeline->base.device; + const struct radv_physical_device *pdevice = device->physical_device; struct radv_pipeline_key key = radv_generate_pipeline_key(&pipeline->base, pCreateInfo->flags); key.has_multiview_view_index = !!state->rp->view_mask; @@ -3023,16 +3024,9 @@ radv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipelin u_foreach_bit(i, state->vi->attributes_valid) { uint32_t binding = state->vi->attributes[i].binding; uint32_t offset = state->vi->attributes[i].offset; - VkFormat format = state->vi->attributes[i].format; - const struct util_format_description *format_desc; - unsigned num_format, data_format; - bool post_shuffle; + enum pipe_format format = vk_format_to_pipe_format(state->vi->attributes[i].format); - format_desc = vk_format_description(format); - radv_translate_vertex_format(device->physical_device, format, format_desc, &data_format, - &num_format, &post_shuffle, &key.vs.vertex_alpha_adjust[i]); - - key.vs.vertex_attribute_formats[i] = data_format | (num_format << 4); + key.vs.vertex_attribute_formats[i] = format; key.vs.vertex_attribute_bindings[i] = binding; key.vs.vertex_attribute_offsets[i] = offset; key.vs.instance_rate_divisors[i] = state->vi->bindings[binding].divisor; @@ -3056,13 +3050,10 @@ radv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipelin key.vs.instance_rate_inputs |= 1u << i; } - if (post_shuffle) { - key.vs.vertex_post_shuffle |= 1u << i; - } - - const struct ac_data_format_info *dfmt_info = ac_get_data_format_info(data_format); + const struct ac_vtx_format_info *vtx_info = + ac_get_vtx_format_info(pdevice->rad_info.gfx_level, pdevice->rad_info.family, format); unsigned attrib_align = - dfmt_info->chan_byte_size ? dfmt_info->chan_byte_size : dfmt_info->element_size; + vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size; /* If offset is misaligned, then the buffer offset must be too. Just skip updating * vertex_binding_align in this case. @@ -3803,7 +3794,8 @@ radv_adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alp } static bool -radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_key) +radv_lower_vs_input(nir_shader *nir, const struct radv_physical_device *pdevice, + const struct radv_pipeline_key *pipeline_key) { nir_function_impl *impl = nir_shader_get_entrypoint(nir); bool progress = false; @@ -3824,25 +3816,22 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke continue; unsigned location = nir_intrinsic_base(intrin) - VERT_ATTRIB_GENERIC0; - enum ac_vs_input_alpha_adjust alpha_adjust = - pipeline_key->vs.vertex_alpha_adjust[location]; - bool post_shuffle = pipeline_key->vs.vertex_post_shuffle & (1 << location); unsigned component = nir_intrinsic_component(intrin); unsigned num_components = intrin->dest.ssa.num_components; - unsigned attrib_format = pipeline_key->vs.vertex_attribute_formats[location]; - unsigned dfmt = attrib_format & 0xf; - unsigned nfmt = (attrib_format >> 4) & 0x7; - const struct ac_data_format_info *vtx_info = ac_get_data_format_info(dfmt); + enum pipe_format attrib_format = pipeline_key->vs.vertex_attribute_formats[location]; + const struct ac_vtx_format_info *desc = ac_get_vtx_format_info( + pdevice->rad_info.gfx_level, pdevice->rad_info.family, attrib_format); bool is_float = - nfmt != V_008F0C_BUF_NUM_FORMAT_UINT && nfmt != V_008F0C_BUF_NUM_FORMAT_SINT; + nir_alu_type_get_base_type(nir_intrinsic_dest_type(intrin)) == nir_type_float; unsigned mask = nir_ssa_def_components_read(&intrin->dest.ssa) << component; - unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels); + unsigned num_channels = MIN2(util_last_bit(mask), desc->num_channels); static const unsigned swizzle_normal[4] = {0, 1, 2, 3}; static const unsigned swizzle_post_shuffle[4] = {2, 1, 0, 3}; + bool post_shuffle = G_008F0C_DST_SEL_X(desc->dst_sel) == V_008F0C_SQ_SEL_Z; const unsigned *swizzle = post_shuffle ? swizzle_post_shuffle : swizzle_normal; b.cursor = nir_after_instr(instr); @@ -3871,9 +3860,9 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke } } - if (alpha_adjust != AC_ALPHA_ADJUST_NONE && component + num_components == 4) { + if (desc->alpha_adjust != AC_ALPHA_ADJUST_NONE && component + num_components == 4) { unsigned idx = num_components - 1; - channels[idx] = radv_adjust_vertex_fetch_alpha(&b, alpha_adjust, channels[idx]); + channels[idx] = radv_adjust_vertex_fetch_alpha(&b, desc->alpha_adjust, channels[idx]); } nir_ssa_def *new_dest = nir_vec(&b, channels, num_components); @@ -4579,7 +4568,8 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout } if (stages[MESA_SHADER_VERTEX].nir) { - NIR_PASS(_, stages[MESA_SHADER_VERTEX].nir, radv_lower_vs_input, pipeline_key); + NIR_PASS(_, stages[MESA_SHADER_VERTEX].nir, radv_lower_vs_input, device->physical_device, + pipeline_key); } if (stages[MESA_SHADER_FRAGMENT].nir && !radv_use_llvm_for_stage(device, MESA_SHADER_FRAGMENT)) { diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 5ef418bc6d0..2b2d2181629 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -71,8 +71,6 @@ struct radv_pipeline_key { uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS]; uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS]; uint8_t vertex_binding_align[MAX_VBS]; - enum ac_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS]; - uint32_t vertex_post_shuffle; uint32_t provoking_vtx_last : 1; uint32_t dynamic_input_state : 1; uint8_t topology;