diff --git a/src/freedreno/registers/a6xx.xml b/src/freedreno/registers/a6xx.xml
index 967f1382401..044e6da65b1 100644
--- a/src/freedreno/registers/a6xx.xml
+++ b/src/freedreno/registers/a6xx.xml
@@ -2675,7 +2675,8 @@ to upconvert to 32b float internally?
-
+
+
@@ -2723,8 +2724,9 @@ to upconvert to 32b float internally?
-
+
+
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index f38c07f34e3..e629c941ef2 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -3439,18 +3439,15 @@ tu6_bind_draw_states(struct tu_cmd_buffer *cmd,
(TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_VERTEX_BUFFERS)) {
for (uint32_t i = 0; i < pipeline->vi.count; i++) {
const uint32_t binding = pipeline->vi.bindings[i];
- const uint32_t stride = pipeline->vi.strides[i];
const struct tu_buffer *buf = cmd->state.vb.buffers[binding];
const VkDeviceSize offset = buf->bo_offset +
- cmd->state.vb.offsets[binding] +
- pipeline->vi.offsets[i];
+ cmd->state.vb.offsets[binding];
const VkDeviceSize size =
offset < buf->bo->size ? buf->bo->size - offset : 0;
tu_cs_emit_regs(cs,
A6XX_VFD_FETCH_BASE(i, .bo = buf->bo, .bo_offset = offset),
- A6XX_VFD_FETCH_SIZE(i, size),
- A6XX_VFD_FETCH_STRIDE(i, stride));
+ A6XX_VFD_FETCH_SIZE(i, size));
}
}
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index 6312a84bff9..6412fcaaf42 100644
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -779,7 +779,7 @@ tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
.maxDescriptorSetInputAttachments = max_descriptor_set_size,
.maxVertexInputAttributes = 32,
.maxVertexInputBindings = 32,
- .maxVertexInputAttributeOffset = 2047,
+ .maxVertexInputAttributeOffset = 4095,
.maxVertexInputBindingStride = 2048,
.maxVertexOutputComponents = 128,
.maxTessellationGenerationLevel = 64,
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index 816b3de5c32..adf848d5697 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -112,32 +112,6 @@ tu_shader_stage(VkShaderStageFlagBits stage)
}
}
-static const VkVertexInputAttributeDescription *
-tu_find_vertex_input_attribute(
- const VkPipelineVertexInputStateCreateInfo *vi_info, uint32_t slot)
-{
- assert(slot >= VERT_ATTRIB_GENERIC0);
- slot -= VERT_ATTRIB_GENERIC0;
- for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
- if (vi_info->pVertexAttributeDescriptions[i].location == slot)
- return &vi_info->pVertexAttributeDescriptions[i];
- }
- return NULL;
-}
-
-static const VkVertexInputBindingDescription *
-tu_find_vertex_input_binding(
- const VkPipelineVertexInputStateCreateInfo *vi_info,
- const VkVertexInputAttributeDescription *vi_attr)
-{
- assert(vi_attr);
- for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
- if (vi_info->pVertexBindingDescriptions[i].binding == vi_attr->binding)
- return &vi_info->pVertexBindingDescriptions[i];
- }
- return NULL;
-}
-
static bool
tu_logic_op_reads_dst(VkLogicOp op)
{
@@ -1380,61 +1354,96 @@ tu6_emit_program(struct tu_cs *cs,
static void
tu6_emit_vertex_input(struct tu_cs *cs,
const struct ir3_shader_variant *vs,
- const VkPipelineVertexInputStateCreateInfo *vi_info,
+ const VkPipelineVertexInputStateCreateInfo *info,
uint8_t bindings[MAX_VERTEX_ATTRIBS],
- uint16_t strides[MAX_VERTEX_ATTRIBS],
- uint16_t offsets[MAX_VERTEX_ATTRIBS],
uint32_t *count)
{
+ /* Emit the vertex input state described by "info" into "cs".
+ *
+ * One VFD_FETCH_STRIDE is written per binding description and the binding
+ * number of each fetch slot is recorded in "bindings". One VFD_DECODE /
+ * VFD_DEST_CNTL pair is written per attribute description that matches an
+ * input of the shader variant "vs"; attributes without a matching input
+ * are skipped. "*count" receives the number of fetch slots written.
+ */
+ uint32_t vfd_fetch_idx = 0;
uint32_t vfd_decode_idx = 0;
+ uint32_t binding_instanced = 0; /* bitmask of instanced bindings */
- for (uint32_t i = 0; i < vs->inputs_count; i++) {
- if (vs->inputs[i].sysval || !vs->inputs[i].compmask)
- continue;
+ for (uint32_t i = 0; i < info->vertexBindingDescriptionCount; i++) {
+ const VkVertexInputBindingDescription *binding =
+ &info->pVertexBindingDescriptions[i];
+ /* bindings[] has MAX_VERTEX_ATTRIBS entries and binding_instanced has
+ * 32 bits, so the fetch slot and the binding number must fit both */
+ assert(vfd_fetch_idx < MAX_VERTEX_ATTRIBS);
+ assert(binding->binding < 32);
- const VkVertexInputAttributeDescription *vi_attr =
- tu_find_vertex_input_attribute(vi_info, vs->inputs[i].slot);
- const VkVertexInputBindingDescription *vi_binding =
- tu_find_vertex_input_binding(vi_info, vi_attr);
- assert(vi_attr && vi_binding);
+ tu_cs_emit_regs(cs,
+ A6XX_VFD_FETCH_STRIDE(vfd_fetch_idx, binding->stride));
- const struct tu_native_format format = tu6_format_vtx(vi_attr->format);
+ if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
+ /* unsigned shift: binding 31 must not land in a signed int's sign bit */
+ binding_instanced |= 1u << binding->binding;
- uint32_t vfd_decode = A6XX_VFD_DECODE_INSTR_IDX(vfd_decode_idx) |
- A6XX_VFD_DECODE_INSTR_FORMAT(format.fmt) |
- A6XX_VFD_DECODE_INSTR_SWAP(format.swap) |
- A6XX_VFD_DECODE_INSTR_UNK30;
- if (vi_binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
- vfd_decode |= A6XX_VFD_DECODE_INSTR_INSTANCED;
- if (!vk_format_is_int(vi_attr->format))
- vfd_decode |= A6XX_VFD_DECODE_INSTR_FLOAT;
-
- const uint32_t vfd_decode_step_rate = 1;
-
- const uint32_t vfd_dest_cntl =
- A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vs->inputs[i].compmask) |
- A6XX_VFD_DEST_CNTL_INSTR_REGID(vs->inputs[i].regid);
-
- tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DECODE(vfd_decode_idx), 2);
- tu_cs_emit(cs, vfd_decode);
- tu_cs_emit(cs, vfd_decode_step_rate);
-
- tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DEST_CNTL(vfd_decode_idx), 1);
- tu_cs_emit(cs, vfd_dest_cntl);
-
- bindings[vfd_decode_idx] = vi_binding->binding;
- strides[vfd_decode_idx] = vi_binding->stride;
- offsets[vfd_decode_idx] = vi_attr->offset;
-
- vfd_decode_idx++;
- assert(vfd_decode_idx <= MAX_VERTEX_ATTRIBS);
+ bindings[vfd_fetch_idx] = binding->binding;
+ vfd_fetch_idx++;
}
- tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_0, 1);
- tu_cs_emit(
- cs, A6XX_VFD_CONTROL_0_VTXCNT(vfd_decode_idx) | (vfd_decode_idx << 8));
+ /* TODO: emit all VFD_DECODE/VFD_DEST_CNTL in same (two) pkt4 */
- *count = vfd_decode_idx;
+ for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
+ const VkVertexInputAttributeDescription *attr =
+ &info->pVertexAttributeDescriptions[i];
+ uint32_t binding_idx, input_idx;
+
+ /* find the fetch slot that was assigned to this attribute's binding */
+ for (binding_idx = 0; binding_idx < vfd_fetch_idx; binding_idx++) {
+ if (bindings[binding_idx] == attr->binding)
+ break;
+ }
+ assert(binding_idx < vfd_fetch_idx);
+
+ /* find the shader input that consumes this attribute location */
+ for (input_idx = 0; input_idx < vs->inputs_count; input_idx++) {
+ /* skip sysval inputs and inputs with an empty compmask so their
+ * slot can never be matched against an attribute location */
+ if (vs->inputs[input_idx].sysval || !vs->inputs[input_idx].compmask)
+ continue;
+ if ((vs->inputs[input_idx].slot - VERT_ATTRIB_GENERIC0) == attr->location)
+ break;
+ }
+
+ /* attribute not used, skip it */
+ if (input_idx == vs->inputs_count)
+ continue;
+
+ const struct tu_native_format format = tu6_format_vtx(attr->format);
+ tu_cs_emit_regs(cs,
+ A6XX_VFD_DECODE_INSTR(vfd_decode_idx,
+ .idx = binding_idx,
+ .offset = attr->offset,
+ .instanced = (binding_instanced & (1u << attr->binding)) != 0,
+ .format = format.fmt,
+ .swap = format.swap,
+ .unk30 = 1,
+ ._float = !vk_format_is_int(attr->format)),
+ A6XX_VFD_DECODE_STEP_RATE(vfd_decode_idx, 1));
+
+ tu_cs_emit_regs(cs,
+ A6XX_VFD_DEST_CNTL_INSTR(vfd_decode_idx,
+ .writemask = vs->inputs[input_idx].compmask,
+ .regid = vs->inputs[input_idx].regid));
+
+ vfd_decode_idx++;
+ assert(vfd_decode_idx <= MAX_VERTEX_ATTRIBS);
+ }
+
+ tu_cs_emit_regs(cs,
+ A6XX_VFD_CONTROL_0(
+ .fetch_cnt = vfd_fetch_idx,
+ .decode_cnt = vfd_decode_idx));
+
+ *count = vfd_fetch_idx;
}
static uint32_t
@@ -2006,18 +1995,16 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder,
struct tu_cs vi_cs;
tu_cs_begin_sub_stream(&pipeline->cs,
- MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs);
+ MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
tu6_emit_vertex_input(&vi_cs, &vs->variants[0], vi_info,
- pipeline->vi.bindings, pipeline->vi.strides,
- pipeline->vi.offsets, &pipeline->vi.count);
+ pipeline->vi.bindings, &pipeline->vi.count);
pipeline->vi.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vi_cs);
if (vs->has_binning_pass) {
tu_cs_begin_sub_stream(&pipeline->cs,
- MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs);
+ MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
tu6_emit_vertex_input(
&vi_cs, &vs->variants[1], vi_info, pipeline->vi.binning_bindings,
- pipeline->vi.binning_strides, pipeline->vi.binning_offsets,
&pipeline->vi.binning_count);
pipeline->vi.binning_state_ib =
tu_cs_end_sub_stream(&pipeline->cs, &vi_cs);
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index bc96ebb6d21..d5d63a6bd79 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -1208,13 +1208,9 @@ struct tu_pipeline
struct
{
uint8_t bindings[MAX_VERTEX_ATTRIBS];
- uint16_t strides[MAX_VERTEX_ATTRIBS];
- uint16_t offsets[MAX_VERTEX_ATTRIBS];
uint32_t count;
uint8_t binning_bindings[MAX_VERTEX_ATTRIBS];
- uint16_t binning_strides[MAX_VERTEX_ATTRIBS];
- uint16_t binning_offsets[MAX_VERTEX_ATTRIBS];
uint32_t binning_count;
struct tu_cs_entry state_ib;
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index 6f74fbd49c6..8fcb603bfa1 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -689,7 +689,8 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp)
}
OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_0, 1);
- OUT_RING(ring, A6XX_VFD_CONTROL_0_VTXCNT(j) | (j << 8));
+ OUT_RING(ring, A6XX_VFD_CONTROL_0_FETCH_CNT(j) |
+ A6XX_VFD_CONTROL_0_DECODE_CNT(j));
return ring;
}