Patch summary (free text ahead of the first "diff --git" header):

* aco_instruction_selection.cpp, visit_emit_vertex_with_counter():
  the per-component store built from gsvs_ring is emitted as
  buffer_store_dword (Format::MUBUF) instead of
  tbuffer_store_format_x (Format::MTBUF). The dfmt/nfmt assignments are
  dropped; operands, offen, offset, glc, slc and sync are set exactly as
  before.
* radv_device.c, radv_fill_shader_rings(): a new
  "gfx_level >= GFX8" branch is inserted between the existing GFX10
  branch and the final else, for both desc[3] and desc[7]. It programs
  S_008F0C_DATA_FORMAT(0) instead of V_008F0C_BUF_DATA_FORMAT_32; the
  in-code comment gives the reason (DATA_FORMAT is STRIDE[14:17] for
  MUBUF with ADD_TID_ENABLE=1).

NOTE(review): this patch was recovered from a copy whose line breaks,
indentation and angle-bracket template arguments had been stripped.
The <MTBUF_instruction>/<MUBUF_instruction> arguments and all leading
whitespace below are reconstructed - confirm against the target tree
before applying.

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 399fbc7551d..300e320fd2a 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7823,20 +7823,18 @@ visit_emit_vertex_with_counter(isel_context* ctx, nir_intrinsic_instr* instr)
                const_offset %= 4096u;
             }
 
-            aco_ptr<MTBUF_instruction> mtbuf{create_instruction<MTBUF_instruction>(
-               aco_opcode::tbuffer_store_format_x, Format::MTBUF, 4, 0)};
-            mtbuf->operands[0] = Operand(gsvs_ring);
-            mtbuf->operands[1] = vaddr_offset;
-            mtbuf->operands[2] = Operand(get_arg(ctx, ctx->args->ac.gs2vs_offset));
-            mtbuf->operands[3] = Operand(ctx->outputs.temps[i * 4u + j]);
-            mtbuf->offen = !vaddr_offset.isUndefined();
-            mtbuf->dfmt = V_008F0C_BUF_DATA_FORMAT_32;
-            mtbuf->nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-            mtbuf->offset = const_offset;
-            mtbuf->glc = ctx->program->gfx_level < GFX11;
-            mtbuf->slc = true;
-            mtbuf->sync = memory_sync_info(storage_vmem_output, semantic_can_reorder);
-            bld.insert(std::move(mtbuf));
+            aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(
+               aco_opcode::buffer_store_dword, Format::MUBUF, 4, 0)};
+            mubuf->operands[0] = Operand(gsvs_ring);
+            mubuf->operands[1] = vaddr_offset;
+            mubuf->operands[2] = Operand(get_arg(ctx, ctx->args->ac.gs2vs_offset));
+            mubuf->operands[3] = Operand(ctx->outputs.temps[i * 4u + j]);
+            mubuf->offen = !vaddr_offset.isUndefined();
+            mubuf->offset = const_offset;
+            mubuf->glc = ctx->program->gfx_level < GFX11;
+            mubuf->slc = true;
+            mubuf->sync = memory_sync_info(storage_vmem_output, semantic_can_reorder);
+            bld.insert(std::move(mubuf));
          }
 
          offset += ctx->shader->info.gs.vertices_out;
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 1a3bb842d93..7c44781ad6f 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -4061,6 +4061,10 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl
       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
+      } else if (device->physical_device->rad_info.gfx_level >= GFX8) {
+         /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
+         desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                    S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1);
       } else {
          desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
@@ -4133,6 +4137,10 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl
       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
+      } else if (device->physical_device->rad_info.gfx_level >= GFX8) {
+         /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
+         desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                    S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1);
       } else {
          desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);