From dc74285d32df9c09eb896bc4571066402d32d10b Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Mon, 25 Oct 2021 13:58:20 +0200
Subject: [PATCH] aco: only load streamout buffers if streamout is enabled

The streamout_config SGPR is used to determine if streamout is enabled.

This fixes a GPU hang with various transform feedback tests:
- dEQP-GLES3.functional.transform_feedback.*
- KHR-GL46.transform_feedback.api_errors_test
- KHR-GL46.draw_indirect.basic-draw*-xfbPaused
- KHR-GL46.geometry_shader.api.draw_calls_while_tf_is_paused

Cc: 21.3 mesa-stable
Signed-off-by: Samuel Pitoiset
Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 690baf8b9f5..54575986aaf 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -11196,17 +11196,6 @@ emit_streamout(isel_context* ctx, unsigned stream)
 {
    Builder bld(ctx->program, ctx->block);
 
-   Temp so_buffers[4];
-   Temp buf_ptr = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->streamout_buffers));
-   for (unsigned i = 0; i < 4; i++) {
-      unsigned stride = ctx->program->info->so.strides[i];
-      if (!stride)
-         continue;
-
-      Operand off = bld.copy(bld.def(s1), Operand::c32(i * 16u));
-      so_buffers[i] = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), buf_ptr, off);
-   }
-
    Temp so_vtx_count =
       bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
                get_arg(ctx, ctx->args->ac.streamout_config), Operand::c32(0x70010u));
@@ -11223,13 +11212,18 @@ emit_streamout(isel_context* ctx, unsigned stream)
    Temp so_write_index =
       bld.vadd32(bld.def(v1), get_arg(ctx, ctx->args->ac.streamout_write_index), tid);
 
+   Temp so_buffers[4];
    Temp so_write_offset[4];
+   Temp buf_ptr = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->streamout_buffers));
    for (unsigned i = 0; i < 4; i++) {
       unsigned stride = ctx->program->info->so.strides[i];
       if (!stride)
          continue;
 
+      so_buffers[i] = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), buf_ptr,
+                               bld.copy(bld.def(s1), Operand::c32(i * 16u)));
+
       if (stride == 1) {
          Temp offset = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc),
                                 get_arg(ctx, ctx->args->ac.streamout_write_index),