diff --git a/src/gallium/drivers/llvmpipe/lp_state_cs.c b/src/gallium/drivers/llvmpipe/lp_state_cs.c index 8539e8eff4c..ae5c5f57e85 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_cs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_cs.c @@ -327,6 +327,7 @@ generate_compute(struct llvmpipe_context *lp, LLVMValueRef grid_x_arg, grid_y_arg, grid_z_arg; LLVMValueRef grid_size_x_arg, grid_size_y_arg, grid_size_z_arg; LLVMValueRef work_dim_arg, draw_id_arg, thread_data_ptr, io_ptr; + LLVMValueRef num_subgroup_loop, partials, subgroup_id, coro_mem; LLVMBasicBlockRef block; LLVMBuilderRef builder; struct lp_build_sampler_soa *sampler; @@ -337,6 +338,8 @@ generate_compute(struct llvmpipe_context *lp, bool is_mesh = nir->info.stage == MESA_SHADER_MESH; unsigned i; + bool use_coro = nir->info.uses_memory_barrier || is_mesh; + LLVMValueRef output_array = NULL; /* @@ -392,9 +395,13 @@ generate_compute(struct llvmpipe_context *lp, function = LLVMAddFunction(gallivm->module, func_name, func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); - coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type); - LLVMSetFunctionCallConv(coro, LLVMCCallConv); - lp_build_coro_add_presplit(coro); + if (use_coro) { + coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type); + LLVMSetFunctionCallConv(coro, LLVMCCallConv); + lp_build_coro_add_presplit(coro); + } else { + coro = function; + } variant->function = function; variant->function_name = MALLOC(strlen(func_name)+1); @@ -403,7 +410,8 @@ generate_compute(struct llvmpipe_context *lp, for (i = 0; i < CS_ARG_MAX - !is_mesh; ++i) { if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) { - lp_add_function_attr(coro, i + 1, LP_FUNC_ATTR_NOALIAS); + if (use_coro) + lp_add_function_attr(coro, i + 1, LP_FUNC_ATTR_NOALIAS); if (i < CS_ARG_OUTER_COUNT) lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS); } @@ -411,7 +419,8 @@ generate_compute(struct llvmpipe_context *lp, if (variant->gallivm->cache->data_size) { gallivm_stub_func(gallivm, function); - gallivm_stub_func(gallivm, coro); + if (use_coro) + gallivm_stub_func(gallivm, coro); return; } @@ -560,140 +569,161 @@ generate_compute(struct llvmpipe_context *lp, } } - block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry"); - builder = gallivm->builder; - assert(builder); - LLVMPositionBuilderAtEnd(builder, block); - - if (is_mesh) { - LLVMTypeRef output_type = create_mesh_jit_output_type_deref(gallivm); - output_array = lp_build_array_alloca(gallivm, output_type, lp_build_const_int32(gallivm, align(MAX2(nir->info.mesh.max_primitives_out, nir->info.mesh.max_vertices_out), 8)), "outputs"); - } - - struct lp_build_loop_state loop_state[2]; - + LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); LLVMValueRef vec_length = lp_build_const_int32(gallivm, cs_type.length); - LLVMValueRef invocation_count = LLVMBuildMul(gallivm->builder, block_x_size_arg, block_y_size_arg, ""); - invocation_count = LLVMBuildMul(gallivm->builder, invocation_count, block_z_size_arg, ""); + if (use_coro) { + block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry"); + builder = gallivm->builder; + assert(builder); + LLVMPositionBuilderAtEnd(builder, block); - LLVMValueRef partials = LLVMBuildURem(gallivm->builder, invocation_count, vec_length, ""); + if (is_mesh) { + LLVMTypeRef output_type = create_mesh_jit_output_type_deref(gallivm); + output_array = lp_build_array_alloca(gallivm, output_type, lp_build_const_int32(gallivm, align(MAX2(nir->info.mesh.max_primitives_out, nir->info.mesh.max_vertices_out), 8)), "outputs"); + } - LLVMValueRef num_subgroup_loop = LLVMBuildAdd(gallivm->builder, invocation_count, lp_build_const_int32(gallivm, cs_type.length - 1), ""); - num_subgroup_loop = LLVMBuildUDiv(gallivm->builder, num_subgroup_loop, vec_length, ""); + struct lp_build_loop_state loop_state[2]; - /* build a ptr in memory to store all the frames in later. */ - LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); - LLVMValueRef coro_mem = LLVMBuildAlloca(gallivm->builder, hdl_ptr_type, "coro_mem"); - LLVMBuildStore(builder, LLVMConstNull(hdl_ptr_type), coro_mem); + LLVMValueRef invocation_count = LLVMBuildMul(gallivm->builder, block_x_size_arg, block_y_size_arg, ""); + invocation_count = LLVMBuildMul(gallivm->builder, invocation_count, block_z_size_arg, ""); - LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_subgroup_loop, "coro_hdls"); + partials = LLVMBuildURem(gallivm->builder, invocation_count, vec_length, ""); - unsigned end_coroutine = INT_MAX; + num_subgroup_loop = LLVMBuildAdd(gallivm->builder, invocation_count, lp_build_const_int32(gallivm, cs_type.length - 1), ""); + num_subgroup_loop = LLVMBuildUDiv(gallivm->builder, num_subgroup_loop, vec_length, ""); - /* - * This is the main coroutine execution loop. It iterates over the dimensions - * and calls the coroutine main entrypoint on the first pass, but in subsequent - * passes it checks if the coroutine has completed and resumes it if not. - */ - lp_build_loop_begin(&loop_state[1], gallivm, - lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */ - lp_build_loop_begin(&loop_state[0], gallivm, - lp_build_const_int32(gallivm, 0)); /* subgroup loop */ - { - LLVMValueRef args[CS_ARG_MAX]; - args[CS_ARG_CONTEXT] = context_ptr; - args[CS_ARG_RESOURCES] = resources_ptr; - args[CS_ARG_BLOCK_X_SIZE] = LLVMGetUndef(int32_type); - args[CS_ARG_BLOCK_Y_SIZE] = LLVMGetUndef(int32_type); - args[CS_ARG_BLOCK_Z_SIZE] = LLVMGetUndef(int32_type); - args[CS_ARG_GRID_X] = grid_x_arg; - args[CS_ARG_GRID_Y] = grid_y_arg; - args[CS_ARG_GRID_Z] = grid_z_arg; - args[CS_ARG_GRID_SIZE_X] = grid_size_x_arg; - args[CS_ARG_GRID_SIZE_Y] = grid_size_y_arg; - args[CS_ARG_GRID_SIZE_Z] = grid_size_z_arg; - args[CS_ARG_WORK_DIM] = work_dim_arg; - args[CS_ARG_DRAW_ID] = draw_id_arg; - args[CS_ARG_VERTEX_DATA] = io_ptr; - args[CS_ARG_PER_THREAD_DATA] = thread_data_ptr; - args[CS_ARG_CORO_SUBGROUP_COUNT] = num_subgroup_loop; - args[CS_ARG_CORO_PARTIALS] = partials; - args[CS_ARG_CORO_BLOCK_X_SIZE] = block_x_size_arg; - args[CS_ARG_CORO_BLOCK_Y_SIZE] = block_y_size_arg; - args[CS_ARG_CORO_BLOCK_Z_SIZE] = block_z_size_arg; + /* build a ptr in memory to store all the frames in later. */ + coro_mem = LLVMBuildAlloca(gallivm->builder, hdl_ptr_type, "coro_mem"); + LLVMBuildStore(builder, LLVMConstNull(hdl_ptr_type), coro_mem); - args[CS_ARG_CORO_IDX] = loop_state[0].counter; + LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_subgroup_loop, "coro_hdls"); - args[CS_ARG_CORO_MEM] = coro_mem; + unsigned end_coroutine = INT_MAX; + /* + * This is the main coroutine execution loop. It iterates over the dimensions + * and calls the coroutine main entrypoint on the first pass, but in subsequent + * passes it checks if the coroutine has completed and resumes it if not. + */ + lp_build_loop_begin(&loop_state[1], gallivm, + lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */ + lp_build_loop_begin(&loop_state[0], gallivm, + lp_build_const_int32(gallivm, 0)); /* subgroup loop */ + { + LLVMValueRef args[CS_ARG_MAX]; + args[CS_ARG_CONTEXT] = context_ptr; + args[CS_ARG_RESOURCES] = resources_ptr; + args[CS_ARG_BLOCK_X_SIZE] = LLVMGetUndef(int32_type); + args[CS_ARG_BLOCK_Y_SIZE] = LLVMGetUndef(int32_type); + args[CS_ARG_BLOCK_Z_SIZE] = LLVMGetUndef(int32_type); + args[CS_ARG_GRID_X] = grid_x_arg; + args[CS_ARG_GRID_Y] = grid_y_arg; + args[CS_ARG_GRID_Z] = grid_z_arg; + args[CS_ARG_GRID_SIZE_X] = grid_size_x_arg; + args[CS_ARG_GRID_SIZE_Y] = grid_size_y_arg; + args[CS_ARG_GRID_SIZE_Z] = grid_size_z_arg; + args[CS_ARG_WORK_DIM] = work_dim_arg; + args[CS_ARG_DRAW_ID] = draw_id_arg; + args[CS_ARG_VERTEX_DATA] = io_ptr; + args[CS_ARG_PER_THREAD_DATA] = thread_data_ptr; + args[CS_ARG_CORO_SUBGROUP_COUNT] = num_subgroup_loop; + args[CS_ARG_CORO_PARTIALS] = partials; + args[CS_ARG_CORO_BLOCK_X_SIZE] = block_x_size_arg; + args[CS_ARG_CORO_BLOCK_Y_SIZE] = block_y_size_arg; + args[CS_ARG_CORO_BLOCK_Z_SIZE] = block_z_size_arg; + + args[CS_ARG_CORO_IDX] = loop_state[0].counter; + + args[CS_ARG_CORO_MEM] = coro_mem; + + if (is_mesh) + args[CS_ARG_CORO_OUTPUTS] = output_array; + + LLVMValueRef coro_entry = LLVMBuildGEP2(gallivm->builder, hdl_ptr_type, coro_hdls, &loop_state[0].counter, 1, ""); + + LLVMValueRef coro_hdl = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_entry, "coro_hdl"); + + struct lp_build_if_state ifstate; + LLVMValueRef cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, loop_state[1].counter, + lp_build_const_int32(gallivm, 0), ""); + /* first time here - call the coroutine function entry point */ + lp_build_if(&ifstate, gallivm, cmp); + LLVMValueRef coro_ret = LLVMBuildCall2(gallivm->builder, coro_func_type, coro, args, CS_ARG_MAX - !is_mesh, ""); + LLVMBuildStore(gallivm->builder, coro_ret, coro_entry); + lp_build_else(&ifstate); + /* subsequent calls for this invocation - check if done. */ + LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl); + struct lp_build_if_state ifstate2; + lp_build_if(&ifstate2, gallivm, coro_done); + /* if done destroy and force loop exit */ + lp_build_coro_destroy(gallivm, coro_hdl); + lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1)); + lp_build_else(&ifstate2); + /* otherwise resume the coroutine */ + lp_build_coro_resume(gallivm, coro_hdl); + lp_build_endif(&ifstate2); + lp_build_endif(&ifstate); + lp_build_loop_force_reload_counter(&loop_state[1]); + } + lp_build_loop_end_cond(&loop_state[0], + num_subgroup_loop, + NULL, LLVMIntUGE); + lp_build_loop_end_cond(&loop_state[1], + lp_build_const_int32(gallivm, end_coroutine), + NULL, LLVMIntEQ); + + LLVMValueRef coro_mem_ptr = LLVMBuildLoad2(builder, hdl_ptr_type, coro_mem, ""); + LLVMTypeRef mem_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); + LLVMTypeRef free_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), &mem_ptr_type, 1, 0); + LLVMBuildCall2(gallivm->builder, free_type, gallivm->coro_free_hook, &coro_mem_ptr, 1, ""); + + LLVMBuildRetVoid(builder); + + /* This is stage (b) - generate the compute shader code inside the coroutine. */ + context_ptr = LLVMGetParam(coro, CS_ARG_CONTEXT); + resources_ptr = LLVMGetParam(coro, CS_ARG_RESOURCES); + grid_x_arg = LLVMGetParam(coro, CS_ARG_GRID_X); + grid_y_arg = LLVMGetParam(coro, CS_ARG_GRID_Y); + grid_z_arg = LLVMGetParam(coro, CS_ARG_GRID_Z); + grid_size_x_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_X); + grid_size_y_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_Y); + grid_size_z_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_Z); + work_dim_arg = LLVMGetParam(coro, CS_ARG_WORK_DIM); + draw_id_arg = LLVMGetParam(coro, CS_ARG_DRAW_ID); + io_ptr = LLVMGetParam(coro, CS_ARG_VERTEX_DATA); + thread_data_ptr = LLVMGetParam(coro, CS_ARG_PER_THREAD_DATA); + num_subgroup_loop = LLVMGetParam(coro, CS_ARG_CORO_SUBGROUP_COUNT); + partials = LLVMGetParam(coro, CS_ARG_CORO_PARTIALS); + block_x_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_X_SIZE); + block_y_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Y_SIZE); + block_z_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Z_SIZE); + subgroup_id = LLVMGetParam(coro, CS_ARG_CORO_IDX); + coro_mem = LLVMGetParam(coro, CS_ARG_CORO_MEM); if (is_mesh) - args[CS_ARG_CORO_OUTPUTS] = output_array; - - LLVMValueRef coro_entry = LLVMBuildGEP2(gallivm->builder, hdl_ptr_type, coro_hdls, &loop_state[0].counter, 1, ""); - - LLVMValueRef coro_hdl = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_entry, "coro_hdl"); - - struct lp_build_if_state ifstate; - LLVMValueRef cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, loop_state[1].counter, - lp_build_const_int32(gallivm, 0), ""); - /* first time here - call the coroutine function entry point */ - lp_build_if(&ifstate, gallivm, cmp); - LLVMValueRef coro_ret = LLVMBuildCall2(gallivm->builder, coro_func_type, coro, args, CS_ARG_MAX - !is_mesh, ""); - LLVMBuildStore(gallivm->builder, coro_ret, coro_entry); - lp_build_else(&ifstate); - /* subsequent calls for this invocation - check if done. */ - LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl); - struct lp_build_if_state ifstate2; - lp_build_if(&ifstate2, gallivm, coro_done); - /* if done destroy and force loop exit */ - lp_build_coro_destroy(gallivm, coro_hdl); - lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1)); - lp_build_else(&ifstate2); - /* otherwise resume the coroutine */ - lp_build_coro_resume(gallivm, coro_hdl); - lp_build_endif(&ifstate2); - lp_build_endif(&ifstate); - lp_build_loop_force_reload_counter(&loop_state[1]); + output_array = LLVMGetParam(coro, CS_ARG_CORO_OUTPUTS); } - lp_build_loop_end_cond(&loop_state[0], - num_subgroup_loop, - NULL, LLVMIntUGE); - lp_build_loop_end_cond(&loop_state[1], - lp_build_const_int32(gallivm, end_coroutine), - NULL, LLVMIntEQ); - LLVMValueRef coro_mem_ptr = LLVMBuildLoad2(builder, hdl_ptr_type, coro_mem, ""); - LLVMTypeRef mem_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); - LLVMTypeRef free_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), &mem_ptr_type, 1, 0); - LLVMBuildCall2(gallivm->builder, free_type, gallivm->coro_free_hook, &coro_mem_ptr, 1, ""); - - LLVMBuildRetVoid(builder); - - /* This is stage (b) - generate the compute shader code inside the coroutine. */ - context_ptr = LLVMGetParam(coro, CS_ARG_CONTEXT); - resources_ptr = LLVMGetParam(coro, CS_ARG_RESOURCES); - grid_x_arg = LLVMGetParam(coro, CS_ARG_GRID_X); - grid_y_arg = LLVMGetParam(coro, CS_ARG_GRID_Y); - grid_z_arg = LLVMGetParam(coro, CS_ARG_GRID_Z); - grid_size_x_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_X); - grid_size_y_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_Y); - grid_size_z_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_Z); - work_dim_arg = LLVMGetParam(coro, CS_ARG_WORK_DIM); - draw_id_arg = LLVMGetParam(coro, CS_ARG_DRAW_ID); - io_ptr = LLVMGetParam(coro, CS_ARG_VERTEX_DATA); - thread_data_ptr = LLVMGetParam(coro, CS_ARG_PER_THREAD_DATA); - num_subgroup_loop = LLVMGetParam(coro, CS_ARG_CORO_SUBGROUP_COUNT); - partials = LLVMGetParam(coro, CS_ARG_CORO_PARTIALS); - block_x_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_X_SIZE); - block_y_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Y_SIZE); - block_z_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Z_SIZE); - LLVMValueRef subgroup_id = LLVMGetParam(coro, CS_ARG_CORO_IDX); - coro_mem = LLVMGetParam(coro, CS_ARG_CORO_MEM); - if (is_mesh) - output_array = LLVMGetParam(coro, CS_ARG_CORO_OUTPUTS); block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "entry"); + builder = gallivm->builder; LLVMPositionBuilderAtEnd(builder, block); + + struct lp_build_loop_state loop_state; + + if (!use_coro) { + LLVMValueRef invocation_count = LLVMBuildMul(gallivm->builder, block_x_size_arg, block_y_size_arg, ""); + invocation_count = LLVMBuildMul(gallivm->builder, invocation_count, block_z_size_arg, ""); + + partials = LLVMBuildURem(gallivm->builder, invocation_count, vec_length, ""); + + num_subgroup_loop = LLVMBuildAdd(gallivm->builder, invocation_count, lp_build_const_int32(gallivm, cs_type.length - 1), ""); + num_subgroup_loop = LLVMBuildUDiv(gallivm->builder, num_subgroup_loop, vec_length, ""); + + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + + subgroup_id = loop_state.counter; + } + { LLVMValueRef consts_ptr; LLVMValueRef ssbo_ptr; @@ -718,12 +748,16 @@ generate_compute(struct llvmpipe_context *lp, thread_data_ptr); /* these are coroutine entrypoint necessities */ - LLVMValueRef coro_id = lp_build_coro_id(gallivm); - LLVMValueRef coro_entry = lp_build_coro_alloc_mem_array(gallivm, coro_mem, subgroup_id, num_subgroup_loop); - LLVMTypeRef mem_ptr_type = LLVMInt8TypeInContext(gallivm->context); - LLVMValueRef alloced_ptr = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_mem, ""); - alloced_ptr = LLVMBuildGEP2(gallivm->builder, mem_ptr_type, alloced_ptr, &coro_entry, 1, ""); - LLVMValueRef coro_hdl = lp_build_coro_begin(gallivm, coro_id, alloced_ptr); + LLVMValueRef coro_hdl = NULL; + if (use_coro) { + LLVMValueRef coro_id = lp_build_coro_id(gallivm); + LLVMValueRef coro_entry = lp_build_coro_alloc_mem_array(gallivm, coro_mem, subgroup_id, num_subgroup_loop); + LLVMTypeRef mem_ptr_type = LLVMInt8TypeInContext(gallivm->context); + LLVMValueRef alloced_ptr = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_mem, ""); + alloced_ptr = LLVMBuildGEP2(gallivm->builder, mem_ptr_type, alloced_ptr, &coro_entry, 1, ""); + coro_hdl = lp_build_coro_begin(gallivm, coro_id, alloced_ptr); + } + LLVMValueRef has_partials = LLVMBuildICmp(gallivm->builder, LLVMIntNE, partials, lp_build_const_int32(gallivm, 0), ""); struct lp_build_context bld; @@ -798,13 +832,11 @@ generate_compute(struct llvmpipe_context *lp, mask_val = LLVMBuildLoad2(gallivm->builder, mask_type, mask_val, ""); lp_build_mask_begin(&mask, gallivm, cs_type, mask_val); - struct lp_build_coro_suspend_info coro_info; - - LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "suspend"); - LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "cleanup"); - - coro_info.suspend = sus_block; - coro_info.cleanup = clean_block; + struct lp_build_coro_suspend_info coro_info = {0}; + if (use_coro) { + coro_info.suspend = LLVMAppendBasicBlockInContext(gallivm->context, coro, "suspend"); + coro_info.cleanup = LLVMAppendBasicBlockInContext(gallivm->context, coro, "cleanup"); + } if (is_mesh) { LLVMValueRef vertex_count = lp_build_alloca(gallivm, LLVMInt32TypeInContext(gallivm->context), "vertex_count"); @@ -908,16 +940,23 @@ generate_compute(struct llvmpipe_context *lp, NULL, LLVMIntUGE); } + if (!use_coro) + lp_build_loop_end_cond(&loop_state, num_subgroup_loop, NULL, LLVMIntUGE); + mask_val = lp_build_mask_end(&mask); - lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true); - LLVMPositionBuilderAtEnd(builder, clean_block); + if (use_coro) { + lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true); + LLVMPositionBuilderAtEnd(builder, coro_info.cleanup); - LLVMBuildBr(builder, sus_block); - LLVMPositionBuilderAtEnd(builder, sus_block); + LLVMBuildBr(builder, coro_info.suspend); + LLVMPositionBuilderAtEnd(builder, coro_info.suspend); - lp_build_coro_end(gallivm, coro_hdl); - LLVMBuildRet(builder, coro_hdl); + lp_build_coro_end(gallivm, coro_hdl); + LLVMBuildRet(builder, coro_hdl); + } else { + LLVMBuildRetVoid(builder); + } } lp_bld_llvm_sampler_soa_destroy(sampler);