diff --git a/src/amd/compiler/instruction_selection/aco_instruction_selection.h b/src/amd/compiler/instruction_selection/aco_instruction_selection.h index 808986229cb..d8263e288c4 100644 --- a/src/amd/compiler/instruction_selection/aco_instruction_selection.h +++ b/src/amd/compiler/instruction_selection/aco_instruction_selection.h @@ -288,9 +288,9 @@ void build_end_with_regs(isel_context* ctx, std::vector& regs); Instruction* add_startpgm(struct isel_context* ctx, bool is_callee = false); void finish_program(isel_context* ctx); -struct callee_info get_callee_info(amd_gfx_level gfx_level, const ABI& abi, unsigned param_count, - const nir_parameter* parameters, Program* program, - RegisterDemand reg_limit); +struct callee_info get_callee_info(amd_gfx_level gfx_level, unsigned wave_size, const ABI& abi, + unsigned param_count, const nir_parameter* parameters, + Program* program, RegisterDemand reg_limit); void load_scratch_param(isel_context* ctx, Builder& bld, const parameter_info& param, Temp stack_ptr, unsigned scratch_param_size, Temp dst); void store_scratch_param(isel_context* ctx, Builder& bld, const parameter_info& param, diff --git a/src/amd/compiler/instruction_selection/aco_isel_helpers.cpp b/src/amd/compiler/instruction_selection/aco_isel_helpers.cpp index 76135865463..2f7f7a8d00c 100644 --- a/src/amd/compiler/instruction_selection/aco_isel_helpers.cpp +++ b/src/amd/compiler/instruction_selection/aco_isel_helpers.cpp @@ -985,7 +985,7 @@ find_param_regs(Program* program, const ABI& abi, callee_info& info, } struct callee_info -get_callee_info(amd_gfx_level gfx_level, const ABI& abi, unsigned param_count, +get_callee_info(amd_gfx_level gfx_level, unsigned wave_size, const ABI& abi, unsigned param_count, const nir_parameter* parameters, Program* program, RegisterDemand reg_limit) { struct callee_info info = {}; @@ -1052,6 +1052,10 @@ get_callee_info(amd_gfx_level gfx_level, const ABI& abi, unsigned param_count, for (unsigned i = 0; i < param_count; ++i) { RegType type = parameters[i].is_uniform ? RegType::sgpr : RegType::vgpr; unsigned byte_size = align(parameters[i].bit_size, 32) / 8 * parameters[i].num_components; + if (parameters[i].bit_size == 1) { + type = RegType::sgpr; + byte_size = wave_size / 8; + } RegClass rc = RegClass(type, byte_size / 4); Temp dst = program ? program->allocateTmp(rc) : Temp(); diff --git a/src/amd/compiler/instruction_selection/aco_select_nir.cpp b/src/amd/compiler/instruction_selection/aco_select_nir.cpp index 049526fb6b4..63a3ddf48b5 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir.cpp @@ -806,8 +806,8 @@ visit_call(isel_context* ctx, nir_call_instr* instr) RegisterDemand limit = get_addr_regs_from_waves(ctx->program, ctx->program->min_waves); struct callee_info info = - get_callee_info(ctx->program->gfx_level, abi, instr->callee->num_params, - instr->callee->params, nullptr, limit); + get_callee_info(ctx->program->gfx_level, ctx->program->wave_size, abi, + instr->callee->num_params, instr->callee->params, nullptr, limit); std::vector return_infos; /* Before setting up the call itself, set up parameters stored in scratch memory. @@ -891,7 +891,7 @@ visit_call(isel_context* ctx, nir_call_instr* instr) Operand& op = call_instr->operands[reg_param_idx + extra_param_count]; op.setPrecolored(info.param_infos[i].def.physReg()); - if (instr->callee->params[i].is_uniform) + if (instr->callee->params[i].is_uniform || instr->callee->params[i].bit_size == 1) op.setTemp(bld.as_uniform(get_ssa_temp(ctx, instr->params[i].ssa))); else op.setTemp(as_vgpr(ctx, get_ssa_temp(ctx, instr->params[i].ssa))); @@ -1371,8 +1371,8 @@ select_program_rt(isel_context& ctx, unsigned shader_count, struct nir_shader* c ctx.callee_abi = abi; ctx.program->callee_abi = ctx.callee_abi; ctx.callee_info = - get_callee_info(ctx.program->gfx_level, ctx.callee_abi, impl->function->num_params, - impl->function->params, ctx.program, limit); + get_callee_info(ctx.program->gfx_level, ctx.program->wave_size, ctx.callee_abi, + impl->function->num_params, impl->function->params, ctx.program, limit); ctx.program->is_callee = true; Instruction* startpgm = add_startpgm(&ctx, true); diff --git a/src/amd/compiler/instruction_selection/aco_select_rt_prolog.cpp b/src/amd/compiler/instruction_selection/aco_select_rt_prolog.cpp index cba0c4c9863..d40ea58f25d 100644 --- a/src/amd/compiler/instruction_selection/aco_select_rt_prolog.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_rt_prolog.cpp @@ -34,8 +34,9 @@ select_rt_prolog(Program* program, ac_shader_config* config, RegisterDemand limit = get_addr_regs_from_waves(program, program->min_waves); - struct callee_info raygen_info = get_callee_info(program->gfx_level, rtRaygenABI, - raygen_param_count, raygen_params, NULL, limit); + struct callee_info raygen_info = + get_callee_info(program->gfx_level, program->wave_size, rtRaygenABI, raygen_param_count, + raygen_params, NULL, limit); /* Inputs: * Ring offsets: s[0-1]