diff --git a/src/amd/common/ac_shader_args.h b/src/amd/common/ac_shader_args.h index 020e8b46ba8..2cbc116f079 100644 --- a/src/amd/common/ac_shader_args.h +++ b/src/amd/common/ac_shader_args.h @@ -71,17 +71,51 @@ struct ac_shader_args { uint16_t num_sgprs_returned; uint16_t num_vgprs_returned; + /* VS */ struct ac_arg base_vertex; struct ac_arg start_instance; struct ac_arg draw_id; + struct ac_arg vertex_buffers; struct ac_arg vertex_id; + struct ac_arg vs_rel_patch_id; + struct ac_arg vs_prim_id; struct ac_arg instance_id; + + /* Merged shaders */ + struct ac_arg tess_offchip_offset; + struct ac_arg merged_wave_info; + /* On gfx10: + * - bits 0..11: ordered_wave_id + * - bits 12..20: number of vertices in group + * - bits 22..30: number of primitives in group + */ + struct ac_arg gs_tg_info; + struct ac_arg scratch_offset; + + /* TCS */ + struct ac_arg tcs_factor_offset; struct ac_arg tcs_patch_id; struct ac_arg tcs_rel_ids; + + /* TES */ + struct ac_arg tes_u; + struct ac_arg tes_v; + struct ac_arg tes_rel_patch_id; struct ac_arg tes_patch_id; + + /* GS */ + struct ac_arg es2gs_offset; /* separate legacy ES */ + struct ac_arg gs2vs_offset; /* legacy GS */ + struct ac_arg gs_wave_id; /* legacy GS */ + struct ac_arg gs_vtx_offset[6]; /* separate legacy GS */ struct ac_arg gs_prim_id; struct ac_arg gs_invocation_id; + /* Streamout */ + struct ac_arg streamout_config; + struct ac_arg streamout_write_index; + struct ac_arg streamout_offset[4]; + /* PS */ struct ac_arg frag_pos[4]; struct ac_arg front_face; diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index cc55a62bb18..2bf1c1d79d1 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -4034,7 +4034,7 @@ Temp wave_id_in_threadgroup(isel_context *ctx) { Builder bld(ctx->program, ctx->block); return bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->merged_wave_info), Operand(24u | (4u << 16))); + get_arg(ctx, ctx->args->ac.merged_wave_info), Operand(24u | (4u << 16))); } Temp thread_id_in_threadgroup(isel_context *ctx) @@ -4057,7 +4057,7 @@ Temp wave_count_in_threadgroup(isel_context *ctx) { Builder bld(ctx->program, ctx->block); return bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->merged_wave_info), Operand(28u | (4u << 16))); + get_arg(ctx, ctx->args->ac.merged_wave_info), Operand(28u | (4u << 16))); } Temp ngg_gs_vertex_lds_addr(isel_context *ctx, Temp vertex_idx) @@ -4188,7 +4188,7 @@ Temp get_tess_rel_patch_id(isel_context *ctx) return bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0xffu), get_arg(ctx, ctx->args->ac.tcs_rel_ids)); case MESA_SHADER_TESS_EVAL: - return get_arg(ctx, ctx->args->tes_rel_patch_id); + return get_arg(ctx, ctx->args->ac.tes_rel_patch_id); default: unreachable("Unsupported stage in get_tess_rel_patch_id"); } @@ -4384,7 +4384,7 @@ void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr) if (ctx->stage.hw == HWStage::ES) { /* GFX6-8: ES stage is not merged into GS, data is passed from ES to GS in VMEM. */ Temp esgs_ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_ESGS_VS * 16u)); - Temp es2gs_offset = get_arg(ctx, ctx->args->es2gs_offset); + Temp es2gs_offset = get_arg(ctx, ctx->args->ac.es2gs_offset); store_vmem_mubuf(ctx, src, esgs_ring, offs.first, es2gs_offset, offs.second, elem_size_bytes, write_mask, false, memory_sync_info(), true); } else { Temp lds_base; @@ -4401,7 +4401,7 @@ void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr) /* GFX6-8: VS runs on LS stage when tessellation is used, but LS shares LDS space with HS. * GFX9+: LS is merged into HS, but still uses the same LDS layout. */ - Temp vertex_idx = get_arg(ctx, ctx->args->rel_auto_id); + Temp vertex_idx = get_arg(ctx, ctx->args->ac.vs_rel_patch_id); lds_base = bld.v_mul24_imm(bld.def(v1), vertex_idx, ctx->tcs_num_inputs * 16u); } else { unreachable("Invalid LS or ES stage"); @@ -4458,7 +4458,7 @@ void visit_store_tcs_output(isel_context *ctx, nir_intrinsic_instr *instr, bool : get_tcs_per_patch_output_vmem_offset(ctx, instr); Temp hs_ring_tess_offchip = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_HS_TESS_OFFCHIP * 16u)); - Temp oc_lds = get_arg(ctx, ctx->args->oc_lds); + Temp oc_lds = get_arg(ctx, ctx->args->ac.tess_offchip_offset); store_vmem_mubuf(ctx, store_val, hs_ring_tess_offchip, vmem_offs.first, oc_lds, vmem_offs.second, elem_size_bytes, write_mask, true, memory_sync_info(storage_vmem_output)); } @@ -4709,7 +4709,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr) if (!nir_src_is_const(offset) || nir_src_as_uint(offset)) isel_err(offset.ssa->parent_instr, "Unimplemented non-zero nir_intrinsic_load_input offset"); - Temp vertex_buffers = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->vertex_buffers)); + Temp vertex_buffers = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ac.vertex_buffers)); unsigned location = nir_intrinsic_base(instr) - VERT_ATTRIB_GENERIC0; unsigned component = nir_intrinsic_component(instr); @@ -4951,7 +4951,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr) } else if (ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) { Temp ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_HS_TESS_OFFCHIP * 16u)); - Temp soffset = get_arg(ctx, ctx->args->oc_lds); + Temp soffset = get_arg(ctx, ctx->args->ac.tess_offchip_offset); std::pair offs = get_tcs_per_patch_output_vmem_offset(ctx, instr); unsigned elem_size_bytes = instr->dest.ssa.bit_size / 8u; @@ -4978,11 +4978,11 @@ std::pair get_gs_per_vertex_input_offset(isel_context *ctx, nir_ Temp elem; if (merged_esgs) { - elem = get_arg(ctx, ctx->args->gs_vtx_offset[i / 2u * 2u]); + elem = get_arg(ctx, ctx->args->ac.gs_vtx_offset[i / 2u * 2u]); if (i % 2u) elem = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand(16u), elem); } else { - elem = get_arg(ctx, ctx->args->gs_vtx_offset[i]); + elem = get_arg(ctx, ctx->args->ac.gs_vtx_offset[i]); } if (vertex_offset.id()) { @@ -5000,10 +5000,10 @@ std::pair get_gs_per_vertex_input_offset(isel_context *ctx, nir_ unsigned vertex = nir_src_as_uint(*vertex_src); if (merged_esgs) vertex_offset = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), - get_arg(ctx, ctx->args->gs_vtx_offset[vertex / 2u * 2u]), + get_arg(ctx, ctx->args->ac.gs_vtx_offset[vertex / 2u * 2u]), Operand((vertex % 2u) * 16u), Operand(16u)); else - vertex_offset = get_arg(ctx, ctx->args->gs_vtx_offset[vertex]); + vertex_offset = get_arg(ctx, ctx->args->ac.gs_vtx_offset[vertex]); } std::pair offs = get_intrinsic_io_basic_offset(ctx, instr, base_stride); @@ -5054,7 +5054,7 @@ void visit_load_tes_per_vertex_input(isel_context *ctx, nir_intrinsic_instr *ins Builder bld(ctx->program, ctx->block); Temp ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_HS_TESS_OFFCHIP * 16u)); - Temp oc_lds = get_arg(ctx, ctx->args->oc_lds); + Temp oc_lds = get_arg(ctx, ctx->args->ac.tess_offchip_offset); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); unsigned elem_size_bytes = instr->dest.ssa.bit_size / 8; @@ -5100,8 +5100,8 @@ void visit_load_tess_coord(isel_context *ctx, nir_intrinsic_instr *instr) Builder bld(ctx->program, ctx->block); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); - Operand tes_u(get_arg(ctx, ctx->args->tes_u)); - Operand tes_v(get_arg(ctx, ctx->args->tes_v)); + Operand tes_u(get_arg(ctx, ctx->args->ac.tes_u)); + Operand tes_v(get_arg(ctx, ctx->args->ac.tes_v)); Operand tes_w(0u); if (ctx->shader->info.tess.primitive_mode == GL_TRIANGLES) { @@ -7111,7 +7111,7 @@ void visit_emit_vertex_with_counter(isel_context *ctx, nir_intrinsic_instr *inst aco_ptr mtbuf{create_instruction(aco_opcode::tbuffer_store_format_x, Format::MTBUF, 4, 0)}; mtbuf->operands[0] = Operand(gsvs_ring); mtbuf->operands[1] = vaddr_offset; - mtbuf->operands[2] = Operand(get_arg(ctx, ctx->args->gs2vs_offset)); + mtbuf->operands[2] = Operand(get_arg(ctx, ctx->args->ac.gs2vs_offset)); mtbuf->operands[3] = Operand(ctx->outputs.temps[i * 4u + j]); mtbuf->offen = !vaddr_offset.isUndefined(); mtbuf->dfmt = V_008F0C_BUF_DATA_FORMAT_32; @@ -10251,7 +10251,7 @@ static void create_vs_exports(isel_context *ctx) if (ctx->stage.has(SWStage::TES)) ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_ID * 4u] = get_arg(ctx, ctx->args->ac.tes_patch_id); else - ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_ID * 4u] = get_arg(ctx, ctx->args->vs_prim_id); + ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_ID * 4u] = get_arg(ctx, ctx->args->ac.vs_prim_id); } if (ctx->options->key.has_multiview_view_index) { @@ -10647,7 +10647,7 @@ static void write_tcs_tess_factors(isel_context *ctx) } Temp rel_patch_id = get_tess_rel_patch_id(ctx); - Temp tf_base = get_arg(ctx, ctx->args->tess_factor_offset); + Temp tf_base = get_arg(ctx, ctx->args->ac.tcs_factor_offset); Temp byte_offset = bld.v_mul24_imm(bld.def(v1), rel_patch_id, stride * 4u); unsigned tf_const_offset = 0; @@ -10677,7 +10677,7 @@ static void write_tcs_tess_factors(isel_context *ctx) /* Store to offchip for TES to read - only if TES reads them */ if (ctx->args->options->key.tcs.tes_reads_tess_factors) { Temp hs_ring_tess_offchip = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_HS_TESS_OFFCHIP * 16u)); - Temp oc_lds = get_arg(ctx, ctx->args->oc_lds); + Temp oc_lds = get_arg(ctx, ctx->args->ac.tess_offchip_offset); std::pair vmem_offs_outer = get_tcs_per_patch_output_vmem_offset(ctx, nullptr, ctx->tcs_tess_lvl_out_loc); store_vmem_mubuf(ctx, tf_outer_vec, hs_ring_tess_offchip, vmem_offs_outer.first, oc_lds, vmem_offs_outer.second, 4, (1 << outer_comps) - 1, true, memory_sync_info(storage_vmem_output)); @@ -10790,7 +10790,7 @@ static void emit_streamout(isel_context *ctx, unsigned stream) } Temp so_vtx_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->streamout_config), Operand(0x70010u)); + get_arg(ctx, ctx->args->ac.streamout_config), Operand(0x70010u)); Temp tid = emit_mbcnt(ctx, bld.tmp(v1)); @@ -10801,7 +10801,7 @@ static void emit_streamout(isel_context *ctx, unsigned stream) bld.reset(ctx->block); - Temp so_write_index = bld.vadd32(bld.def(v1), get_arg(ctx, ctx->args->streamout_write_idx), tid); + Temp so_write_index = bld.vadd32(bld.def(v1), get_arg(ctx, ctx->args->ac.streamout_write_index), tid); Temp so_write_offset[4]; @@ -10812,15 +10812,15 @@ static void emit_streamout(isel_context *ctx, unsigned stream) if (stride == 1) { Temp offset = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->streamout_write_idx), - get_arg(ctx, ctx->args->streamout_offset[i])); + get_arg(ctx, ctx->args->ac.streamout_write_index), + get_arg(ctx, ctx->args->ac.streamout_offset[i])); Temp new_offset = bld.vadd32(bld.def(v1), offset, tid); so_write_offset[i] = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), new_offset); } else { Temp offset = bld.v_mul_imm(bld.def(v1), so_write_index, stride * 4u); Temp offset2 = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand(4u), - get_arg(ctx, ctx->args->streamout_offset[i])); + get_arg(ctx, ctx->args->ac.streamout_offset[i])); so_write_offset[i] = bld.vadd32(bld.def(v1), offset, offset2); } } @@ -10892,7 +10892,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx) * handling spilling. */ ctx->program->private_segment_buffer = get_arg(ctx, ctx->args->ring_offsets); - ctx->program->scratch_offset = get_arg(ctx, ctx->args->scratch_offset); + ctx->program->scratch_offset = get_arg(ctx, ctx->args->ac.scratch_offset); return instr; } @@ -10903,19 +10903,19 @@ void fix_ls_vgpr_init_bug(isel_context *ctx, Pseudo_instruction *startpgm) Builder bld(ctx->program, ctx->block); constexpr unsigned hs_idx = 1u; Builder::Result hs_thread_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->merged_wave_info), + get_arg(ctx, ctx->args->ac.merged_wave_info), Operand((8u << 16) | (hs_idx * 8u))); Temp ls_has_nonzero_hs_threads = bool_to_vector_condition(ctx, hs_thread_count.def(1).getTemp()); /* If there are no HS threads, SPI mistakenly loads the LS VGPRs starting at VGPR 0. */ Temp instance_id = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), - get_arg(ctx, ctx->args->rel_auto_id), + get_arg(ctx, ctx->args->ac.vs_rel_patch_id), get_arg(ctx, ctx->args->ac.instance_id), ls_has_nonzero_hs_threads); - Temp rel_auto_id = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), + Temp vs_rel_patch_id = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), get_arg(ctx, ctx->args->ac.tcs_rel_ids), - get_arg(ctx, ctx->args->rel_auto_id), + get_arg(ctx, ctx->args->ac.vs_rel_patch_id), ls_has_nonzero_hs_threads); Temp vertex_id = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), get_arg(ctx, ctx->args->ac.tcs_patch_id), @@ -10923,7 +10923,7 @@ void fix_ls_vgpr_init_bug(isel_context *ctx, Pseudo_instruction *startpgm) ls_has_nonzero_hs_threads); ctx->arg_temps[ctx->args->ac.instance_id.arg_index] = instance_id; - ctx->arg_temps[ctx->args->rel_auto_id.arg_index] = rel_auto_id; + ctx->arg_temps[ctx->args->ac.vs_rel_patch_id.arg_index] = vs_rel_patch_id; ctx->arg_temps[ctx->args->ac.vertex_id.arg_index] = vertex_id; } @@ -11075,9 +11075,9 @@ Temp merged_wave_info_to_mask(isel_context *ctx, unsigned i) /* lanecount_to_mask() only cares about s0.u[6:0] so we don't need either s_bfe nor s_and here */ Temp count = i == 0 - ? get_arg(ctx, ctx->args->merged_wave_info) + ? get_arg(ctx, ctx->args->ac.merged_wave_info) : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->merged_wave_info), Operand(i * 8u)); + get_arg(ctx, ctx->args->ac.merged_wave_info), Operand(i * 8u)); return lanecount_to_mask(ctx, count); } @@ -11086,14 +11086,14 @@ Temp ngg_max_vertex_count(isel_context *ctx) { Builder bld(ctx->program, ctx->block); return bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->gs_tg_info), Operand(12u | (9u << 16u))); + get_arg(ctx, ctx->args->ac.gs_tg_info), Operand(12u | (9u << 16u))); } Temp ngg_max_primitive_count(isel_context *ctx) { Builder bld(ctx->program, ctx->block); return bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->gs_tg_info), Operand(22u | (9u << 16u))); + get_arg(ctx, ctx->args->ac.gs_tg_info), Operand(22u | (9u << 16u))); } void ngg_emit_sendmsg_gs_alloc_req(isel_context *ctx, Temp vtx_cnt = Temp(), Temp prm_cnt = Temp()) @@ -11105,7 +11105,7 @@ void ngg_emit_sendmsg_gs_alloc_req(isel_context *ctx, Temp vtx_cnt = Temp(), Tem /* Get the id of the current wave within the threadgroup (workgroup) */ Builder::Result wave_id_in_tg = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->merged_wave_info), Operand(24u | (4u << 16))); + get_arg(ctx, ctx->args->ac.merged_wave_info), Operand(24u | (4u << 16))); /* Execute the following code only on the first wave (wave id 0), * use the SCC def to tell if the wave id is zero or not. @@ -11216,7 +11216,7 @@ void ngg_emit_prim_export(isel_context *ctx, unsigned num_vertices_per_primitive Temp prim_exp_arg; if (!ctx->stage.has(SWStage::GS) && ctx->args->options->key.vs_common_out.as_ngg_passthrough) - prim_exp_arg = get_arg(ctx, ctx->args->gs_vtx_offset[0]); + prim_exp_arg = get_arg(ctx, ctx->args->ac.gs_vtx_offset[0]); else prim_exp_arg = ngg_pack_prim_exp_arg(ctx, num_vertices_per_primitive, vtxindex, is_null); @@ -11258,13 +11258,13 @@ void ngg_nogs_export_primitives(isel_context *ctx) Temp vtxindex[max_vertices_per_primitive]; if (!ctx->args->options->key.vs_common_out.as_ngg_passthrough) { vtxindex[0] = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0xffffu), - get_arg(ctx, ctx->args->gs_vtx_offset[0])); + get_arg(ctx, ctx->args->ac.gs_vtx_offset[0])); vtxindex[1] = num_vertices_per_primitive < 2 ? Temp(0, v1) : bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), - get_arg(ctx, ctx->args->gs_vtx_offset[0]), Operand(16u), Operand(16u)); + get_arg(ctx, ctx->args->ac.gs_vtx_offset[0]), Operand(16u), Operand(16u)); vtxindex[2] = num_vertices_per_primitive < 3 ? Temp(0, v1) : bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0xffffu), - get_arg(ctx, ctx->args->gs_vtx_offset[2])); + get_arg(ctx, ctx->args->ac.gs_vtx_offset[2])); } /* Export primitive data to the index buffer. */ @@ -11797,10 +11797,10 @@ void select_program(Program *program, create_workgroup_barrier(bld); if (ctx.stage == vertex_geometry_gs || ctx.stage == tess_eval_geometry_gs) { - ctx.gs_wave_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, m0), bld.def(s1, scc), get_arg(&ctx, args->merged_wave_info), Operand((8u << 16) | 16u)); + ctx.gs_wave_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, m0), bld.def(s1, scc), get_arg(&ctx, args->ac.merged_wave_info), Operand((8u << 16) | 16u)); } } else if (ctx.stage == geometry_gs) - ctx.gs_wave_id = get_arg(&ctx, args->gs_wave_id); + ctx.gs_wave_id = get_arg(&ctx, args->ac.gs_wave_id); if (ctx.stage == fragment_fs) handle_bc_optimize(&ctx); @@ -11876,7 +11876,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader, Operand stream_id(0u); if (args->shader_info->so.num_outputs) stream_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), - get_arg(&ctx, ctx.args->streamout_config), Operand(0x20018u)); + get_arg(&ctx, ctx.args->ac.streamout_config), Operand(0x20018u)); Temp vtx_offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), get_arg(&ctx, ctx.args->ac.vertex_id)); diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index c058d945caf..0c895854851 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -58,7 +58,7 @@ struct radv_shader_context { LLVMValueRef ring_offsets; - LLVMValueRef rel_auto_id; + LLVMValueRef vs_rel_patch_id; LLVMValueRef gs_wave_id; LLVMValueRef gs_vtx_offset[6]; @@ -108,7 +108,7 @@ static LLVMValueRef get_rel_patch_id(struct radv_shader_context *ctx) ac_get_arg(&ctx->ac, ctx->args->ac.tcs_rel_ids), 0, 8); case MESA_SHADER_TESS_EVAL: - return ac_get_arg(&ctx->ac, ctx->args->tes_rel_patch_id); + return ac_get_arg(&ctx->ac, ctx->args->ac.tes_rel_patch_id); break; default: unreachable("Illegal stage"); @@ -559,7 +559,7 @@ store_tcs_output(struct ac_shader_abi *abi, LLVMValueRef dw_addr; LLVMValueRef stride = NULL; LLVMValueRef buf_addr = NULL; - LLVMValueRef oc_lds = ac_get_arg(&ctx->ac, ctx->args->oc_lds); + LLVMValueRef oc_lds = ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset); unsigned param = driver_location; bool store_lds = true; @@ -626,7 +626,7 @@ load_tes_input(struct ac_shader_abi *abi, struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); LLVMValueRef buf_addr; LLVMValueRef result; - LLVMValueRef oc_lds = ac_get_arg(&ctx->ac, ctx->args->oc_lds); + LLVMValueRef oc_lds = ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset); unsigned param = driver_location; buf_addr = get_tcs_tes_buffer_address_params(ctx, param, vertex_index, param_index); @@ -813,7 +813,7 @@ visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, out_val, 1, voffset, ac_get_arg(&ctx->ac, - ctx->args->gs2vs_offset), + ctx->args->ac.gs2vs_offset), 0, ac_glc | ac_slc | ac_swizzled); } } @@ -842,8 +842,8 @@ load_tess_coord(struct ac_shader_abi *abi) struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); LLVMValueRef coord[4] = { - ac_get_arg(&ctx->ac, ctx->args->tes_u), - ac_get_arg(&ctx->ac, ctx->args->tes_v), + ac_get_arg(&ctx->ac, ctx->args->ac.tes_u), + ac_get_arg(&ctx->ac, ctx->args->ac.tes_v), ctx->ac.f32_0, ctx->ac.f32_0, }; @@ -1114,7 +1114,7 @@ static void handle_vs_input_decl(struct radv_shader_context *ctx, struct nir_variable *variable) { - LLVMValueRef t_list_ptr = ac_get_arg(&ctx->ac, ctx->args->vertex_buffers); + LLVMValueRef t_list_ptr = ac_get_arg(&ctx->ac, ctx->args->ac.vertex_buffers); LLVMValueRef t_offset; LLVMValueRef t_list; LLVMValueRef input; @@ -1619,10 +1619,10 @@ radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream) int i; /* Get bits [22:16], i.e. (so_param >> 16) & 127; */ - assert(ctx->args->streamout_config.used); + assert(ctx->args->ac.streamout_config.used); LLVMValueRef so_vtx_count = ac_build_bfe(&ctx->ac, - ac_get_arg(&ctx->ac, ctx->args->streamout_config), + ac_get_arg(&ctx->ac, ctx->args->ac.streamout_config), LLVMConstInt(ctx->ac.i32, 16, false), LLVMConstInt(ctx->ac.i32, 7, false), false); @@ -1644,7 +1644,7 @@ radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream) * attrib_offset */ LLVMValueRef so_write_index = - ac_get_arg(&ctx->ac, ctx->args->streamout_write_idx); + ac_get_arg(&ctx->ac, ctx->args->ac.streamout_write_index); /* Compute (streamout_write_index + thread_id). */ so_write_index = @@ -1670,7 +1670,7 @@ radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream) buf_ptr, offset); LLVMValueRef so_offset = - ac_get_arg(&ctx->ac, ctx->args->streamout_offset[i]); + ac_get_arg(&ctx->ac, ctx->args->ac.streamout_offset[i]); so_offset = LLVMBuildMul(ctx->ac.builder, so_offset, LLVMConstInt(ctx->ac.i32, 4, false), ""); @@ -1938,7 +1938,7 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, ac_get_arg(&ctx->ac, ctx->args->ac.tes_patch_id); else outputs[noutput].values[0] = - ac_get_arg(&ctx->ac, ctx->args->vs_prim_id); + ac_get_arg(&ctx->ac, ctx->args->ac.vs_prim_id); for (unsigned j = 1; j < 4; j++) outputs[noutput].values[j] = ctx->ac.f32_0; noutput++; @@ -1961,7 +1961,7 @@ handle_es_outputs_post(struct radv_shader_context *ctx, LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac); LLVMValueRef wave_idx = ac_unpack_param(&ctx->ac, - ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 24, 4); + ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 24, 4); vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx, LLVMBuildMul(ctx->ac.builder, wave_idx, LLVMConstInt(ctx->ac.i32, @@ -2013,7 +2013,7 @@ handle_es_outputs_post(struct radv_shader_context *ctx, ctx->esgs_ring, out_val, 1, NULL, - ac_get_arg(&ctx->ac, ctx->args->es2gs_offset), + ac_get_arg(&ctx->ac, ctx->args->ac.es2gs_offset), (4 * i + j) * 4, ac_glc | ac_slc | ac_swizzled); } @@ -2024,7 +2024,7 @@ handle_es_outputs_post(struct radv_shader_context *ctx, static void handle_ls_outputs_post(struct radv_shader_context *ctx) { - LLVMValueRef vertex_id = ctx->rel_auto_id; + LLVMValueRef vertex_id = ctx->vs_rel_patch_id; uint32_t num_tcs_inputs = ctx->args->shader_info->vs.num_linked_outputs; LLVMValueRef vertex_dw_stride = LLVMConstInt(ctx->ac.i32, num_tcs_inputs * 4, false); LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, @@ -2052,12 +2052,12 @@ handle_ls_outputs_post(struct radv_shader_context *ctx) static LLVMValueRef get_wave_id_in_tg(struct radv_shader_context *ctx) { return ac_unpack_param(&ctx->ac, - ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 24, 4); + ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 24, 4); } static LLVMValueRef get_tgsize(struct radv_shader_context *ctx) { - return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 28, 4); + return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 28, 4); } static LLVMValueRef get_thread_id_in_tg(struct radv_shader_context *ctx) @@ -2071,7 +2071,7 @@ static LLVMValueRef get_thread_id_in_tg(struct radv_shader_context *ctx) static LLVMValueRef ngg_get_vtx_cnt(struct radv_shader_context *ctx) { - return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), + return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info), LLVMConstInt(ctx->ac.i32, 12, false), LLVMConstInt(ctx->ac.i32, 9, false), false); @@ -2079,7 +2079,7 @@ static LLVMValueRef ngg_get_vtx_cnt(struct radv_shader_context *ctx) static LLVMValueRef ngg_get_prim_cnt(struct radv_shader_context *ctx) { - return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), + return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info), LLVMConstInt(ctx->ac.i32, 22, false), LLVMConstInt(ctx->ac.i32, 9, false), false); @@ -2087,7 +2087,7 @@ static LLVMValueRef ngg_get_prim_cnt(struct radv_shader_context *ctx) static LLVMValueRef ngg_get_ordered_id(struct radv_shader_context *ctx) { - return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), + return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info), ctx->ac.i32_0, LLVMConstInt(ctx->ac.i32, 12, false), false); @@ -2709,17 +2709,17 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx) ctx->stage == MESA_SHADER_TESS_EVAL) && !ctx->args->is_gs_copy_shader); LLVMValueRef prims_in_wave = ac_unpack_param(&ctx->ac, - ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 8, 8); + ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 8, 8); LLVMValueRef vtx_in_wave = ac_unpack_param(&ctx->ac, - ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 0, 8); + ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 0, 8); LLVMValueRef is_gs_thread = LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), prims_in_wave, ""); LLVMValueRef is_es_thread = LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), vtx_in_wave, ""); LLVMValueRef vtxindex[] = { - ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[0]), 0, 16), - ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[0]), 16, 16), - ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[2]), 0, 16), + ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 0, 16), + ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 16, 16), + ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[2]), 0, 16), }; /* Determine the number of vertices per primitive. */ @@ -2802,7 +2802,7 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx) struct ac_ngg_prim prim = {0}; if (ctx->args->options->key.vs_common_out.as_ngg_passthrough) { - prim.passthrough = ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[0]); + prim.passthrough = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]); } else { prim.num_vertices = num_vertices; prim.isnull = ctx->ac.i1false; @@ -3390,7 +3390,7 @@ write_tess_factors(struct radv_shader_context *ctx) buffer = ctx->hs_ring_tess_factor; - tf_base = ac_get_arg(&ctx->ac, ctx->args->tess_factor_offset); + tf_base = ac_get_arg(&ctx->ac, ctx->args->ac.tcs_factor_offset); byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id, LLVMConstInt(ctx->ac.i32, 4 * stride, false), ""); unsigned tf_offset = 0; @@ -3432,7 +3432,7 @@ write_tess_factors(struct radv_shader_context *ctx) ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec, outer_comps, tf_outer_offset, - ac_get_arg(&ctx->ac, ctx->args->oc_lds), + ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset), 0, ac_glc); if (inner_comps) { tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL, @@ -3442,7 +3442,7 @@ write_tess_factors(struct radv_shader_context *ctx) ac_build_gather_values(&ctx->ac, inner, inner_comps); ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec, inner_comps, tf_inner_offset, - ac_get_arg(&ctx->ac, ctx->args->oc_lds), + ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset), 0, ac_glc); } } @@ -3757,15 +3757,15 @@ radv_nir_get_max_workgroup_size(enum chip_class chip_class, static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx) { LLVMValueRef count = - ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 8, 8); + ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 8, 8); LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count, ctx->ac.i32_0, ""); ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, - ac_get_arg(&ctx->ac, ctx->args->rel_auto_id), + ac_get_arg(&ctx->ac, ctx->args->ac.vs_rel_patch_id), ctx->abi.instance_id, ""); - ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, + ctx->vs_rel_patch_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ac_get_arg(&ctx->ac, ctx->args->ac.tcs_rel_ids), - ctx->rel_auto_id, + ctx->vs_rel_patch_id, ""); ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ac_get_arg(&ctx->ac, ctx->args->ac.tcs_patch_id), @@ -3778,17 +3778,17 @@ static void prepare_gs_input_vgprs(struct radv_shader_context *ctx, bool merged) for(int i = 5; i >= 0; --i) { ctx->gs_vtx_offset[i] = ac_unpack_param(&ctx->ac, - ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[i & ~1]), + ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i & ~1]), (i & 1) * 16, 16); } ctx->gs_wave_id = ac_unpack_param(&ctx->ac, - ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), + ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 16, 8); } else { for (int i = 0; i < 6; i++) - ctx->gs_vtx_offset[i] = ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[i]); - ctx->gs_wave_id = ac_get_arg(&ctx->ac, ctx->args->gs_wave_id); + ctx->gs_vtx_offset[i] = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i]); + ctx->gs_wave_id = ac_get_arg(&ctx->ac, ctx->args->ac.gs_wave_id); } } @@ -3866,8 +3866,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, if (args->ac.vertex_id.used) ctx.abi.vertex_id = ac_get_arg(&ctx.ac, args->ac.vertex_id); - if (args->rel_auto_id.used) - ctx.rel_auto_id = ac_get_arg(&ctx.ac, args->rel_auto_id); + if (args->ac.vs_rel_patch_id.used) + ctx.vs_rel_patch_id = ac_get_arg(&ctx.ac, args->ac.vs_rel_patch_id); if (args->ac.instance_id.used) ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id); @@ -4016,7 +4016,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, LLVMValueRef count = ac_unpack_param(&ctx.ac, - ac_get_arg(&ctx.ac, args->merged_wave_info), + ac_get_arg(&ctx.ac, args->ac.merged_wave_info), 8 * shader_idx, 8); LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac); LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT, @@ -4215,7 +4215,7 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx) stream_id = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, - ctx->args->streamout_config), + ctx->args->ac.streamout_config), 24, 2); } else { stream_id = ctx->ac.i32_0; diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index ef6e170899a..abb790fee45 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -279,7 +279,7 @@ declare_vs_specific_input_sgprs(struct radv_shader_args *args, (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { if (args->shader_info->vs.has_vertex_buffers) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, - &args->vertex_buffers); + &args->ac.vertex_buffers); } ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance); @@ -295,7 +295,7 @@ declare_vs_input_vgprs(struct radv_shader_args *args) ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id); if (!args->is_gs_copy_shader) { if (args->options->key.vs_common_out.as_ls) { - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->rel_auto_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id); if (args->options->chip_class >= GFX10) { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); @@ -311,12 +311,12 @@ declare_vs_input_vgprs(struct radv_shader_args *args) ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); } else { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); } } else { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ } } @@ -339,8 +339,8 @@ declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage) assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_config); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_write_idx); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index); } else if (stage == MESA_SHADER_TESS_EVAL) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); } @@ -350,16 +350,16 @@ declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage) if (!args->shader_info->so.strides[i]) continue; - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_offset[i]); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]); } } static void declare_tes_input_vgprs(struct radv_shader_args *args) { - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_u); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_v); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->tes_rel_patch_id); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id); } @@ -482,7 +482,7 @@ radv_declare_shader_args(struct radv_shader_args *args, if (args->options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->scratch_offset); + &args->ac.scratch_offset); } ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, @@ -501,7 +501,7 @@ radv_declare_shader_args(struct radv_shader_args *args, if (args->options->key.vs_common_out.as_es) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->es2gs_offset); + &args->ac.es2gs_offset); } else if (args->options->key.vs_common_out.as_ls) { /* no extra parameters */ } else { @@ -510,7 +510,7 @@ radv_declare_shader_args(struct radv_shader_args *args, if (args->options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->scratch_offset); + &args->ac.scratch_offset); } declare_vs_input_vgprs(args); @@ -518,13 +518,13 @@ radv_declare_shader_args(struct radv_shader_args *args, case MESA_SHADER_TESS_CTRL: if (has_previous_stage) { // First 6 system regs - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->merged_wave_info); + &args->ac.merged_wave_info); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->tess_factor_offset); + &args->ac.tcs_factor_offset); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown @@ -553,12 +553,12 @@ radv_declare_shader_args(struct radv_shader_args *args, &args->ac.view_index); } - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->tess_factor_offset); + &args->ac.tcs_factor_offset); if (args->options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->scratch_offset); + &args->ac.scratch_offset); } ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id); @@ -574,17 +574,17 @@ radv_declare_shader_args(struct radv_shader_args *args, &args->ac.view_index); if (args->options->key.vs_common_out.as_es) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->es2gs_offset); + &args->ac.es2gs_offset); } else { declare_streamout_sgprs(args, stage); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); } if (args->options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->scratch_offset); + &args->ac.scratch_offset); } declare_tes_input_vgprs(args); break; @@ -593,17 +593,17 @@ radv_declare_shader_args(struct radv_shader_args *args, // First 6 system regs if (args->options->key.vs_common_out.as_ngg) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->gs_tg_info); + &args->ac.gs_tg_info); } else { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->gs2vs_offset); + &args->ac.gs2vs_offset); } ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->merged_wave_info); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds); + &args->ac.merged_wave_info); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown @@ -626,15 +626,15 @@ radv_declare_shader_args(struct radv_shader_args *args, } ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[0]); + &args->ac.gs_vtx_offset[0]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[2]); + &args->ac.gs_vtx_offset[2]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[4]); + &args->ac.gs_vtx_offset[4]); if (previous_stage == MESA_SHADER_VERTEX) { declare_vs_input_vgprs(args); @@ -649,26 +649,26 @@ radv_declare_shader_args(struct radv_shader_args *args, &args->ac.view_index); } - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id); if (args->options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->scratch_offset); + &args->ac.scratch_offset); } ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[0]); + &args->ac.gs_vtx_offset[0]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[1]); + &args->ac.gs_vtx_offset[1]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[2]); + &args->ac.gs_vtx_offset[2]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[3]); + &args->ac.gs_vtx_offset[3]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[4]); + &args->ac.gs_vtx_offset[4]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, - &args->gs_vtx_offset[5]); + &args->ac.gs_vtx_offset[5]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id); } @@ -679,7 +679,7 @@ radv_declare_shader_args(struct radv_shader_args *args, ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask); if (args->options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, - &args->scratch_offset); + &args->ac.scratch_offset); } ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample); ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center); diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h index f01c63ffa1a..cb79f8a9efd 100644 --- a/src/amd/vulkan/radv_shader_args.h +++ b/src/amd/vulkan/radv_shader_args.h @@ -34,36 +34,9 @@ struct radv_shader_args { struct ac_arg descriptor_sets[MAX_SETS]; struct ac_arg ring_offsets; - struct ac_arg scratch_offset; - - struct ac_arg vertex_buffers; - struct ac_arg rel_auto_id; - struct ac_arg vs_prim_id; - struct ac_arg es2gs_offset; - - struct ac_arg oc_lds; - struct ac_arg merged_wave_info; - struct ac_arg tess_factor_offset; - struct ac_arg tes_rel_patch_id; - struct ac_arg tes_u; - struct ac_arg tes_v; - - /* HW GS */ - /* On gfx10: - * - bits 0..11: ordered_wave_id - * - bits 12..20: number of vertices in group - * - bits 22..30: number of primitives in group - */ - struct ac_arg gs_tg_info; - struct ac_arg gs2vs_offset; - struct ac_arg gs_wave_id; - struct ac_arg gs_vtx_offset[6]; /* Streamout */ struct ac_arg streamout_buffers; - struct ac_arg streamout_write_idx; - struct ac_arg streamout_config; - struct ac_arg streamout_offset[4]; /* NGG GS */ struct ac_arg ngg_gs_state; diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 2bb18d8af01..c5ab374ebc2 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -30,12 +30,12 @@ static LLVMValueRef get_wave_id_in_tg(struct si_shader_context *ctx) { - return si_unpack_param(ctx, ctx->merged_wave_info, 24, 4); + return si_unpack_param(ctx, ctx->args.merged_wave_info, 24, 4); } static LLVMValueRef get_tgsize(struct si_shader_context *ctx) { - return si_unpack_param(ctx, ctx->merged_wave_info, 28, 4); + return si_unpack_param(ctx, ctx->args.merged_wave_info, 28, 4); } static LLVMValueRef get_thread_id_in_tg(struct si_shader_context *ctx) @@ -49,17 +49,17 @@ static LLVMValueRef get_thread_id_in_tg(struct si_shader_context *ctx) static LLVMValueRef ngg_get_vtx_cnt(struct si_shader_context *ctx) { - return si_unpack_param(ctx, ctx->gs_tg_info, 12, 9); + return si_unpack_param(ctx, ctx->args.gs_tg_info, 12, 9); } static LLVMValueRef ngg_get_prim_cnt(struct si_shader_context *ctx) { - return si_unpack_param(ctx, ctx->gs_tg_info, 22, 9); + return si_unpack_param(ctx, ctx->args.gs_tg_info, 22, 9); } static LLVMValueRef ngg_get_ordered_id(struct si_shader_context *ctx) { - return si_unpack_param(ctx, ctx->gs_tg_info, 0, 12); + return si_unpack_param(ctx, ctx->args.gs_tg_info, 0, 12); } static LLVMValueRef ngg_get_query_buf(struct si_shader_context *ctx) @@ -1011,11 +1011,11 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out } } else { assert(ctx->stage == MESA_SHADER_TESS_EVAL); - LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->tes_u)), + LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tes_u)), ac_build_gep0(&ctx->ac, new_vtx, LLVMConstInt(ctx->ac.i32, lds_tes_u, 0))); - LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->tes_v)), + LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tes_v)), ac_build_gep0(&ctx->ac, new_vtx, LLVMConstInt(ctx->ac.i32, lds_tes_v, 0))); - LLVMBuildStore(builder, LLVMBuildTrunc(builder, ac_get_arg(&ctx->ac, ctx->tes_rel_patch_id), ctx->ac.i8, ""), + LLVMBuildStore(builder, LLVMBuildTrunc(builder, ac_get_arg(&ctx->ac, ctx->args.tes_rel_patch_id), ctx->ac.i8, ""), si_build_gep_i8(ctx, new_vtx, lds_byte2_tes_rel_patch_id)); if (uses_tes_prim_id) { LLVMBuildStore( @@ -1048,8 +1048,8 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out ngg_get_prim_cnt(ctx)); /* Update thread counts in SGPRs. */ - LLVMValueRef new_gs_tg_info = ac_get_arg(&ctx->ac, ctx->gs_tg_info); - LLVMValueRef new_merged_wave_info = ac_get_arg(&ctx->ac, ctx->merged_wave_info); + LLVMValueRef new_gs_tg_info = ac_get_arg(&ctx->ac, ctx->args.gs_tg_info); + LLVMValueRef new_merged_wave_info = ac_get_arg(&ctx->ac, ctx->args.merged_wave_info); /* This also converts the thread count from the total count to the per-wave count. */ update_thread_counts(ctx, &new_num_es_threads, &new_gs_tg_info, 9, 12, &new_merged_wave_info, 8, @@ -1128,7 +1128,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out ret = LLVMBuildInsertValue(ctx->ac.builder, ret, new_gs_tg_info, 2, ""); ret = LLVMBuildInsertValue(ctx->ac.builder, ret, new_merged_wave_info, 3, ""); if (ctx->stage == MESA_SHADER_TESS_EVAL) - ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 4); + ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, 4); ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, 8 + SI_SGPR_RW_BUFFERS); ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images, @@ -1142,7 +1142,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out ret = si_insert_input_ptr(ctx, ret, ctx->args.base_vertex, 8 + SI_SGPR_BASE_VERTEX); ret = si_insert_input_ptr(ctx, ret, ctx->args.draw_id, 8 + SI_SGPR_DRAWID); ret = si_insert_input_ptr(ctx, ret, ctx->args.start_instance, 8 + SI_SGPR_START_INSTANCE); - ret = si_insert_input_ptr(ctx, ret, ctx->vertex_buffers, 8 + SI_VS_NUM_USER_SGPR); + ret = si_insert_input_ptr(ctx, ret, ctx->args.vertex_buffers, 8 + SI_VS_NUM_USER_SGPR); for (unsigned i = 0; i < shader->selector->num_vbos_in_user_sgprs; i++) { ret = si_insert_input_v4i32(ctx, ret, ctx->vb_descriptors[i], diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index cc112ae3ea1..81a9891fbcf 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -174,8 +174,8 @@ static void declare_streamout_params(struct si_shader_context *ctx, /* Streamout SGPRs. */ if (so->num_outputs) { - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->streamout_config); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->streamout_write_index); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.streamout_config); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.streamout_write_index); } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); } @@ -185,7 +185,7 @@ static void declare_streamout_params(struct si_shader_context *ctx, if (!so->stride[i]) continue; - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->streamout_offset[i]); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.streamout_offset[i]); } } @@ -269,7 +269,7 @@ static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx) static void declare_vb_descriptor_input_sgprs(struct si_shader_context *ctx) { - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->vertex_buffers); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->args.vertex_buffers); unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs; if (num_vbos_in_user_sgprs) { @@ -295,7 +295,7 @@ static void declare_vs_input_vgprs(struct si_shader_context *ctx, unsigned *num_ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vertex_id); if (shader->key.as_ls) { - ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->rel_auto_id); + ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vs_rel_patch_id); if (ctx->screen->info.chip_class >= GFX10) { ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.instance_id); @@ -306,11 +306,11 @@ static void declare_vs_input_vgprs(struct si_shader_context *ctx, unsigned *num_ } else if (ctx->screen->info.chip_class >= GFX10) { ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, - &ctx->vs_prim_id); /* user vgpr or PrimID (legacy) */ + &ctx->args.vs_prim_id); /* user vgpr or PrimID (legacy) */ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.instance_id); } else { ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.instance_id); - ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->vs_prim_id); + ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vs_prim_id); ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ } @@ -348,9 +348,9 @@ static void declare_vs_blit_inputs(struct si_shader_context *ctx, unsigned vs_bl static void declare_tes_input_vgprs(struct si_shader_context *ctx) { - ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->tes_u); - ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->tes_v); - ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->tes_rel_patch_id); + ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.tes_u); + ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.tes_v); + ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tes_rel_patch_id); ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tes_patch_id); } @@ -403,7 +403,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) declare_vb_descriptor_input_sgprs(ctx); if (shader->key.as_es) { - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->es2gs_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset); } else if (shader->key.as_ls) { /* no extra parameters */ } else { @@ -428,8 +428,8 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_offsets); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset); /* VGPRs */ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_patch_id); @@ -448,10 +448,10 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) /* Merged stages have 8 system SGPRs at the beginning. */ /* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */ declare_per_stage_desc_pointers(ctx, ctx->stage == MESA_SHADER_TESS_CTRL); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_wave_info); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_scratch_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.merged_wave_info); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.scratch_offset); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */ @@ -511,13 +511,13 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) declare_per_stage_desc_pointers(ctx, ctx->stage == MESA_SHADER_GEOMETRY); if (ctx->shader->key.as_ngg) - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs_tg_info); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs_tg_info); else - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs2vs_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs2vs_offset); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_wave_info); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_scratch_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.merged_wave_info); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.scratch_offset); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->small_prim_cull_info); /* SPI_SHADER_PGM_LO_GS << 8 */ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, @@ -599,12 +599,12 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tes_offchip_addr); if (shader->key.as_es) { - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->es2gs_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset); } else { declare_streamout_params(ctx, &shader->selector->so); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset); } /* VGPRs */ @@ -614,17 +614,17 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) case MESA_SHADER_GEOMETRY: declare_global_desc_pointers(ctx); declare_per_stage_desc_pointers(ctx, true); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs2vs_offset); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs_wave_id); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs2vs_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs_wave_id); /* VGPRs */ - ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[0]); - ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[1]); + ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[0]); + ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[1]); ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_prim_id); - ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[2]); - ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[3]); - ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[4]); - ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[5]); + ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[2]); + ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[3]); + ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[4]); + ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[5]); ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_invocation_id); break; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index e8bbd08b958..53bf95efa13 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -80,15 +80,9 @@ struct si_shader_context { struct ac_arg rw_buffers; struct ac_arg bindless_samplers_and_images; - /* Common inputs for merged shaders. */ - struct ac_arg merged_wave_info; - struct ac_arg merged_scratch_offset; struct ac_arg small_prim_cull_info; /* API VS */ - struct ac_arg vertex_buffers; struct ac_arg vb_descriptors[5]; - struct ac_arg rel_auto_id; - struct ac_arg vs_prim_id; struct ac_arg vertex_index0; /* VS states and layout of LS outputs / TCS inputs at the end * [0] = clamp vertex color @@ -110,10 +104,6 @@ struct si_shader_context { */ struct ac_arg vs_state_bits; struct ac_arg vs_blit_inputs; - /* HW VS */ - struct ac_arg streamout_config; - struct ac_arg streamout_write_index; - struct ac_arg streamout_offset[4]; /* API TCS & TES */ /* Layout of TCS outputs in the offchip buffer @@ -141,27 +131,10 @@ struct si_shader_context { * [19:31] = high 13 bits of the 32-bit address of tessellation ring buffers */ struct ac_arg tcs_out_lds_layout; - struct ac_arg tcs_offchip_offset; - struct ac_arg tcs_factor_offset; /* API TES */ struct ac_arg tes_offchip_addr; - struct ac_arg tes_u; - struct ac_arg tes_v; - struct ac_arg tes_rel_patch_id; - /* HW ES */ - struct ac_arg es2gs_offset; - /* HW GS */ - /* On gfx10: - * - bits 0..11: ordered_wave_id - * - bits 12..20: number of vertices in group - * - bits 22..30: number of primitives in group - */ - struct ac_arg gs_tg_info; /* API GS */ - struct ac_arg gs2vs_offset; - struct ac_arg gs_wave_id; /* GFX6 */ - struct ac_arg gs_vtx_offset[6]; /* in dwords (GFX6) */ struct ac_arg gs_vtx01_offset; /* in dwords (GFX9) */ struct ac_arg gs_vtx23_offset; /* in dwords (GFX9) */ struct ac_arg gs_vtx45_offset; /* in dwords (GFX9) */ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index c611749be22..64c9907faa0 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -393,7 +393,7 @@ LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, unsigned swizzle switch (ctx->stage) { case MESA_SHADER_VERTEX: - return ac_get_arg(&ctx->ac, ctx->vs_prim_id); + return ac_get_arg(&ctx->ac, ctx->args.vs_prim_id); case MESA_SHADER_TESS_CTRL: return ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id); case MESA_SHADER_TESS_EVAL: @@ -930,7 +930,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad (ctx->stage == MESA_SHADER_TESS_EVAL || (ctx->stage == MESA_SHADER_VERTEX && !si_vs_needs_prolog(sel, &shader->key.part.vs.prolog, &shader->key, ngg_cull_shader)))) { - si_init_exec_from_input(ctx, ctx->merged_wave_info, 0); + si_init_exec_from_input(ctx, ctx->args.merged_wave_info, 0); } else if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_GEOMETRY || (shader->key.as_ngg && !shader->key.as_es)) { LLVMValueRef thread_enabled = NULL; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index fdf22faad73..9e107a6ade0 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -31,14 +31,14 @@ LLVMValueRef si_is_es_thread(struct si_shader_context *ctx) { /* Return true if the current thread should execute an ES thread. */ return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), - si_unpack_param(ctx, ctx->merged_wave_info, 0, 8), ""); + si_unpack_param(ctx, ctx->args.merged_wave_info, 0, 8), ""); } LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx) { /* Return true if the current thread should execute a GS thread. */ return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), - si_unpack_param(ctx, ctx->merged_wave_info, 8, 8), ""); + si_unpack_param(ctx, ctx->args.merged_wave_info, 8, 8), ""); } static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, unsigned input_index, @@ -84,7 +84,7 @@ static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, unsigned in /* GFX6: input load from the ESGS ring in memory. */ /* Get the vertex offset parameter on GFX6. */ - LLVMValueRef gs_vtx_offset = ac_get_arg(&ctx->ac, ctx->gs_vtx_offset[vtx_offset_param]); + LLVMValueRef gs_vtx_offset = ac_get_arg(&ctx->ac, ctx->args.gs_vtx_offset[vtx_offset_param]); vtx_offset = LLVMBuildMul(ctx->ac.builder, gs_vtx_offset, LLVMConstInt(ctx->ac.i32, 4, 0), ""); @@ -119,11 +119,11 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0); ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1); if (ctx->shader->key.as_ngg) - ret = si_insert_input_ptr(ctx, ret, ctx->gs_tg_info, 2); + ret = si_insert_input_ptr(ctx, ret, ctx->args.gs_tg_info, 2); else - ret = si_insert_input_ret(ctx, ret, ctx->gs2vs_offset, 2); - ret = si_insert_input_ret(ctx, ret, ctx->merged_wave_info, 3); - ret = si_insert_input_ret(ctx, ret, ctx->merged_scratch_offset, 5); + ret = si_insert_input_ret(ctx, ret, ctx->args.gs2vs_offset, 2); + ret = si_insert_input_ret(ctx, ret, ctx->args.merged_wave_info, 3); + ret = si_insert_input_ret(ctx, ret, ctx->args.scratch_offset, 5); ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, 8 + SI_SGPR_RW_BUFFERS); ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images, @@ -158,7 +158,7 @@ void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, L if (ctx->screen->info.chip_class >= GFX9 && info->num_outputs) { unsigned itemsize_dw = es->selector->esgs_itemsize / 4; LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac); - LLVMValueRef wave_idx = si_unpack_param(ctx, ctx->merged_wave_info, 24, 4); + LLVMValueRef wave_idx = si_unpack_param(ctx, ctx->args.merged_wave_info, 24, 4); vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx, LLVMBuildMul(ctx->ac.builder, wave_idx, @@ -193,7 +193,7 @@ void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, L } ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, out_val, 1, NULL, - ac_get_arg(&ctx->ac, ctx->es2gs_offset), + ac_get_arg(&ctx->ac, ctx->args.es2gs_offset), (4 * param + chan) * 4, ac_glc | ac_slc | ac_swizzled); } } @@ -205,9 +205,9 @@ void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, L static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx) { if (ctx->screen->info.chip_class >= GFX9) - return si_unpack_param(ctx, ctx->merged_wave_info, 16, 8); + return si_unpack_param(ctx, ctx->args.merged_wave_info, 16, 8); else - return ac_get_arg(&ctx->ac, ctx->gs_wave_id); + return ac_get_arg(&ctx->ac, ctx->args.gs_wave_id); } static void emit_gs_epilogue(struct si_shader_context *ctx) @@ -249,7 +249,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVM struct si_shader_info *info = &ctx->shader->selector->info; struct si_shader *shader = ctx->shader; - LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->gs2vs_offset); + LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->args.gs2vs_offset); LLVMValueRef gs_next_vertex; LLVMValueRef can_emit; unsigned chan, offset; @@ -464,7 +464,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, LLVMValueRef stream_id; if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs) - stream_id = si_unpack_param(&ctx, ctx.streamout_config, 24, 2); + stream_id = si_unpack_param(&ctx, ctx.args.streamout_config, 24, 2); else stream_id = ctx.ac.i32_0; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index abf91715d4e..4e7f4a0d413 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -33,7 +33,7 @@ static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx) return si_unpack_param(ctx, ctx->args.tcs_rel_ids, 0, 8); case MESA_SHADER_TESS_EVAL: - return ac_get_arg(&ctx->ac, ctx->tes_rel_patch_id); + return ac_get_arg(&ctx->ac, ctx->args.tes_rel_patch_id); default: assert(0); @@ -454,7 +454,7 @@ static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef semantic == VARYING_SLOT_TESS_LEVEL_INNER || semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == (vertex_index == NULL)); - base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset); + base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset); addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, semantic); @@ -516,7 +516,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS); - base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset); + base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset); addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, semantic); @@ -562,7 +562,8 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, static LLVMValueRef si_load_tess_coord(struct ac_shader_abi *abi) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); - LLVMValueRef coord[4] = {ac_get_arg(&ctx->ac, ctx->tes_u), ac_get_arg(&ctx->ac, ctx->tes_v), + LLVMValueRef coord[4] = {ac_get_arg(&ctx->ac, ctx->args.tes_u), + ac_get_arg(&ctx->ac, ctx->args.tes_v), ctx->ac.f32_0, ctx->ac.f32_0}; /* For triangles, the vector should be (u, v, 1-u-v). */ @@ -579,7 +580,7 @@ static LLVMValueRef load_tess_level(struct si_shader_context *ctx, unsigned sema int param = si_shader_io_get_unique_index_patch(semantic); - base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset); + base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset); addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL, LLVMConstInt(ctx->ac.i32, param, 0)); @@ -658,7 +659,7 @@ static void si_copy_tcs_inputs(struct si_shader_context *ctx) invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5); buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS); - buffer_offset = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset); + buffer_offset = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset); lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx); lds_base = get_tcs_in_current_patch_offset(ctx); @@ -779,7 +780,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING); /* Get the offset. */ - tf_base = ac_get_arg(&ctx->ac, ctx->tcs_factor_offset); + tf_base = ac_get_arg(&ctx->ac, ctx->args.tcs_factor_offset); byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id, LLVMConstInt(ctx->ac.i32, 4 * stride, 0), ""); offset = 0; @@ -809,7 +810,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re unsigned param_outer, param_inner; buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS); - base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset); + base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset); param_outer = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER); tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL, @@ -879,15 +880,15 @@ static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi, unsigned max_ou si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT); /* Tess offchip and tess factor offsets are at the beginning. */ - ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2); - ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4); + ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, 2); + ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, 4); vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1; } else { ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT); ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, GFX6_SGPR_TCS_OUT_LAYOUT); /* Tess offchip and tess factor offsets are after user SGPRs. */ - ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, GFX6_TCS_NUM_USER_SGPR); - ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1); + ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, GFX6_TCS_NUM_USER_SGPR); + ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1); vgpr = GFX6_TCS_NUM_USER_SGPR + 2; } @@ -925,10 +926,10 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0); ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1); - ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2); - ret = si_insert_input_ret(ctx, ret, ctx->merged_wave_info, 3); - ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4); - ret = si_insert_input_ret(ctx, ret, ctx->merged_scratch_offset, 5); + ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, 2); + ret = si_insert_input_ret(ctx, ret, ctx->args.merged_wave_info, 3); + ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, 4); + ret = si_insert_input_ret(ctx, ret, ctx->args.scratch_offset, 5); ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, 8 + SI_SGPR_RW_BUFFERS); ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images, @@ -956,7 +957,7 @@ void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, L struct si_shader *shader = ctx->shader; struct si_shader_info *info = &shader->selector->info; unsigned i, chan; - LLVMValueRef vertex_id = ac_get_arg(&ctx->ac, ctx->rel_auto_id); + LLVMValueRef vertex_id = ac_get_arg(&ctx->ac, ctx->args.vs_rel_patch_id); LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx); LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, vertex_dw_stride, ""); unsigned ret_offset = 8 + GFX9_TCS_NUM_USER_SGPR + 2; @@ -1020,9 +1021,9 @@ void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_par if (ctx->screen->info.chip_class >= GFX9) { ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* wave info */ - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); @@ -1046,8 +1047,8 @@ void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_par ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset); } ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 603381f46f0..b9e026701d2 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -105,7 +105,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L vb_desc = ac_get_arg(&ctx->ac, ctx->vb_descriptors[input_index]); } else { unsigned index = input_index - num_vbos_in_user_sgprs; - vb_desc = ac_build_load_to_sgpr(&ctx->ac, ac_get_arg(&ctx->ac, ctx->vertex_buffers), + vb_desc = ac_build_load_to_sgpr(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.vertex_buffers), LLVMConstInt(ctx->ac.i32, index, 0)); } @@ -301,7 +301,7 @@ void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_outp int i; /* Get bits [22:16], i.e. (so_param >> 16) & 127; */ - LLVMValueRef so_vtx_count = si_unpack_param(ctx, ctx->streamout_config, 16, 7); + LLVMValueRef so_vtx_count = si_unpack_param(ctx, ctx->args.streamout_config, 16, 7); LLVMValueRef tid = ac_get_thread_id(&ctx->ac); @@ -319,7 +319,7 @@ void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_outp * attrib_offset */ - LLVMValueRef so_write_index = ac_get_arg(&ctx->ac, ctx->streamout_write_index); + LLVMValueRef so_write_index = ac_get_arg(&ctx->ac, ctx->args.streamout_write_index); /* Compute (streamout_write_index + thread_id). */ so_write_index = LLVMBuildAdd(builder, so_write_index, tid, ""); @@ -338,7 +338,7 @@ void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_outp so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset); - LLVMValueRef so_offset = ac_get_arg(&ctx->ac, ctx->streamout_offset[i]); + LLVMValueRef so_offset = ac_get_arg(&ctx->ac, ctx->args.streamout_offset[i]); so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->ac.i32, 4, 0), ""); so_write_offset[i] = ac_build_imad(