diff --git a/src/amd/common/nir/ac_nir_helpers.h b/src/amd/common/nir/ac_nir_helpers.h
index 0e9eff18719..c86a1f2d95e 100644
--- a/src/amd/common/nir/ac_nir_helpers.h
+++ b/src/amd/common/nir/ac_nir_helpers.h
@@ -133,11 +133,8 @@ ac_nir_store_parameters_to_attr_ring(nir_builder *b,
                                      nir_def *num_export_threads_in_wave);
 
 nir_def *
-ac_nir_calc_io_off(nir_builder *b,
-                   nir_intrinsic_instr *intrin,
-                   nir_def *base_stride,
-                   unsigned component_stride,
-                   unsigned mapped_location);
+ac_nir_calc_io_off(nir_builder *b, unsigned component, nir_def *io_offset, nir_def *base_stride,
+                   unsigned component_stride, unsigned mapped_driver_location);
 
 unsigned
 ac_nir_map_io_location(unsigned location,
diff --git a/src/amd/common/nir/ac_nir_lower_esgs_io_to_mem.c b/src/amd/common/nir/ac_nir_lower_esgs_io_to_mem.c
index 73a4409786a..a89d1d63509 100644
--- a/src/amd/common/nir/ac_nir_lower_esgs_io_to_mem.c
+++ b/src/amd/common/nir/ac_nir_lower_esgs_io_to_mem.c
@@ -161,7 +161,9 @@ lower_es_output_store(nir_builder *b,
    b->cursor = nir_before_instr(&intrin->instr);
 
    unsigned mapped = ac_nir_map_io_location(io_sem.location, st->gs_inputs_read, st->map_io);
-   nir_def *io_off = ac_nir_calc_io_off(b, intrin, nir_imm_int(b, 16u), 4u, mapped);
+   nir_def *io_off = ac_nir_calc_io_off(b, nir_intrinsic_component(intrin),
+                                        nir_get_io_offset_src(intrin)->ssa,
+                                        nir_imm_int(b, 16u), 4u, mapped);
    nir_def *store_val = intrin->src[0].ssa;
 
    if (st->gfx_level <= GFX8) {
@@ -292,7 +294,9 @@ gs_per_vertex_input_offset(nir_builder *b,
    unsigned base_stride = st->gfx_level >= GFX9 ? 1 : 64 /* Wave size on GFX6-8 */;
    const nir_io_semantics io_sem = nir_intrinsic_io_semantics(instr);
    unsigned mapped = ac_nir_map_io_location(io_sem.location, st->gs_inputs_read, st->map_io);
-   nir_def *io_off = ac_nir_calc_io_off(b, instr, nir_imm_int(b, base_stride * 4u), base_stride, mapped);
+   nir_def *io_off = ac_nir_calc_io_off(b, nir_intrinsic_component(instr),
+                                        nir_get_io_offset_src(instr)->ssa,
+                                        nir_imm_int(b, base_stride * 4u), base_stride, mapped);
    nir_def *off = nir_iadd(b, io_off, vertex_offset);
    return nir_imul_imm(b, off, 4u);
 }
diff --git a/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c b/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c
index 219d59e6fd9..3d3c545c47a 100644
--- a/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c
+++ b/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c
@@ -280,7 +280,9 @@ lower_ls_output_store(nir_builder *b,
    nir_def *base_off_var = nir_imul(b, vertex_idx, nir_load_lshs_vertex_stride_amd(b));
 
    unsigned mapped = ac_nir_map_io_location(io_sem.location, st->tcs_inputs_via_lds, st->map_io);
-   nir_def *io_off = ac_nir_calc_io_off(b, intrin, nir_imm_int(b, 16u), 4u, mapped);
+   nir_def *io_off = ac_nir_calc_io_off(b, nir_intrinsic_component(intrin),
+                                        nir_get_io_offset_src(intrin)->ssa,
+                                        nir_imm_int(b, 16u), 4u, mapped);
    unsigned write_mask = nir_intrinsic_write_mask(intrin);
 
    nir_def *off = nir_iadd_nuw(b, base_off_var, io_off);
@@ -348,7 +350,9 @@ hs_per_vertex_input_lds_offset(nir_builder *b,
 
    const nir_io_semantics io_sem = nir_intrinsic_io_semantics(instr);
    const unsigned mapped = ac_nir_map_io_location(io_sem.location, st->tcs_inputs_via_lds, st->map_io);
-   nir_def *io_offset = ac_nir_calc_io_off(b, instr, nir_imm_int(b, 16u), 4u, mapped);
+   nir_def *io_offset = ac_nir_calc_io_off(b, nir_intrinsic_component(instr),
+                                           nir_get_io_offset_src(instr)->ssa,
+                                           nir_imm_int(b, 16u), 4u, mapped);
    nir_def *lds_offset = nir_iadd_nuw(b, nir_iadd_nuw(b, tcs_in_current_patch_offset, vertex_index_off), io_offset);
 
    /* The first LDS vec4 is reserved for the tf0/1 shader message group vote. */
@@ -403,7 +407,8 @@ hs_output_lds_offset(nir_builder *b,
    if (intrin) {
       const nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
       const unsigned mapped = hs_output_lds_map_io_location(b->shader, per_vertex, io_sem.location, st);
-      off = ac_nir_calc_io_off(b, intrin, nir_imm_int(b, 16u), 4, mapped);
+      off = ac_nir_calc_io_off(b, nir_intrinsic_component(intrin), nir_get_io_offset_src(intrin)->ssa,
+                               nir_imm_int(b, 16u), 4, mapped);
    } else {
       off = nir_imm_int(b, 0);
    }
@@ -469,44 +474,38 @@ hs_output_vram_map_io_location(nir_shader *shader,
 }
 
 static nir_def *
-hs_per_vertex_output_vmem_offset(nir_builder *b,
-                                 lower_tess_io_state *st,
-                                 nir_intrinsic_instr *intrin)
+hs_per_vertex_output_vmem_offset(nir_builder *b, lower_tess_io_state *st, unsigned location,
+                                 unsigned component, nir_def *vertex_index, nir_def *io_offset)
 {
-   const nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
-
    nir_def *out_vertices_per_patch = b->shader->info.stage == MESA_SHADER_TESS_CTRL
                                         ? nir_imm_int(b, b->shader->info.tess.tcs_vertices_out)
                                         : nir_load_patch_vertices_in(b);
 
    nir_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
    nir_def *attr_stride = nir_imul(b, tcs_num_patches, nir_imul_imm(b, out_vertices_per_patch, 16u));
-   nir_def *io_offset =
-      ac_nir_calc_io_off(b, intrin, attr_stride, 4u,
-                         hs_output_vram_map_io_location(b->shader, true, io_sem.location, st));
+   nir_def *off =
+      ac_nir_calc_io_off(b, component, io_offset, attr_stride, 4u,
+                         hs_output_vram_map_io_location(b->shader, true, location, st));
 
    nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
    nir_def *patch_offset = nir_imul(b, rel_patch_id, nir_imul_imm(b, out_vertices_per_patch, 16u));
 
-   nir_def *vertex_index = nir_get_io_arrayed_index_src(intrin)->ssa;
    nir_def *vertex_index_off = nir_imul_imm(b, vertex_index, 16u);
 
-   return nir_iadd_nuw(b, nir_iadd_nuw(b, patch_offset, vertex_index_off), io_offset);
+   return nir_iadd_nuw(b, nir_iadd_nuw(b, patch_offset, vertex_index_off), off);
 }
 
 static nir_def *
-hs_per_patch_output_vmem_offset(nir_builder *b,
-                                lower_tess_io_state *st,
-                                nir_intrinsic_instr *intrin,
-                                unsigned const_base_offset)
+hs_per_patch_output_vmem_offset(nir_builder *b, lower_tess_io_state *st, unsigned location,
+                                unsigned component, nir_def *io_offset, unsigned const_base_offset)
 {
    nir_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
    nir_def *per_patch_data_offset = nir_load_hs_out_patch_data_offset_amd(b);
 
-   nir_def * off =
-      intrin
-      ? ac_nir_calc_io_off(b, intrin, nir_imul_imm(b, tcs_num_patches, 16u), 4u,
-                           hs_output_vram_map_io_location(b->shader, false, nir_intrinsic_io_semantics(intrin).location, st))
+   nir_def *off =
+      io_offset
+      ? ac_nir_calc_io_off(b, component, io_offset, nir_imul_imm(b, tcs_num_patches, 16u), 4u,
+                           hs_output_vram_map_io_location(b->shader, false, location, st))
       : nir_imm_int(b, 0);
 
    if (const_base_offset)
@@ -545,6 +544,7 @@ lower_hs_output_store(nir_builder *b,
           intrin->intrinsic == nir_intrinsic_store_output);
 
    nir_io_semantics semantics = nir_intrinsic_io_semantics(intrin);
+   const unsigned component = nir_intrinsic_component(intrin);
    nir_def *store_val = intrin->src[0].ssa;
    const unsigned write_mask = nir_intrinsic_write_mask(intrin);
    const bool write_to_vmem = tcs_output_needs_vmem(intrin, b->shader, st);
@@ -552,8 +552,11 @@
 
    if (write_to_vmem) {
       nir_def *vmem_off = intrin->intrinsic == nir_intrinsic_store_per_vertex_output
-                             ? hs_per_vertex_output_vmem_offset(b, st, intrin)
-                             : hs_per_patch_output_vmem_offset(b, st, intrin, 0);
+                             ? hs_per_vertex_output_vmem_offset(b, st, semantics.location, component,
+                                                                nir_get_io_arrayed_index_src(intrin)->ssa,
+                                                                nir_get_io_offset_src(intrin)->ssa)
+                             : hs_per_patch_output_vmem_offset(b, st, semantics.location, component,
+                                                               nir_get_io_offset_src(intrin)->ssa, 0);
 
       nir_def *hs_ring_tess_offchip = nir_load_ring_tess_offchip_amd(b);
       nir_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
@@ -575,8 +578,6 @@
     */
    if (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
        semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER) {
-      const unsigned component = nir_intrinsic_component(intrin);
-
       if (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER) {
          st->tcs_tess_level_inner_mask |= write_mask << component;
 
@@ -1023,7 +1024,7 @@ hs_store_tess_factors_for_tes(nir_builder *b, tess_levels tessfactors, lower_tes
    if (st->tcs_tess_level_outer_mask && tes_reads_outer) {
       const unsigned tf_outer_loc =
          hs_output_vram_map_io_location(b->shader, false, VARYING_SLOT_TESS_LEVEL_OUTER, st);
-      nir_def *vmem_off_outer = hs_per_patch_output_vmem_offset(b, st, NULL, tf_outer_loc * 16);
+      nir_def *vmem_off_outer = hs_per_patch_output_vmem_offset(b, st, 0, 0, NULL, tf_outer_loc * 16);
 
       nir_store_buffer_amd(b, tessfactors.outer, hs_ring_tess_offchip,
                            vmem_off_outer, offchip_offset, zero,
@@ -1033,7 +1034,7 @@ hs_store_tess_factors_for_tes(nir_builder *b, tess_levels tessfactors, lower_tes
    if (tessfactors.inner && st->tcs_tess_level_inner_mask && tes_reads_inner) {
       const unsigned tf_inner_loc =
         hs_output_vram_map_io_location(b->shader, false, VARYING_SLOT_TESS_LEVEL_INNER, st);
-      nir_def *vmem_off_inner = hs_per_patch_output_vmem_offset(b, st, NULL, tf_inner_loc * 16);
+      nir_def *vmem_off_inner = hs_per_patch_output_vmem_offset(b, st, 0, 0, NULL, tf_inner_loc * 16);
 
       nir_store_buffer_amd(b, tessfactors.inner, hs_ring_tess_offchip,
                            vmem_off_inner, offchip_offset, zero,
@@ -1126,8 +1127,13 @@ lower_tes_input_load(nir_builder *b,
    nir_def *offchip_ring = nir_load_ring_tess_offchip_amd(b);
    nir_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
    nir_def *off = intrin->intrinsic == nir_intrinsic_load_per_vertex_input
-                     ? hs_per_vertex_output_vmem_offset(b, st, intrin)
-                     : hs_per_patch_output_vmem_offset(b, st, intrin, 0);
+                     ? hs_per_vertex_output_vmem_offset(b, st, io_sem.location,
+                                                        nir_intrinsic_component(intrin),
+                                                        nir_get_io_arrayed_index_src(intrin)->ssa,
+                                                        nir_get_io_offset_src(intrin)->ssa)
+                     : hs_per_patch_output_vmem_offset(b, st, io_sem.location,
+                                                       nir_intrinsic_component(intrin),
+                                                       nir_get_io_offset_src(intrin)->ssa, 0);
 
    nir_def *zero = nir_imm_int(b, 0);
    nir_def *load = NULL;
diff --git a/src/amd/common/nir/ac_nir_prerast_utils.c b/src/amd/common/nir/ac_nir_prerast_utils.c
index 316852a52e7..491a1d14b11 100644
--- a/src/amd/common/nir/ac_nir_prerast_utils.c
+++ b/src/amd/common/nir/ac_nir_prerast_utils.c
@@ -57,16 +57,11 @@
 }
 
 /**
- * This function takes an I/O intrinsic like load/store_input,
- * and emits a sequence that calculates the full offset of that instruction,
- * including a stride to the base and component offsets.
+ * This function calculates the full offset of an input/output.
  */
 nir_def *
-ac_nir_calc_io_off(nir_builder *b,
-                   nir_intrinsic_instr *intrin,
-                   nir_def *base_stride,
-                   unsigned component_stride,
-                   unsigned mapped_driver_location)
+ac_nir_calc_io_off(nir_builder *b, unsigned component, nir_def *io_offset, nir_def *base_stride,
+                   unsigned component_stride, unsigned mapped_driver_location)
 {
    /* base is the driver_location, which is in slots (1 slot = 4x4 bytes) */
    nir_def *base_op = nir_imul_imm(b, base_stride, mapped_driver_location);
@@ -75,11 +70,10 @@
    * so the instruction effectively reads/writes another input/output
    * when it has an offset
    */
-   nir_def *offset_op = nir_imul(b, base_stride,
-                                 nir_get_io_offset_src(intrin)->ssa);
+   nir_def *offset_op = nir_imul(b, base_stride, io_offset);
 
    /* component is in bytes */
-   unsigned const_op = nir_intrinsic_component(intrin) * component_stride;
+   unsigned const_op = component * component_stride;
 
    return nir_iadd_imm_nuw(b, nir_iadd_nuw(b, base_op, offset_op), const_op);
 }
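
Note on the new calling convention (editor's sketch, not part of the patch): ac_nir_calc_io_off() no longer receives the whole I/O intrinsic. Callers pass the constant component and the indirect-offset source explicitly, which is what lets hs_store_tess_factors_for_tes() reuse hs_per_patch_output_vmem_offset() without any intrinsic at hand (the "0, 0, NULL" arguments above). A typical call site, distilled from lower_es_output_store() in this patch, looks like:

   /* Byte offset computed by ac_nir_calc_io_off():
    *   off = base_stride * mapped_driver_location  (start of the remapped slot)
    *       + base_stride * io_offset               (indirect slot offset from the intrinsic)
    *       + component * component_stride          (constant byte offset within the slot)
    */
   nir_def *io_off =
      ac_nir_calc_io_off(b,
                         nir_intrinsic_component(intrin),    /* component */
                         nir_get_io_offset_src(intrin)->ssa, /* io_offset, in slots */
                         nir_imm_int(b, 16u),                /* base_stride: 16 bytes per vec4 slot */
                         4u,                                 /* component_stride: 4 bytes per component */
                         mapped);                            /* mapped_driver_location */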