diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 48a2b55301f..994ed2860e1 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -1,5 +1,6 @@
 #include <vector>
 #include <algorithm>
+#include <map>
 
 #include "aco_ir.h"
 #include "aco_builder.h"
@@ -10,11 +11,16 @@
 
 namespace aco {
 
+struct constaddr_info { /* output positions recorded for one p_constaddr; stored in asm_context::constaddrs keyed by the id operand */
+   unsigned getpc_end; /* dword index in the output just past the s_getpc_b64 (set when p_constaddr_getpc is assembled) */
+   unsigned add_literal; /* dword index in the output of the s_add_u32 literal (set when p_constaddr_addlo is assembled) */
+};
+
 struct asm_context {
    Program *program;
    enum chip_class chip_class;
    std::vector<std::pair<int, SOPP_instruction*>> branches;
-   std::vector<unsigned> constaddrs;
+   std::map<unsigned, constaddr_info> constaddrs;
    const int16_t* opcode;
    // TODO: keep track of branch instructions referring blocks
    // and, when emitting the block, correct the offset in instr
@@ -45,39 +51,17 @@ static uint32_t get_sdwa_sel(unsigned sel, PhysReg reg)
 void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
 {
    /* lower remaining pseudo-instructions */
-   if (instr->opcode == aco_opcode::p_constaddr) {
-      unsigned dest = instr->definitions[0].physReg();
-      unsigned offset = instr->operands[0].constantValue();
+   if (instr->opcode == aco_opcode::p_constaddr_getpc) {
+      ctx.constaddrs[instr->operands[0].constantValue()].getpc_end = out.size() + 1; /* position right after the s_getpc_b64 about to be emitted (one dword, as in the removed hand-written encoding) */
 
-      /* s_getpc_b64 dest[0:1] */
-      uint32_t encoding = (0b101111101 << 23);
-      uint32_t opcode = ctx.opcode[(int)aco_opcode::s_getpc_b64];
-      if (opcode >= 55 && ctx.chip_class <= GFX9) {
-         assert(ctx.chip_class == GFX9 && opcode < 60);
-         opcode = opcode - 4;
-      }
-      encoding |= dest << 16;
-      encoding |= opcode << 8;
-      out.push_back(encoding);
+      instr->opcode = aco_opcode::s_getpc_b64; /* rewrite in place so the code below assembles it as a real s_getpc_b64 */
+      instr->operands.pop_back(); /* drop the trailing id operand; it was only needed as the map key above */
+   } else if (instr->opcode == aco_opcode::p_constaddr_addlo) {
+      ctx.constaddrs[instr->operands[1].constantValue()].add_literal = out.size() + 1; /* the literal dword comes right after the one-dword s_add_u32 encoding */
 
-      /* s_add_u32 dest[0], dest[0], ... */
-      encoding = (0b10 << 30);
-      encoding |= ctx.opcode[(int)aco_opcode::s_add_u32] << 23;
-      encoding |= dest << 16;
-      encoding |= dest;
-      encoding |= 255 << 8;
-      out.push_back(encoding);
-      ctx.constaddrs.push_back(out.size());
-      out.push_back(offset);
-
-      /* s_addc_u32 dest[1], dest[1], 0 */
-      encoding = (0b10 << 30);
-      encoding |= ctx.opcode[(int)aco_opcode::s_addc_u32] << 23;
-      encoding |= (dest + 1) << 16;
-      encoding |= dest + 1;
-      encoding |= 128 << 8;
-      out.push_back(encoding);
-      return;
+      instr->opcode = aco_opcode::s_add_u32; /* rewrite in place so the code below assembles it as a real s_add_u32 */
+      instr->operands[1] = Operand(0u); /* replace the id with a 0 placeholder; fix_constaddrs() adds the final offset to it */
+      instr->operands[1].setFixed(PhysReg(255)); /* 255 is the source encoding the removed code used (255 << 8) for a trailing literal dword */
    }
 
    uint32_t opcode = ctx.opcode[(int)instr->opcode];
@@ -798,14 +782,14 @@ static void insert_code(asm_context& ctx, std::vector<uint32_t>& out, unsigned i
    for (; branch_it != ctx.branches.end(); ++branch_it)
       branch_it->first += insert_count;
 
-   /* Find first constant address after the inserted code */
-   auto caddr_it = std::find_if(ctx.constaddrs.begin(), ctx.constaddrs.end(), [insert_before](const int &caddr_pos) -> bool {
-      return (unsigned)caddr_pos >= insert_before;
-   });
-
-   /* Update the locations of constant addresses */
-   for (; caddr_it != ctx.constaddrs.end(); ++caddr_it)
-      (*caddr_it) += insert_count;
+   /* Shift every recorded p_constaddr position that lies at or after the insertion point */
+   for (auto& constaddr : ctx.constaddrs) {
+      constaddr_info& info = constaddr.second;
+      if (info.getpc_end >= insert_before) /* NOTE(review): equality also shifts the end marker; fine only if nothing is ever inserted directly after s_getpc_b64 - confirm */
+         info.getpc_end += insert_count;
+      if (info.add_literal >= insert_before) /* keep the literal index pointing at the same dword after the insertion */
+         info.add_literal += insert_count;
+   }
 }
 
 static void fix_branches_gfx10(asm_context& ctx, std::vector<uint32_t>& out)
@@ -928,8 +912,10 @@ void fix_branches(asm_context& ctx, std::vector<uint32_t>& out)
 
 void fix_constaddrs(asm_context& ctx, std::vector<uint32_t>& out)
 {
-   for (unsigned addr : ctx.constaddrs)
-      out[addr] += (out.size() - addr + 1u) * 4u;
+   for (auto& constaddr : ctx.constaddrs) { /* patch the s_add_u32 literal of every recorded p_constaddr */
+      constaddr_info& info = constaddr.second;
+      out[info.add_literal] += (out.size() - info.getpc_end) * 4u; /* add the byte distance (4 bytes per dword) from the end of s_getpc_b64 to the current end of out; presumably the constant data is placed there - confirm in the caller */
+   }
 }
 
 unsigned emit_program(Program* program,
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index eda4e9d4340..888359ff5a3 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5550,7 +5550,7 @@ void visit_load_constant(isel_context *ctx, nir_intrinsic_instr *instr)
       offset = bld.vadd32(bld.def(v1), Operand(base), offset);
 
    Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4),
-                          bld.sop1(aco_opcode::p_constaddr, bld.def(s2), bld.def(s1, scc), Operand(ctx->constant_data_offset)),
+                          bld.pseudo(aco_opcode::p_constaddr, bld.def(s2), bld.def(s1, scc), Operand(ctx->constant_data_offset)),
                           Operand(MIN2(base + range, ctx->shader->constant_data_size)),
                           Operand(desc_type));
    unsigned size = instr->dest.ssa.bit_size / 8;
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index a2129728d16..42139bd0c01 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -1979,6 +1979,17 @@ void lower_to_hw_instr(Program* program)
                   unreachable("Current hardware supports ds_bpermute, don't emit p_bpermute.");
                break;
             }
+            case aco_opcode::p_constaddr: /* expand into getpc + add-low + add-carry so the assembler sees separate instructions */
+            {
+               unsigned id = instr->definitions[0].tempId(); /* passed to both pseudo-instructions below as a constant operand so they can be matched up later */
+               PhysReg reg = instr->definitions[0].physReg();
+               bld.sop1(aco_opcode::p_constaddr_getpc, instr->definitions[0], Operand(id)); /* NOTE(review): instr->operands[0] (the offset given at instruction selection) is not forwarded - confirm this is intended */
+               bld.sop2(aco_opcode::p_constaddr_addlo, Definition(reg, s1), bld.def(s1, scc),
+                        Operand(reg, s1), Operand(id)); /* low dword += value filled in during assembly; also defines scc */
+               bld.sop2(aco_opcode::s_addc_u32, Definition(reg.advance(4), s1), bld.def(s1, scc),
+                        Operand(reg.advance(4), s1), Operand(0u), Operand(scc, s1)); /* add 0 plus the scc carry to reg.advance(4), presumably the high dword of the pair - TODO confirm advance() is in bytes */
+               break;
+            }
             default:
                break;
             }
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index a2ae4f17308..9138fe2bac9 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -288,6 +288,8 @@ opcode("p_exit_early_if")
 # simulates proper bpermute behavior when it's unsupported, eg. GFX10 wave64
 opcode("p_bpermute")
 
+opcode("p_constaddr")
+
 # SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
 SOP2 = {
   # GFX6, GFX7, GFX8, GFX9, GFX10, name
@@ -344,6 +346,8 @@ SOP2 = {
    (  -1,   -1,   -1, 0x34, 0x34, "s_pack_hh_b32_b16"),
    (  -1,   -1,   -1, 0x2c, 0x35, "s_mul_hi_u32"),
    (  -1,   -1,   -1, 0x2d, 0x36, "s_mul_hi_i32"),
+   # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP2.
+   (  -1,   -1,   -1,   -1,   -1, "p_constaddr_addlo"),
 }
 for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOP2:
     opcode(name, gfx7, gfx9, gfx10, Format.SOP2)
@@ -457,7 +461,7 @@ SOP1 = {
    (  -1,   -1,   -1,   -1, 0x47, "s_andn2_wrexec_b32"),
    (  -1,   -1,   -1,   -1, 0x49, "s_movrelsd_2_b32"),
    # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1.
-   (  -1,   -1,   -1,   -1,   -1, "p_constaddr"),
+   (  -1,   -1,   -1,   -1,   -1, "p_constaddr_getpc"),
 }
 for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOP1:
    opcode(name, gfx7, gfx9, gfx10, Format.SOP1)
diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp
index bd6055cc20a..47698699d1d 100644
--- a/src/amd/compiler/tests/test_assembler.cpp
+++ b/src/amd/compiler/tests/test_assembler.cpp
@@ -220,7 +220,9 @@ BEGIN_TEST(assembler.long_jump.constaddr)
 
    //>> s_getpc_b64 s[0:1]                                          ; be801f00
    //! s_add_u32 s0, s0, 0xe0                                      ; 8000ff00 000000e0
-   bld.sop1(aco_opcode::p_constaddr, Definition(PhysReg(0), s2), Operand(0u));
+   bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand(0u));
+   bld.sop2(aco_opcode::p_constaddr_addlo, Definition(PhysReg(0), s1), bld.def(s1, scc),
+            Operand(PhysReg(0), s1), Operand(0u));
 
    program->blocks[2].linear_preds.push_back(0u);
    program->blocks[2].linear_preds.push_back(1u);