diff --git a/src/asahi/compiler/agx_lower_uniform_sources.c b/src/asahi/compiler/agx_lower_uniform_sources.c index 7bbe564304b..15bfac82de7 100644 --- a/src/asahi/compiler/agx_lower_uniform_sources.c +++ b/src/asahi/compiler/agx_lower_uniform_sources.c @@ -30,6 +30,10 @@ should_lower(enum agx_opcode op, agx_index uniform, unsigned src_index) case AGX_OPCODE_DEVICE_STORE: case AGX_OPCODE_ATOMIC: return src_index != 1 || high; + case AGX_OPCODE_LOCAL_LOAD: + return src_index != 0; + case AGX_OPCODE_LOCAL_STORE: + return src_index != 1; case AGX_OPCODE_ZS_EMIT: case AGX_OPCODE_ST_TILE: case AGX_OPCODE_LD_TILE: diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py index cff9d00ae0d..1c7ac4b0a01 100644 --- a/src/asahi/compiler/agx_opcodes.py +++ b/src/asahi/compiler/agx_opcodes.py @@ -267,12 +267,23 @@ op("device_load", encoding_32 = (0x05, 0x7F, 6, 8), srcs = 2, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_reorder = False) +# sources are base (relative to workgroup memory), index +op("local_load", + encoding_32 = (0b1101001, 0, 6, 8), + srcs = 2, imms = [FORMAT, MASK]) + # sources are value, base, index # TODO: Consider permitting the short form op("device_store", encoding_32 = (0x45 | (1 << 47), 0, 8, _), dests = 0, srcs = 3, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_eliminate = False) +# sources are value, base, index +op("local_store", + encoding_32 = (0b0101001, 0, 6, 8), + dests = 0, srcs = 3, imms = [FORMAT, MASK], + can_eliminate=False) + # sources are value, index # TODO: Consider permitting the short form op("uniform_store", diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c index bc0a2db025c..e71d579035a 100644 --- a/src/asahi/compiler/agx_optimizer.c +++ b/src/asahi/compiler/agx_optimizer.c @@ -132,10 +132,13 @@ agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I, unsigned srcs, continue; if (I->op == AGX_OPCODE_ZS_EMIT && s != 0) continue; - if ((I->op == AGX_OPCODE_DEVICE_STORE || I->op == AGX_OPCODE_ATOMIC || + if ((I->op == AGX_OPCODE_DEVICE_STORE || + I->op == AGX_OPCODE_LOCAL_STORE || I->op == AGX_OPCODE_ATOMIC || I->op == AGX_OPCODE_LOCAL_ATOMIC) && s != 2) continue; + if (I->op == AGX_OPCODE_LOCAL_LOAD && s != 1) + continue; if (float_src) { bool fp16 = (def->dest[0].size == AGX_SIZE_16); diff --git a/src/asahi/compiler/agx_register_allocate.c b/src/asahi/compiler/agx_register_allocate.c index 8f2c7b45831..dc124362e38 100644 --- a/src/asahi/compiler/agx_register_allocate.c +++ b/src/asahi/compiler/agx_register_allocate.c @@ -58,6 +58,7 @@ agx_write_registers(agx_instr *I, unsigned d) return 4 * size; case AGX_OPCODE_DEVICE_LOAD: + case AGX_OPCODE_LOCAL_LOAD: case AGX_OPCODE_LD_TILE: return util_bitcount(I->mask) * size; @@ -98,6 +99,7 @@ agx_read_registers(agx_instr *I, unsigned s) return I->nr_dests * agx_size_align_16(agx_split_width(I)); case AGX_OPCODE_DEVICE_STORE: + case AGX_OPCODE_LOCAL_STORE: case AGX_OPCODE_ST_TILE: if (s == 0) return util_bitcount(I->mask) * size;