diff --git a/src/asahi/compiler/agx_lower_uniform_sources.c b/src/asahi/compiler/agx_lower_uniform_sources.c index ebcfe795e47..1d318225f5b 100644 --- a/src/asahi/compiler/agx_lower_uniform_sources.c +++ b/src/asahi/compiler/agx_lower_uniform_sources.c @@ -22,6 +22,7 @@ should_lower(enum agx_opcode op, agx_index uniform, unsigned src_index) bool high = uniform.value >= 256; switch (op) { + case AGX_OPCODE_IMAGE_LOAD: case AGX_OPCODE_TEXTURE_LOAD: case AGX_OPCODE_TEXTURE_SAMPLE: return src_index != 1 && src_index != 2; diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py index 7616af8badb..7bc6f7f8237 100644 --- a/src/asahi/compiler/agx_opcodes.py +++ b/src/asahi/compiler/agx_opcodes.py @@ -254,9 +254,10 @@ op("texture_sample", encoding_32 = (0x31, 0x7F, 8, 10), # XXX WRONG SIZE srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET, SHADOW, GATHER]) -op("texture_load", - encoding_32 = (0x71, 0x7F, 8, 10), # XXX WRONG SIZE - srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET]) +for memory, can_reorder in [("texture", True), ("image", False)]: + op(f"{memory}_load", encoding_32 = (0x71, 0x7F, 8, 10), # XXX WRONG SIZE + srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET], + can_reorder = can_reorder) # sources are base, index op("device_load", diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c index 6b6ef3d1752..bd74af7b2f2 100644 --- a/src/asahi/compiler/agx_optimizer.c +++ b/src/asahi/compiler/agx_optimizer.c @@ -265,7 +265,7 @@ agx_optimizer_forward(agx_context *ctx) /* Inline immediates if we can. 
TODO: systematic */ if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_COLLECT && I->op != AGX_OPCODE_TEXTURE_SAMPLE && - I->op != AGX_OPCODE_TEXTURE_LOAD && + I->op != AGX_OPCODE_IMAGE_LOAD && I->op != AGX_OPCODE_TEXTURE_LOAD && I->op != AGX_OPCODE_UNIFORM_STORE && I->op != AGX_OPCODE_BLOCK_IMAGE_STORE) agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float); diff --git a/src/asahi/compiler/agx_pack.c b/src/asahi/compiler/agx_pack.c index 59563e84770..afd06e79d03 100644 --- a/src/asahi/compiler/agx_pack.c +++ b/src/asahi/compiler/agx_pack.c @@ -774,6 +774,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, } case AGX_OPCODE_TEXTURE_LOAD: + case AGX_OPCODE_IMAGE_LOAD: case AGX_OPCODE_TEXTURE_SAMPLE: { assert(I->mask != 0); assert(I->format <= 0x10); @@ -795,6 +796,18 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, unsigned q3 = 12; // XXX unsigned kill = 0; // helper invocation kill bit + /* Set bit 43 for image loads. This seems to make sure that image loads + * get the value written by the latest image store, not some other image + * store that was already in flight, fixing + * + * KHR-GLES31.core.shader_image_load_store.basic-glsl-misc-fs + * + * Apple seems to set this bit unconditionally for read/write image loads + * and never for readonly image loads. Some sort of cache control. + */ + if (I->op == AGX_OPCODE_IMAGE_LOAD) + q3 |= 1; + uint32_t extend = ((U & BITFIELD_MASK(5)) << 0) | (kill << 5) | ((I->dim >> 3) << 7) | ((R >> 6) << 8) | ((C >> 6) << 10) | ((D >> 6) << 12) | ((T >> 6) << 14) | @@ -804,7 +817,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, bool L = (extend != 0); uint64_t raw = - 0x31 | ((I->op == AGX_OPCODE_TEXTURE_LOAD) ? (1 << 6) : 0) | + 0x31 | ((I->op != AGX_OPCODE_TEXTURE_SAMPLE) ? (1 << 6) : 0) | (Rt ? (1 << 8) : 0) | ((R & BITFIELD_MASK(6)) << 9) | (L ? (1 << 15) : 0) | ((C & BITFIELD_MASK(6)) << 16) | (Ct ?
(1 << 22) : 0) | (q1 << 23) | ((D & BITFIELD_MASK(6)) << 24) | diff --git a/src/asahi/compiler/agx_register_allocate.c b/src/asahi/compiler/agx_register_allocate.c index b5d523d5ecc..1fea6f7519f 100644 --- a/src/asahi/compiler/agx_register_allocate.c +++ b/src/asahi/compiler/agx_register_allocate.c @@ -44,6 +44,7 @@ agx_write_registers(const agx_instr *I, unsigned d) assert(1 <= I->channels && I->channels <= 4); return I->channels * size; + case AGX_OPCODE_IMAGE_LOAD: case AGX_OPCODE_TEXTURE_LOAD: case AGX_OPCODE_TEXTURE_SAMPLE: /* Even when masked out, these clobber 4 registers */ @@ -238,6 +239,7 @@ agx_read_registers(const agx_instr *I, unsigned s) else return size; + case AGX_OPCODE_IMAGE_LOAD: case AGX_OPCODE_TEXTURE_LOAD: case AGX_OPCODE_TEXTURE_SAMPLE: if (s == 0) {