diff --git a/src/panfrost/midgard/disassemble.c b/src/panfrost/midgard/disassemble.c
index cfce1110318..d4354388959 100644
--- a/src/panfrost/midgard/disassemble.c
+++ b/src/panfrost/midgard/disassemble.c
@@ -1366,9 +1366,9 @@ print_texture_word(uint32_t *word, unsigned tabs, unsigned in_reg_base, unsigned
         if (texture->offset_register) {
                 printf(" + ");
 
-                bool full = texture->offset_x & 1;
-                bool select = texture->offset_x & 2;
-                bool upper = texture->offset_x & 4;
+                bool full = texture->offset & 1;
+                bool select = texture->offset & 2;
+                bool upper = texture->offset & 4;
 
                 printf("%sr%u", full ? "" : "h", in_reg_base + select);
                 assert(!(texture->out_full && texture->out_upper));
@@ -1377,30 +1377,21 @@ print_texture_word(uint32_t *word, unsigned tabs, unsigned in_reg_base, unsigned
                 if (upper)
                         printf("'");
 
-                /* The less questions you ask, the better. */
-
-                unsigned swizzle_lo, swizzle_hi;
-                unsigned orig_y = texture->offset_y;
-                unsigned orig_z = texture->offset_z;
-
-                memcpy(&swizzle_lo, &orig_y, sizeof(unsigned));
-                memcpy(&swizzle_hi, &orig_z, sizeof(unsigned));
-
-                /* Duplicate hi swizzle over */
-                assert(swizzle_hi < 4);
-                swizzle_hi = (swizzle_hi << 2) | swizzle_hi;
-
-                unsigned swiz = (swizzle_lo << 4) | swizzle_hi;
-                unsigned reversed = util_bitreverse(swiz) >> 24;
-                print_swizzle_vec4(reversed, false, false);
+                print_swizzle_vec4(texture->offset >> 3, false, false);
 
                 printf(", ");
-        } else if (texture->offset_x || texture->offset_y || texture->offset_z) {
+        } else if (texture->offset) {
                 /* Only select ops allow negative immediate offsets, verify */
 
-                bool neg_x = texture->offset_x < 0;
-                bool neg_y = texture->offset_y < 0;
-                bool neg_z = texture->offset_z < 0;
+                /* Each immediate offset is a signed 4-bit field; masking alone
+                 * yields [0, 15], so sign-extend to recover [-8, 7] */
+                signed offset_x = (signed) ((texture->offset & 0xF) ^ 8) - 8;
+                signed offset_y = (signed) (((texture->offset >> 4) & 0xF) ^ 8) - 8;
+                signed offset_z = (signed) (((texture->offset >> 8) & 0xF) ^ 8) - 8;
+
+                bool neg_x = offset_x < 0;
+                bool neg_y = offset_y < 0;
+                bool neg_z = offset_z < 0;
                 bool any_neg = neg_x || neg_y || neg_z;
 
                 if (any_neg && texture->op != TEXTURE_OP_TEXEL_FETCH)
@@ -1408,10 +1399,7 @@ print_texture_word(uint32_t *word, unsigned tabs, unsigned in_reg_base, unsigned
 
                 /* Regardless, just print the immediate offset */
 
-                printf(" + <%d, %d, %d>, ",
-                       texture->offset_x,
-                       texture->offset_y,
-                       texture->offset_z);
+                printf(" + <%d, %d, %d>, ", offset_x, offset_y, offset_z);
         } else {
                 printf(", ");
         }
diff --git a/src/panfrost/midgard/midgard.h b/src/panfrost/midgard/midgard.h
index ea026621db3..807f8f1ede9 100644
--- a/src/panfrost/midgard/midgard.h
+++ b/src/panfrost/midgard/midgard.h
@@ -691,16 +691,30 @@ __attribute__((__packed__))
         /* In immediate mode, each offset field is an immediate range [0, 7].
          *
          * In register mode, offset_x becomes a register full / select / upper
-         * triplet and a vec3 swizzle is splattered across offset_y/offset_z in
-         * a genuinely bizarre way.
+         * triplet followed by an 8-bit swizzle and a zero bit, as sketched
+         * in the register layout below.
          *
          * For texel fetches in immediate mode, the range is the full [-8, 7],
          * but for normal texturing the top bit must be zero and a register
-         * used instead. It's not clear where this limitation is from. */
+         * used instead. It's not clear where this limitation is from.
+         *
+         * union {
+         *      struct {
+         *              signed offset_x  : 4;
+         *              signed offset_y  : 4;
+         *              signed offset_z  : 4;
+         *      } immediate;
+         *      struct {
+         *              bool full        : 1;
+         *              bool select      : 1;
+         *              bool upper       : 1;
+         *              unsigned swizzle : 8;
+         *              unsigned zero    : 1;
+         *      } register;
+         * }
+         */
 
-        signed offset_x  : 4;
-        signed offset_y  : 4;
-        signed offset_z  : 4;
+        unsigned offset : 12;
 
         /* In immediate bias mode, for a normal texture op, this is
          * texture bias, computed as int(2^8 * frac(biasf)), with
diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c
index 7d116e176bc..65e4d246282 100644
--- a/src/panfrost/midgard/midgard_ra.c
+++ b/src/panfrost/midgard/midgard_ra.c
@@ -518,6 +518,13 @@ allocate_registers(compiler_context *ctx, bool *spilled)
                         set_class(l->class, ins->src[1], REG_CLASS_TEXR);
                         set_class(l->class, ins->src[2], REG_CLASS_TEXR);
                         set_class(l->class, ins->src[3], REG_CLASS_TEXR);
+
+                        /* Texture offsets need to be aligned to vec4, since
+                         * the swizzle for x is forced to x in hardware, while
+                         * the other components are free. TODO: Relax to 8 for
+                         * half-registers if that ever occurs. */
+
+                        //lcra_restrict_range(l, ins->src[3], 16);
                 }
         }
 
@@ -549,13 +556,6 @@ allocate_registers(compiler_context *ctx, bool *spilled)
         return l;
 }
 
-/* Reverses 2 bits, used to pack swizzles of offsets for some reason */
-
-static unsigned
-mir_reverse2(unsigned in)
-{
-        return (in >> 1) | ((in & 1) << 1);
-}
 
 /* Once registers have been decided via register allocation
  * (allocate_registers), we need to rewrite the MIR to use registers instead of
@@ -694,20 +694,20 @@ install_registers_instr(
 
                 /* If there is an offset register, install it */
                 if (ins->src[3] != ~0) {
-                        ins->texture.offset_x =
-                                (1)                   | /* full */
-                                (offset.reg & 1) << 1 | /* select */
-                                0 << 2;                 /* upper */
-
                         unsigned x = offset.offset / 4;
                         unsigned y = x + 1;
                         unsigned z = x + 2;
 
-                        ins->texture.offset_y =
-                                mir_reverse2(y) | (mir_reverse2(x) << 2);
+                        /* Check range, TODO: half-registers */
+                        assert(z < 4);
 
-                        ins->texture.offset_z =
-                                mir_reverse2(z);
+                        ins->texture.offset =
+                                (1)                   | /* full */
+                                (offset.reg & 1) << 1 | /* select */
+                                (0 << 2)              | /* upper */
+                                (x << 3)              | /* swizzle */
+                                (y << 5)              | /* swizzle */
+                                (z << 7);               /* swizzle */
                 }
 
                 break;