pan/bi: Lower combines to rewrites for scalars
This avoids unneeded moves for scalars. It still generates suboptimal code for
vectors, however.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4288>
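To illustrate the intent (an editor's sketch in the notation of the pass's own
header comment; the value names and the fadd op are made up, not taken from the
commit): given a combine whose source a is scalar SSA,

    a = fadd b, c
    v = combine a, y, z, w

the previous lowering copied a into its slot of v with a move. With this
change, the instruction producing a is rewritten to write that component of the
combined register directly, and every use of a is rewritten to swizzle the
component back out, so no move is emitted for scalar sources. Vector and
non-SSA sources still fall back to moves.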
parent e0a51d5308
commit 5a3493c536

1 changed file with 141 additions and 7 deletions
@@ -28,18 +28,32 @@
  *
  * v = combine x, y, z, w
  *
- * These combines need to be lowered by the pass in this file.
+ * These combines need to be lowered by the pass in this file. Fix a given
+ * source at component c.
+ *
+ * First suppose the source is SSA. If it is also scalar, then we may rewrite
+ * the destination of the generating instruction (unique by SSA+scalar) to
+ * write to v.c, and rewrite each of its uses to swizzle out .c instead of .x
+ * (the original by scalar). If it is a vector, there are two cases: if the
+ * component c is `x`, so we are accessing v.x, and each of the succeeding
+ * components y, z... up to the last component of the vector is accessed
+ * sequentially, then we may perform the same rewrite. If this is not the case,
+ * rewriting would require a swizzle or writemask (TODO), so we fall back on a
+ * move.
+ *
+ * Otherwise, if the source is not SSA, we also fall back on a move. We could
+ * probably do better.
  */
 
 static void
-bi_insert_combine_mov(bi_context *ctx, bi_instruction *parent, unsigned comp)
+bi_insert_combine_mov(bi_context *ctx, bi_instruction *parent, unsigned comp, unsigned R)
 {
         unsigned bits = nir_alu_type_get_type_size(parent->dest_type);
         unsigned bytes = bits / 8;
 
         bi_instruction move = {
                 .type = BI_MOV,
-                .dest = parent->dest,
+                .dest = R,
                 .dest_type = parent->dest_type,
                 .writemask = ((1 << bytes) - 1) << (bytes * comp),
                 .src = { parent->src[comp] },
@@ -50,19 +64,139 @@ bi_insert_combine_mov(bi_context *ctx, bi_instruction *parent, unsigned comp)
         bi_emit_before(ctx, parent, move);
 }
 
+/* Gets the instruction generating a given source. Combine lowering is
+ * accidentally O(n^2) right now because this function is O(n) instead of O(1).
+ * If this pass is slow, this cost can be avoided in favour of better
+ * bookkeeping. */
+
+static bi_instruction *
+bi_get_parent(bi_context *ctx, unsigned idx, unsigned mask)
+{
+        bi_foreach_instr_global(ctx, ins) {
+                if (ins->dest == idx)
+                        if ((ins->writemask & mask) == mask)
+                                return ins;
+        }
+
+        return NULL;
+}
+
+/* Rewrites uses of an index. Again, this could be O(n) in the program but is
+ * currently O(nc) in the program and the number of combines, so the pass
+ * becomes effectively O(n^2). Better bookkeeping would bring it down to
+ * linear if that's an issue. */
+
+static void
+bi_rewrite_uses(bi_context *ctx,
+                unsigned old, unsigned oldc,
+                unsigned new, unsigned newc)
+{
+        bi_foreach_instr_global(ctx, ins) {
+                bi_foreach_src(ins, s) {
+                        if (ins->src[s] != old) continue;
+
+                        for (unsigned i = 0; i < 16; ++i)
+                                ins->swizzle[s][i] += (newc - oldc);
+
+                        ins->src[s] = new;
+                }
+        }
+}
+
+/* Shifts the writemask of an instruction by a specified byte count,
+ * rotating the sources to compensate. Returns true if successful, and
+ * returns false if not (nondestructive in this case). */
+
+static bool
+bi_shift_mask_scalar(bi_instruction *ins, signed shift)
+{
+        unsigned sz = nir_alu_type_get_type_size(ins->dest_type);
+        unsigned bytes = sz / 8;
+
+        /* If things are misaligned, we bail. Check if shift % bytes is
+         * nonzero. Note bytes is a power-of-two. */
+        if (shift & (bytes - 1))
+                return false;
+
+        /* Ensure there are no funny types */
+        bi_foreach_src(ins, s) {
+                if (ins->src[s] && nir_alu_type_get_type_size(ins->src_types[s]) != sz)
+                        return false;
+        }
+
+        /* Otherwise, shift is divisible by bytes, and all relevant src types
+         * are the same size as the dest type. */
+        ins->writemask <<= shift;
+
+        /* Shift the swizzle so the old i'th component is accessed by the new
+         * (i + j)'th component, where j is the component shift */
+        signed component_shift = shift / bytes;
+
+        bi_foreach_src(ins, s) {
+                if (!ins->src[s]) continue;
+
+                size_t overlap = sizeof(ins->swizzle[s]) - abs(component_shift);
+
+                if (component_shift > 0)
+                        memmove(ins->swizzle[s] + component_shift, ins->swizzle[s], overlap);
+                else
+                        memmove(ins->swizzle[s], ins->swizzle[s] - component_shift, overlap);
+        }
+
+        return true;
+}
+
+/* Tries to lower a given source of a combine to an appropriate rewrite,
+ * returning true if successful, and false with no changes otherwise. */
+
+static bool
+bi_lower_combine_src(bi_context *ctx, bi_instruction *ins, unsigned s, unsigned R)
+{
+        unsigned src = ins->src[s];
+
+        /* We currently only handle SSA */
+
+        if (!src) return false;
+        if (src & (BIR_SPECIAL | BIR_IS_REG)) return false;
+
+        /* We are SSA. Look up the generating instruction. */
+        unsigned bytes = nir_alu_type_get_type_size(ins->dest_type) / 8;
+
+        bi_instruction *parent = bi_get_parent(ctx, src,
+                        0xF << (ins->swizzle[s][0] * bytes));
+
+        if (!parent) return false;
+
+        /* We have a parent instruction; sanity check the type size */
+        unsigned pbytes = nir_alu_type_get_type_size(parent->dest_type) / 8;
+        if (pbytes != bytes) return false;
+
+        /* Scalar? */
+        if (parent->writemask != ((1 << bytes) - 1)) return false;
+        if (!bi_shift_mask_scalar(parent, bytes * s)) return false;
+        bi_rewrite_uses(ctx, parent->dest, 0, R, s);
+        parent->dest = R;
+        return true;
+}
+
 void
 bi_lower_combine(bi_context *ctx, bi_block *block)
 {
         bi_foreach_instr_in_block_safe(block, ins) {
                 if (ins->type != BI_COMBINE) continue;
 
-                bi_foreach_src(ins, s) {
-                        if (!ins->src[s])
-                                break;
+                /* The vector itself can't be shifted */
+                assert(ins->writemask & 0x1);
 
-                        bi_insert_combine_mov(ctx, ins, s);
+                unsigned R = bi_make_temp_reg(ctx);
+
+                bi_foreach_src(ins, s) {
+                        if (!bi_lower_combine_src(ctx, ins, s, R))
+                                bi_insert_combine_mov(ctx, ins, s, R);
                 }
 
+                bi_rewrite_uses(ctx, ins->dest, 0, R, 0);
+                bi_remove_instruction(ins);
         }
 }
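The byte-oriented writemask arithmetic in bi_shift_mask_scalar is easiest to
follow with concrete numbers. The following standalone sketch is an editor's
illustration using plain integers rather than Mesa's IR types or helpers; it
mirrors the mask and component-shift computation for a 32-bit scalar being
fixed at component 2 of the combined value:

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned bytes = 4;                   /* 32-bit destination type */
            unsigned comp  = 2;                   /* fixing the source at component c = 2 */
            unsigned shift = bytes * comp;        /* byte shift, as bi_lower_combine_src computes */

            /* A scalar producer initially writes component 0: one mask bit per byte. */
            unsigned writemask = (1u << bytes) - 1;          /* 0x000F */

            /* The alignment bail-out in the pass: shift must be a multiple of bytes. */
            assert((shift & (bytes - 1)) == 0);

            writemask <<= shift;                             /* 0x0F00: bytes 8-11, i.e. component 2 */
            unsigned component_shift = shift / bytes;        /* swizzles are rotated by 2 components */

            printf("writemask = 0x%X, component shift = %u\n",
                   writemask, component_shift);
            return 0;
    }

Compiled and run, this prints writemask = 0xF00 and a component shift of 2,
matching a write to bytes 8-11 of a 16-byte register.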
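Similarly, the use-rewriting step can be pictured with a standalone sketch
(again an editor's illustration, not Mesa code): bi_rewrite_uses offsets every
swizzle entry of a matching source by (newc - oldc), so a use that read
component 0 of the old scalar now reads the component of R where the value was
placed:

    #include <stdio.h>

    int main(void)
    {
            /* One source's swizzle: the use originally read component oldc = 0
             * of the old (scalar) value in every entry. */
            unsigned char swizzle[16] = { 0 };
            unsigned oldc = 0, newc = 2;     /* the value now lives at component 2 of R */

            /* Offset every entry so reads follow the value to its new component. */
            for (unsigned i = 0; i < 16; ++i)
                    swizzle[i] += (newc - oldc);

            printf("the use now swizzles out component %u\n", swizzle[0]);
            return 0;
    }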