mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
pan/bi: Ingest vecN directly (again)
Last time, I swear. We still generate writemasks but SSA-like ones and do the lowering ourselves. Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4288>
This commit is contained in:
parent
04509dae7f
commit
e0a51d5308
7 changed files with 89 additions and 48 deletions
|
|
@ -2,6 +2,7 @@ bifrost_FILES := \
|
|||
bifrost/bifrost.h \
|
||||
bifrost/bifrost_compile.c \
|
||||
bifrost/bifrost_compile.h \
|
||||
bifrost/bi_lower_combine.c \
|
||||
bifrost/bi_tables.c \
|
||||
bifrost/bi_schedule.c \
|
||||
bifrost/bi_print.c \
|
||||
|
|
|
|||
68
src/panfrost/bifrost/bi_lower_combine.c
Normal file
68
src/panfrost/bifrost/bi_lower_combine.c
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Copyright (C) 2020 Collabora, Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "compiler.h"
|
||||
|
||||
/* NIR creates vectors as vecN ops, which we represent by a synthetic
|
||||
* BI_COMBINE instruction, e.g.:
|
||||
*
|
||||
* v = combine x, y, z, w
|
||||
*
|
||||
* These combines need to be lowered by the pass in this file.
|
||||
*/
|
||||
|
||||
static void
|
||||
bi_insert_combine_mov(bi_context *ctx, bi_instruction *parent, unsigned comp)
|
||||
{
|
||||
unsigned bits = nir_alu_type_get_type_size(parent->dest_type);
|
||||
unsigned bytes = bits / 8;
|
||||
|
||||
bi_instruction move = {
|
||||
.type = BI_MOV,
|
||||
.dest = parent->dest,
|
||||
.dest_type = parent->dest_type,
|
||||
.writemask = ((1 << bytes) - 1) << (bytes * comp),
|
||||
.src = { parent->src[comp] },
|
||||
.src_types = { parent->dest_type },
|
||||
.swizzle = { { parent->swizzle[comp][0] } }
|
||||
};
|
||||
|
||||
bi_emit_before(ctx, parent, move);
|
||||
}
|
||||
|
||||
void
|
||||
bi_lower_combine(bi_context *ctx, bi_block *block)
|
||||
{
|
||||
bi_foreach_instr_in_block_safe(block, ins) {
|
||||
if (ins->type != BI_COMBINE) continue;
|
||||
|
||||
bi_foreach_src(ins, s) {
|
||||
if (!ins->src[s])
|
||||
break;
|
||||
|
||||
bi_insert_combine_mov(ctx, ins, s);
|
||||
}
|
||||
|
||||
bi_remove_instruction(ins);
|
||||
}
|
||||
}
|
||||
|
|
@ -132,6 +132,7 @@ bi_class_name(enum bi_class cl)
|
|||
case BI_CMP: return "cmp";
|
||||
case BI_BLEND: return "blend";
|
||||
case BI_BITWISE: return "bitwise";
|
||||
case BI_COMBINE: return "combine";
|
||||
case BI_CONVERT: return "convert";
|
||||
case BI_CSEL: return "csel";
|
||||
case BI_DISCARD: return "discard";
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ unsigned bi_class_props[BI_NUM_CLASSES] = {
|
|||
[BI_CMP] = BI_GENERIC | BI_MODS | BI_SCHED_ALL,
|
||||
[BI_BLEND] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR,
|
||||
[BI_BITWISE] = BI_GENERIC | BI_SCHED_ALL,
|
||||
[BI_COMBINE] = 0,
|
||||
[BI_CONVERT] = BI_SCHED_ALL | BI_SWIZZLABLE,
|
||||
[BI_CSEL] = BI_SCHED_FMA,
|
||||
[BI_DISCARD] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD,
|
||||
|
|
|
|||
|
|
@ -381,6 +381,15 @@ bi_class_for_nir_alu(nir_op op)
|
|||
case nir_op_u2f64:
|
||||
return BI_CONVERT;
|
||||
|
||||
case nir_op_vec2:
|
||||
case nir_op_vec3:
|
||||
case nir_op_vec4:
|
||||
return BI_COMBINE;
|
||||
|
||||
case nir_op_vec8:
|
||||
case nir_op_vec16:
|
||||
unreachable("should've been lowered");
|
||||
|
||||
case nir_op_ffma:
|
||||
case nir_op_fmul:
|
||||
return BI_FMA;
|
||||
|
|
@ -519,7 +528,10 @@ emit_alu(bi_context *ctx, nir_alu_instr *instr)
|
|||
/* Construct a writemask */
|
||||
unsigned bits_per_comp = instr->dest.dest.ssa.bit_size;
|
||||
unsigned comps = instr->dest.dest.ssa.num_components;
|
||||
assert(comps == 1);
|
||||
|
||||
if (alu.type != BI_COMBINE)
|
||||
assert(comps == 1);
|
||||
|
||||
unsigned bits = bits_per_comp * comps;
|
||||
unsigned bytes = bits / 8;
|
||||
alu.writemask = (1 << bytes) - 1;
|
||||
|
|
@ -909,53 +921,8 @@ bi_optimize_nir(nir_shader *nir)
|
|||
|
||||
/* Take us out of SSA */
|
||||
NIR_PASS(progress, nir, nir_lower_locals_to_regs);
|
||||
NIR_PASS(progress, nir, nir_convert_from_ssa, true);
|
||||
|
||||
/* We're a primarily scalar architecture but there's enough vector that
|
||||
* we use a vector IR so let's not also deal with scalar hacks on top
|
||||
* of the vector hacks */
|
||||
|
||||
NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest);
|
||||
NIR_PASS(progress, nir, nir_lower_vec_to_movs);
|
||||
NIR_PASS(progress, nir, nir_opt_dce);
|
||||
}
|
||||
|
||||
static void
|
||||
bi_insert_mov32(bi_context *ctx, bi_instruction *parent, unsigned comp)
|
||||
{
|
||||
bi_instruction move = {
|
||||
.type = BI_MOV,
|
||||
.dest = parent->dest,
|
||||
.dest_type = nir_type_uint32,
|
||||
.writemask = (0xF << (4 * comp)),
|
||||
.src = { parent->src[0] },
|
||||
.src_types = { nir_type_uint32 },
|
||||
.swizzle = { { comp } }
|
||||
};
|
||||
|
||||
bi_emit_before(ctx, parent, move);
|
||||
}
|
||||
|
||||
static void
|
||||
bi_lower_mov(bi_context *ctx, bi_block *block)
|
||||
{
|
||||
bi_foreach_instr_in_block_safe(block, ins) {
|
||||
if (ins->type != BI_MOV) continue;
|
||||
if (util_bitcount(ins->writemask) <= 4) continue;
|
||||
|
||||
for (unsigned i = 0; i < 4; ++i) {
|
||||
unsigned quad = (ins->writemask >> (4 * i)) & 0xF;
|
||||
|
||||
if (quad == 0)
|
||||
continue;
|
||||
else if (quad == 0xF)
|
||||
bi_insert_mov32(ctx, ins, i);
|
||||
else
|
||||
unreachable("TODO: Lowering <32bit moves");
|
||||
}
|
||||
|
||||
bi_remove_instruction(ins);
|
||||
}
|
||||
NIR_PASS(progress, nir, nir_convert_from_ssa, true);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1003,7 +970,7 @@ bifrost_compile_shader_nir(nir_shader *nir, panfrost_program *program, unsigned
|
|||
|
||||
bi_foreach_block(ctx, _block) {
|
||||
bi_block *block = (bi_block *) _block;
|
||||
bi_lower_mov(ctx, block);
|
||||
bi_lower_combine(ctx, block);
|
||||
}
|
||||
|
||||
bool progress = false;
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ enum bi_class {
|
|||
BI_CMP,
|
||||
BI_BLEND,
|
||||
BI_BITWISE,
|
||||
BI_COMBINE,
|
||||
BI_CONVERT,
|
||||
BI_CSEL,
|
||||
BI_DISCARD,
|
||||
|
|
@ -541,6 +542,7 @@ uint64_t bi_get_immediate(bi_instruction *ins, unsigned index);
|
|||
|
||||
/* BIR passes */
|
||||
|
||||
void bi_lower_combine(bi_context *ctx, bi_block *block);
|
||||
bool bi_opt_dead_code_eliminate(bi_context *ctx, bi_block *block);
|
||||
void bi_schedule(bi_context *ctx);
|
||||
void bi_register_allocate(bi_context *ctx);
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
libpanfrost_bifrost_files = files(
|
||||
'disassemble.c',
|
||||
'bi_liveness.c',
|
||||
'bi_lower_combine.c',
|
||||
'bi_print.c',
|
||||
'bi_opt_dce.c',
|
||||
'bi_pack.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue