pan/cs: add helpers to emit contiguous csf code blocks
Add support for blocks (monolithic sequences of instructions that must
live in a virtually contiguous portion of code) and labels (references
to a specific position in a block).

Signed-off-by: Antonino Maniscalco <antonino.maniscalco@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Louis-Francis Ratté-Boulianne <lfrb@collabora.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30583>
This commit is contained in:
parent 8b8de5ad9d
commit 50d2396b7e

1 changed file with 185 additions and 16 deletions
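Before the diff, a minimal sketch of how the new helpers are meant to compose. This is not part of the commit: the builder `b` and the 32-bit register index `counter` are assumed to come from driver setup that is not shown, and the MALI_CS_CONDITION_* names are assumed from the generated Mali pack headers rather than taken from this change.

/* Hypothetical usage sketch: a loop emitted as one contiguous block. */
struct cs_block block;
struct cs_label loop_head, loop_exit;

cs_block_start(b, &block);   /* instructions now accumulate in b->blocks.instrs */
cs_label_init(&loop_head);
cs_label_init(&loop_exit);

cs_set_label(b, &loop_head); /* backward target: its position is known up front */

/* ... emit the loop body here ... */

/* Forward reference: loop_exit is still unset, so this BRANCH joins a
 * pending list and gets its real offset later, in cs_set_label(). */
cs_branch_label(b, &loop_exit, MALI_CS_CONDITION_LEQUAL, counter);

/* Backward reference: loop_head is already set, so the offset is emitted
 * immediately. */
cs_branch_label(b, &loop_head, MALI_CS_CONDITION_GREATER, counter);

cs_set_label(b, &loop_exit); /* patches every pending forward branch */
cs_block_end(b);             /* copies the block into the command stream */

The block guarantees that the chunk-overflow jump sequence can never land between these instructions: everything is staged in a dynarray and copied out with a single allocation.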
@@ -29,6 +29,9 @@
 #include "gen_macros.h"
 
+#include "util/list.h"
+#include "util/u_dynarray.h"
+
 /*
  * cs_builder implements a builder for CSF command streams. It manages the
  * allocation and overflow behaviour of queues and provides helpers for emitting
@@ -83,6 +86,32 @@ struct cs_chunk {
    };
 };
 
+/* Monolithic sequence of instructions. Must live in a virtually contiguous
+ * portion of code.
+ */
+struct cs_block {
+   /* Used to insert the block in the block stack. */
+   struct list_head node;
+};
+
+#define CS_LABEL_INVALID_POS ~0u
+
+/* Labels can only be used inside a cs_block. They can be defined and
+ * referenced before they are set to point to a specific position
+ * in the block. */
+struct cs_label {
+   /* The last reference we have seen pointing to this label before
+    * it was set. If set to CS_LABEL_INVALID_POS, no forward reference
+    * pointing to this label exists.
+    */
+   uint32_t last_forward_ref;
+
+   /* The label target. If set to CS_LABEL_INVALID_POS, the label has
+    * not been set yet.
+    */
+   uint32_t target;
+};
+
 struct cs_builder {
    /* CS builder configuration */
    struct cs_builder_conf conf;
@@ -96,6 +125,16 @@ struct cs_builder {
    /* Current CS chunk. */
    struct cs_chunk cur_chunk;
 
+   /* Temporary storage for inner blocks that need to be built
+    * and copied in one monolithic sequence of instructions with no
+    * jump in the middle.
+    */
+   struct {
+      struct list_head stack;
+      struct cs_block *cur;
+      struct util_dynarray instrs;
+   } blocks;
+
    /* Move immediate instruction at the end of the last CS chunk that needs to
     * be patched with the final length of the current CS chunk in order to
     * facilitate correct overflow behaviour.
@@ -122,6 +161,9 @@ cs_builder_init(struct cs_builder *b, const struct cs_builder_conf *conf,
     * at least that too.
     */
    b->conf.nr_kernel_registers = MAX2(b->conf.nr_kernel_registers, 3);
+
+   list_inithead(&b->blocks.stack);
+   util_dynarray_init(&b->blocks.instrs, NULL);
 }
 
 static bool
@@ -191,6 +233,8 @@ cs_finish(struct cs_builder *b)
 
    /* This prevents adding instructions after that point. */
    memset(&b->cur_chunk, 0, sizeof(b->cur_chunk));
+
+   util_dynarray_fini(&b->blocks.instrs);
 }
 
 enum cs_index_type {
@@ -295,7 +339,7 @@ cs_extract32(struct cs_builder *b, struct cs_index idx, unsigned word)
 #define JUMP_SEQ_INSTR_COUNT 4
 
 static inline void *
-cs_alloc_ins(struct cs_builder *b)
+cs_alloc_ins(struct cs_builder *b, uint32_t num_instrs)
 {
    /* If an allocation failure happened before, we just discard all following
     * instructions.
@@ -303,6 +347,9 @@ cs_alloc_ins(struct cs_builder *b)
    if (unlikely(!cs_is_valid(b)))
       return &b->discard_instr_slot;
 
+   if (b->blocks.cur)
+      return util_dynarray_grow(&b->blocks.instrs, uint64_t, num_instrs);
+
    /* Lazy root chunk allocation. */
    if (unlikely(!b->root_chunk.buffer.cpu)) {
       b->root_chunk.buffer = b->conf.alloc_buffer(b->conf.cookie);
@@ -317,7 +364,7 @@ cs_alloc_ins(struct cs_builder *b)
     * We actually do this a few instructions before running out, because the
     * sequence to jump to a new queue takes multiple instructions.
     */
-   if (unlikely((b->cur_chunk.size + JUMP_SEQ_INSTR_COUNT) >
+   if (unlikely((b->cur_chunk.size + num_instrs + JUMP_SEQ_INSTR_COUNT) >
                 b->cur_chunk.buffer.capacity)) {
       /* Now, allocate a new chunk */
       struct cs_buffer newbuf = b->conf.alloc_buffer(b->conf.cookie);
@@ -360,8 +407,10 @@ cs_alloc_ins(struct cs_builder *b)
       b->cur_chunk.pos = 0;
    }
 
-   assert(b->cur_chunk.size < b->cur_chunk.buffer.capacity);
-   return b->cur_chunk.buffer.cpu + (b->cur_chunk.pos++);
+   assert(b->cur_chunk.size + num_instrs - 1 < b->cur_chunk.buffer.capacity);
+   uint32_t pos = b->cur_chunk.pos;
+   b->cur_chunk.pos += num_instrs;
+   return b->cur_chunk.buffer.cpu + pos;
 }
 
 /*
@@ -369,7 +418,7 @@ cs_alloc_ins(struct cs_builder *b)
  * to be separated out being pan_pack can evaluate its argument multiple times,
  * yet cs_alloc has side effects.
  */
-#define cs_emit(b, T, cfg) pan_pack(cs_alloc_ins(b), CS_##T, cfg)
+#define cs_emit(b, T, cfg) pan_pack(cs_alloc_ins(b, 1), CS_##T, cfg)
 
 /* Asynchronous operations take a mask of scoreboard slots to wait on
  * before executing the instruction, and signal a scoreboard slot when
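The contiguity guarantee hinges on the hunks above: cs_alloc_ins() now reserves all num_instrs slots under one capacity check, so a block flushed by cs_block_end() either fits in the current chunk or the jump to a fresh chunk is emitted before the block, never inside it. A toy restatement of that check, with made-up numbers (not mesa code):

#include <stdbool.h>
#include <stdint.h>

#define JUMP_SEQ_INSTR_COUNT 4 /* same constant as in the diff above */

/* Mirrors the overflow test in cs_alloc_ins(). With capacity 64, size 58
 * and a 10-instruction block: 58 + 10 + 4 = 72 > 64, so the whole block
 * is redirected to a freshly allocated chunk in one piece. */
static bool
needs_new_chunk(uint32_t size, uint32_t num_instrs, uint32_t capacity)
{
   return size + num_instrs + JUMP_SEQ_INSTR_COUNT > capacity;
}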
@@ -429,6 +478,137 @@ cs_move48_to(struct cs_builder *b, struct cs_index dest, uint64_t imm)
    }
 }
 
+static inline void
+cs_block_start(struct cs_builder *b, struct cs_block *block)
+{
+   list_addtail(&block->node, &b->blocks.stack);
+   b->blocks.cur = block;
+}
+
+static inline void
+cs_block_end(struct cs_builder *b)
+{
+   assert(b->blocks.cur);
+
+   list_del(&b->blocks.cur->node);
+
+   if (!list_is_empty(&b->blocks.stack)) {
+      b->blocks.cur = list_last_entry(&b->blocks.stack, struct cs_block, node);
+      return;
+   }
+
+   b->blocks.cur = NULL;
+
+   uint32_t num_instrs =
+      util_dynarray_num_elements(&b->blocks.instrs, uint64_t);
+   void *buffer = cs_alloc_ins(b, num_instrs);
+
+   memcpy(buffer, b->blocks.instrs.data, b->blocks.instrs.size);
+   util_dynarray_clear(&b->blocks.instrs);
+}
+
+static inline uint32_t
+cs_block_next_pos(struct cs_builder *b)
+{
+   assert(b->blocks.cur);
+
+   return util_dynarray_num_elements(&b->blocks.instrs, uint64_t);
+}
+
+static inline void
+cs_branch(struct cs_builder *b, int offset, enum mali_cs_condition cond,
+          struct cs_index val)
+{
+   cs_emit(b, BRANCH, I) {
+      I.offset = offset;
+      I.condition = cond;
+      I.value = cs_to_reg32(val);
+   }
+}
+
+static inline void
+cs_branch_label(struct cs_builder *b, struct cs_label *label,
+                enum mali_cs_condition cond, struct cs_index val)
+{
+   assert(b->blocks.cur);
+
+   if (label->target == CS_LABEL_INVALID_POS) {
+      uint32_t branch_ins_pos = cs_block_next_pos(b);
+
+      /* Instead of emitting a BRANCH with the final offset, we record the
+       * diff between the current branch, and the previous branch that was
+       * referencing this unset label. This way we build a single linked list
+       * that can be walked when the label is set with cs_set_label().
+       * We use -1 as the end-of-list marker.
+       */
+      int16_t offset = -1;
+      if (label->last_forward_ref != CS_LABEL_INVALID_POS) {
+         assert(label->last_forward_ref < branch_ins_pos);
+         assert(branch_ins_pos - label->last_forward_ref <= INT16_MAX);
+         offset = branch_ins_pos - label->last_forward_ref;
+      }
+
+      cs_emit(b, BRANCH, I) {
+         I.offset = offset;
+         I.condition = cond;
+         I.value = cs_to_reg32(val);
+      }
+
+      label->last_forward_ref = branch_ins_pos;
+   } else {
+      int32_t offset = label->target - cs_block_next_pos(b) - 1;
+
+      /* The branch target is encoded in a 16-bit signed integer, make sure we
+       * don't underflow.
+       */
+      assert(offset >= INT16_MIN);
+
+      /* Backward references are easy, we can emit them immediately. */
+      cs_emit(b, BRANCH, I) {
+         I.offset = offset;
+         I.condition = cond;
+         I.value = cs_to_reg32(val);
+      }
+   }
+}
+
+static inline void
+cs_label_init(struct cs_label *label)
+{
+   label->last_forward_ref = CS_LABEL_INVALID_POS;
+   label->target = CS_LABEL_INVALID_POS;
+}
+
+static inline void
+cs_set_label(struct cs_builder *b, struct cs_label *label)
+{
+   assert(label->target == CS_LABEL_INVALID_POS);
+   label->target = cs_block_next_pos(b);
+
+   for (uint32_t next_forward_ref, forward_ref = label->last_forward_ref;
+        forward_ref != CS_LABEL_INVALID_POS; forward_ref = next_forward_ref) {
+      uint64_t *ins =
+         util_dynarray_element(&b->blocks.instrs, uint64_t, forward_ref);
+
+      assert(forward_ref < label->target);
+      assert(label->target - forward_ref <= INT16_MAX);
+
+      /* Save the next forward reference to this target before overwriting
+       * it with the final offset.
+       */
+      int16_t offset = *ins & BITFIELD64_MASK(16);
+
+      next_forward_ref =
+         offset > 0 ? forward_ref - offset : CS_LABEL_INVALID_POS;
+
+      assert(next_forward_ref == CS_LABEL_INVALID_POS ||
+             next_forward_ref < forward_ref);
+
+      *ins &= ~BITFIELD64_MASK(16);
+      *ins |= label->target - forward_ref - 1;
+   }
+}
+
 static inline void
 cs_move64_to(struct cs_builder *b, struct cs_index dest, uint64_t imm)
 {
@@ -665,17 +845,6 @@ cs_store64(struct cs_builder *b, struct cs_index data, struct cs_index address,
    cs_store(b, data, address, BITFIELD_MASK(2), offset);
 }
 
-static inline void
-cs_branch(struct cs_builder *b, int offset, enum mali_cs_condition cond,
-          struct cs_index val)
-{
-   cs_emit(b, BRANCH, I) {
-      I.offset = offset;
-      I.condition = cond;
-      I.value = cs_to_reg32(val);
-   }
-}
-
 /*
  * Select which scoreboard entry will track endpoint tasks and other tasks
  * respectively. Pass to cs_wait to wait later.
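As a closing aside, the forward-reference trick in cs_branch_label()/cs_set_label() can be demonstrated outside the builder. The standalone sketch below (all names hypothetical; nothing here is panfrost API) threads the pending-branch list through the 16-bit offset field of the instructions themselves, in the same spirit as the diff:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define INVALID_POS ~0u

static uint64_t code[16]; /* toy instruction buffer */
static uint32_t pos;      /* next free slot */

struct toy_label {
   uint32_t last_forward_ref;
   uint32_t target;
};

/* Emit a "branch" to a label that may not be set yet. Unresolved branches
 * store the distance back to the previous unresolved branch to the same
 * label (-1 for the first one), forming a linked list inside the code. */
static void
toy_branch(struct toy_label *l)
{
   int16_t offset = -1;

   if (l->last_forward_ref != INVALID_POS)
      offset = pos - l->last_forward_ref;

   code[pos] = (uint16_t)offset; /* pretend this is a BRANCH instruction */
   l->last_forward_ref = pos++;
}

/* Bind the label to the current position and walk the list, replacing each
 * link with the final branch offset (target - ref - 1). */
static void
toy_set_label(struct toy_label *l)
{
   l->target = pos;

   for (uint32_t ref = l->last_forward_ref; ref != INVALID_POS;) {
      int16_t offset = (int16_t)(code[ref] & 0xffff);
      uint32_t next = offset > 0 ? ref - offset : INVALID_POS;

      code[ref] &= ~(uint64_t)0xffff;
      code[ref] |= (uint16_t)(l->target - ref - 1);
      ref = next;
   }
}

int
main(void)
{
   struct toy_label l = {INVALID_POS, INVALID_POS};

   toy_branch(&l);    /* slot 0: forward branch, end of list */
   pos++;             /* slot 1: some unrelated instruction */
   toy_branch(&l);    /* slot 2: second forward branch, links back to slot 0 */
   toy_set_label(&l); /* label lands at slot 3; both branches get patched */

   printf("%d %d\n", (int16_t)(code[0] & 0xffff), (int16_t)(code[2] & 0xffff));
   return 0;
}

Compiled with any C compiler this prints `2 0`: the branch in slot 0 skips two instructions to reach slot 3, and the one in slot 2 skips none.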