asahi: Clang-format the subtree

See 0afd691f29 ("panfrost: clang-format the tree") for why I'm doing this.
Asahi already mostly follows Mesa style so this doesn't do much. But this means
we can all stop thinking about formatting and trust the robot poets to do that
for us.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20434>
This commit is contained in:
Alyssa Rosenzweig 2022-12-27 17:36:08 -05:00 committed by Marge Bot
parent bf93fd46e2
commit f603d8ce9e
64 changed files with 2550 additions and 2543 deletions

File diff suppressed because it is too large.

View file

@ -228,15 +228,12 @@ struct agx_shader_key {
};
};
void
agx_preprocess_nir(nir_shader *nir);
void agx_preprocess_nir(nir_shader *nir);
void
agx_compile_shader_nir(nir_shader *nir,
struct agx_shader_key *key,
struct util_debug_callback *debug,
struct util_dynarray *binary,
struct agx_shader_info *out);
void agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
struct util_debug_callback *debug,
struct util_dynarray *binary,
struct agx_shader_info *out);
static const nir_shader_compiler_options agx_nir_options = {
.lower_fdiv = true,
@ -275,8 +272,10 @@ static const nir_shader_compiler_options agx_nir_options = {
.max_unroll_iterations = 32,
.lower_uniforms_to_ubo = true,
.force_indirect_unrolling_sampler = true,
.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
.lower_int64_options = (nir_lower_int64_options) ~(nir_lower_iadd64 | nir_lower_imul_2x32_64),
.force_indirect_unrolling =
(nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
.lower_int64_options =
(nir_lower_int64_options) ~(nir_lower_iadd64 | nir_lower_imul_2x32_64),
.lower_doubles_options = nir_lower_dmod,
};

View file

@ -26,13 +26,13 @@
#define __AGX_COMPILER_H
#include "compiler/nir/nir.h"
#include "util/u_math.h"
#include "util/half_float.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "util/u_worklist.h"
#include "agx_compile.h"
#include "agx_opcodes.h"
#include "agx_minifloat.h"
#include "agx_opcodes.h"
#ifdef __cplusplus
extern "C" {
@ -66,19 +66,18 @@ enum agx_index_type {
AGX_INDEX_REGISTER = 4,
};
enum agx_size {
AGX_SIZE_16 = 0,
AGX_SIZE_32 = 1,
AGX_SIZE_64 = 2
};
enum agx_size { AGX_SIZE_16 = 0, AGX_SIZE_32 = 1, AGX_SIZE_64 = 2 };
static inline unsigned
agx_size_align_16(enum agx_size size)
{
switch (size) {
case AGX_SIZE_16: return 1;
case AGX_SIZE_32: return 2;
case AGX_SIZE_64: return 4;
case AGX_SIZE_16:
return 1;
case AGX_SIZE_32:
return 2;
case AGX_SIZE_64:
return 4;
}
unreachable("Invalid size");
@ -86,7 +85,8 @@ agx_size_align_16(enum agx_size size)
/* Keep synced with hash_index */
typedef struct {
/* Sufficient for as many SSA values as we need. Immediates and uniforms fit in 16-bits */
/* Sufficient for as many SSA values as we need. Immediates and uniforms fit
* in 16-bits */
unsigned value : 22;
/* Indicates that this source kills the referenced value (because it is the
@ -95,21 +95,21 @@ typedef struct {
bool kill : 1;
/* Cache hints */
bool cache : 1;
bool cache : 1;
bool discard : 1;
/* src - float modifiers */
bool abs : 1;
bool neg : 1;
enum agx_size size : 2;
enum agx_size size : 2;
enum agx_index_type type : 3;
} agx_index;
static inline agx_index
agx_get_index(unsigned value, enum agx_size size)
{
return (agx_index) {
return (agx_index){
.value = value,
.size = size,
.type = AGX_INDEX_NORMAL,
@ -121,7 +121,7 @@ agx_immediate(uint32_t imm)
{
assert(imm < (1 << 16) && "overflowed immediate");
return (agx_index) {
return (agx_index){
.value = imm,
.size = AGX_SIZE_16,
.type = AGX_INDEX_IMMEDIATE,
@ -141,7 +141,7 @@ agx_register(uint32_t imm, enum agx_size size)
{
assert(imm < AGX_NUM_REGS);
return (agx_index) {
return (agx_index){
.value = imm,
.size = size,
.type = AGX_INDEX_REGISTER,
@ -154,7 +154,7 @@ agx_uniform(uint32_t imm, enum agx_size size)
{
assert(imm < AGX_NUM_UNIFORMS);
return (agx_index) {
return (agx_index){
.value = imm,
.size = size,
.type = AGX_INDEX_UNIFORM,
@ -164,7 +164,7 @@ agx_uniform(uint32_t imm, enum agx_size size)
static inline agx_index
agx_null()
{
return (agx_index) { .type = AGX_INDEX_NULL };
return (agx_index){.type = AGX_INDEX_NULL};
}
static inline agx_index
@ -323,8 +323,8 @@ typedef struct {
/* TODO: Handle tex ops more efficient */
enum agx_dim dim : 4;
bool offset : 1;
bool shadow : 1;
bool offset : 1;
bool shadow : 1;
/* Final st_vary op */
bool last : 1;
@ -446,10 +446,14 @@ agx_size_for_bits(unsigned bits)
switch (bits) {
case 1:
case 8:
case 16: return AGX_SIZE_16;
case 32: return AGX_SIZE_32;
case 64: return AGX_SIZE_64;
default: unreachable("Invalid bitsize");
case 16:
return AGX_SIZE_16;
case 32:
return AGX_SIZE_32;
case 64:
return AGX_SIZE_64;
default:
unreachable("Invalid bitsize");
}
}
@ -459,7 +463,7 @@ agx_src_index(nir_src *src)
assert(src->is_ssa);
return agx_get_index(src->ssa->index,
agx_size_for_bits(nir_src_bit_size(*src)));
agx_size_for_bits(nir_src_bit_size(*src)));
}
static inline agx_index
@ -468,7 +472,7 @@ agx_dest_index(nir_dest *dst)
assert(dst->is_ssa);
return agx_get_index(dst->ssa.index,
agx_size_for_bits(nir_dest_bit_size(*dst)));
agx_size_for_bits(nir_dest_bit_size(*dst)));
}
static inline agx_index
@ -499,91 +503,88 @@ agx_start_block(agx_context *ctx)
/* Iterators for AGX IR */
#define agx_foreach_block(ctx, v) \
#define agx_foreach_block(ctx, v) \
list_for_each_entry(agx_block, v, &ctx->blocks, link)
#define agx_foreach_block_rev(ctx, v) \
#define agx_foreach_block_rev(ctx, v) \
list_for_each_entry_rev(agx_block, v, &ctx->blocks, link)
#define agx_foreach_block_from(ctx, from, v) \
#define agx_foreach_block_from(ctx, from, v) \
list_for_each_entry_from(agx_block, v, from, &ctx->blocks, link)
#define agx_foreach_block_from_rev(ctx, from, v) \
#define agx_foreach_block_from_rev(ctx, from, v) \
list_for_each_entry_from_rev(agx_block, v, from, &ctx->blocks, link)
#define agx_foreach_instr_in_block(block, v) \
#define agx_foreach_instr_in_block(block, v) \
list_for_each_entry(agx_instr, v, &(block)->instructions, link)
#define agx_foreach_instr_in_block_rev(block, v) \
#define agx_foreach_instr_in_block_rev(block, v) \
list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link)
#define agx_foreach_instr_in_block_safe(block, v) \
#define agx_foreach_instr_in_block_safe(block, v) \
list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link)
#define agx_foreach_instr_in_block_safe_rev(block, v) \
#define agx_foreach_instr_in_block_safe_rev(block, v) \
list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link)
#define agx_foreach_instr_in_block_from(block, v, from) \
#define agx_foreach_instr_in_block_from(block, v, from) \
list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link)
#define agx_foreach_instr_in_block_from_rev(block, v, from) \
list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions, link)
#define agx_foreach_instr_in_block_from_rev(block, v, from) \
list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions, \
link)
#define agx_foreach_instr_global(ctx, v) \
agx_foreach_block(ctx, v_block) \
#define agx_foreach_instr_global(ctx, v) \
agx_foreach_block(ctx, v_block) \
agx_foreach_instr_in_block(v_block, v)
#define agx_foreach_instr_global_rev(ctx, v) \
agx_foreach_block_rev(ctx, v_block) \
#define agx_foreach_instr_global_rev(ctx, v) \
agx_foreach_block_rev(ctx, v_block) \
agx_foreach_instr_in_block_rev(v_block, v)
#define agx_foreach_instr_global_safe(ctx, v) \
agx_foreach_block(ctx, v_block) \
#define agx_foreach_instr_global_safe(ctx, v) \
agx_foreach_block(ctx, v_block) \
agx_foreach_instr_in_block_safe(v_block, v)
#define agx_foreach_instr_global_safe_rev(ctx, v) \
agx_foreach_block_rev(ctx, v_block) \
#define agx_foreach_instr_global_safe_rev(ctx, v) \
agx_foreach_block_rev(ctx, v_block) \
agx_foreach_instr_in_block_safe_rev(v_block, v)
/* Based on set_foreach, expanded with automatic type casts */
#define agx_foreach_successor(blk, v) \
agx_block *v; \
agx_block **_v; \
for (_v = (agx_block **) &blk->successors[0], \
v = *_v; \
v != NULL && _v < (agx_block **) &blk->successors[2]; \
_v++, v = *_v) \
#define agx_foreach_successor(blk, v) \
agx_block *v; \
agx_block **_v; \
for (_v = (agx_block **)&blk->successors[0], v = *_v; \
v != NULL && _v < (agx_block **)&blk->successors[2]; _v++, v = *_v)
#define agx_foreach_predecessor(blk, v) \
#define agx_foreach_predecessor(blk, v) \
util_dynarray_foreach(&blk->predecessors, agx_block *, v)
#define agx_foreach_src(ins, v) \
for (unsigned v = 0; v < ins->nr_srcs; ++v)
#define agx_foreach_src(ins, v) for (unsigned v = 0; v < ins->nr_srcs; ++v)
#define agx_foreach_dest(ins, v) \
for (unsigned v = 0; v < ins->nr_dests; ++v)
#define agx_foreach_dest(ins, v) for (unsigned v = 0; v < ins->nr_dests; ++v)
#define agx_foreach_ssa_src(ins, v) \
agx_foreach_src(ins, v) \
#define agx_foreach_ssa_src(ins, v) \
agx_foreach_src(ins, v) \
if (ins->src[v].type == AGX_INDEX_NORMAL)
#define agx_foreach_ssa_dest(ins, v) \
agx_foreach_dest(ins, v) \
#define agx_foreach_ssa_dest(ins, v) \
agx_foreach_dest(ins, v) \
if (ins->dest[v].type == AGX_INDEX_NORMAL)
/* Phis only come at the start so we stop as soon as we hit a non-phi */
#define agx_foreach_phi_in_block(block, v) \
agx_foreach_instr_in_block(block, v) \
if (v->op != AGX_OPCODE_PHI) \
break; \
#define agx_foreach_phi_in_block(block, v) \
agx_foreach_instr_in_block(block, v) \
if (v->op != AGX_OPCODE_PHI) \
break; \
else
/* Everything else comes after, so we stop as soon as we hit a phi in reverse */
#define agx_foreach_non_phi_in_block_rev(block, v) \
agx_foreach_instr_in_block_rev(block, v) \
if (v->op == AGX_OPCODE_PHI) \
break; \
#define agx_foreach_non_phi_in_block_rev(block, v) \
agx_foreach_instr_in_block_rev(block, v) \
if (v->op == AGX_OPCODE_PHI) \
break; \
else
/*
@ -595,7 +596,8 @@ agx_predecessor_index(agx_block *succ, agx_block *pred)
unsigned index = 0;
agx_foreach_predecessor(succ, x) {
if (*x == pred) return index;
if (*x == pred)
return index;
index++;
}
@ -629,13 +631,13 @@ agx_exit_block(agx_context *ctx)
return last;
}
#define agx_worklist_init(ctx, w) u_worklist_init(w, ctx->num_blocks, ctx)
#define agx_worklist_init(ctx, w) u_worklist_init(w, ctx->num_blocks, ctx)
#define agx_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define agx_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define agx_worklist_peek_head(w) u_worklist_peek_head(w, agx_block, index)
#define agx_worklist_pop_head(w) u_worklist_pop_head( w, agx_block, index)
#define agx_worklist_peek_tail(w) u_worklist_peek_tail(w, agx_block, index)
#define agx_worklist_pop_tail(w) u_worklist_pop_tail( w, agx_block, index)
#define agx_worklist_peek_head(w) u_worklist_peek_head(w, agx_block, index)
#define agx_worklist_pop_head(w) u_worklist_pop_head(w, agx_block, index)
#define agx_worklist_peek_tail(w) u_worklist_peek_tail(w, agx_block, index)
#define agx_worklist_pop_tail(w) u_worklist_pop_tail(w, agx_block, index)
/* Like in NIR, for use with the builder */
@ -657,7 +659,7 @@ typedef struct {
static inline agx_cursor
agx_after_block(agx_block *block)
{
return (agx_cursor) {
return (agx_cursor){
.option = agx_cursor_after_block,
.block = block,
};
@ -666,7 +668,7 @@ agx_after_block(agx_block *block)
static inline agx_cursor
agx_before_instr(agx_instr *instr)
{
return (agx_cursor) {
return (agx_cursor){
.option = agx_cursor_before_instr,
.instr = instr,
};
@ -675,7 +677,7 @@ agx_before_instr(agx_instr *instr)
static inline agx_cursor
agx_after_instr(agx_instr *instr)
{
return (agx_cursor) {
return (agx_cursor){
.option = agx_cursor_after_instr,
.instr = instr,
};
@ -699,7 +701,6 @@ agx_after_block_logical(agx_block *block)
return agx_after_block(block);
}
static inline agx_cursor
agx_before_nonempty_block(agx_block *block)
{
@ -728,7 +729,7 @@ typedef struct {
static inline agx_builder
agx_init_builder(agx_context *ctx, agx_cursor cursor)
{
return (agx_builder) {
return (agx_builder){
.shader = ctx,
.cursor = cursor,
};
@ -763,12 +764,11 @@ agx_builder_insert(agx_cursor *cursor, agx_instr *I)
/* Uniform file management */
agx_index
agx_indexed_sysval(agx_context *ctx, enum agx_push_type type, enum agx_size size,
unsigned index, unsigned length);
agx_index agx_indexed_sysval(agx_context *ctx, enum agx_push_type type,
enum agx_size size, unsigned index,
unsigned length);
agx_index
agx_vbo_base(agx_context *ctx, unsigned vbo);
agx_index agx_vbo_base(agx_context *ctx, unsigned vbo);
/* Routines defined for AIR */
@ -786,7 +786,11 @@ void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission);
#ifndef NDEBUG
void agx_validate(agx_context *ctx, const char *after_str);
#else
static inline void agx_validate(UNUSED agx_context *ctx, UNUSED const char *after_str) { return; }
static inline void
agx_validate(UNUSED agx_context *ctx, UNUSED const char *after_str)
{
return;
}
#endif
unsigned agx_write_registers(agx_instr *I, unsigned d);
@ -802,8 +806,8 @@ struct agx_copy {
bool done;
};
void
agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies, unsigned n);
void agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies,
unsigned n);
void agx_compute_liveness(agx_context *ctx);
void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);

View file

@ -42,7 +42,8 @@ agx_dce(agx_context *ctx)
}
agx_foreach_instr_global_safe_rev(ctx, I) {
if (!agx_opcodes_info[I->op].can_eliminate) continue;
if (!agx_opcodes_info[I->op].can_eliminate)
continue;
bool needed = false;
@ -54,7 +55,7 @@ agx_dce(agx_context *ctx)
*/
if ((I->dest[d].type == AGX_INDEX_NORMAL) &&
!BITSET_TEST(seen, I->dest[d].value))
I->dest[d] = agx_null();
I->dest[d] = agx_null();
/* If the destination is actually needed, the instruction is too */
needed |= (I->dest[d].type != AGX_INDEX_NULL);

View file

@ -22,10 +22,10 @@
* SOFTWARE.
*/
#include "agx_compiler.h"
#include "util/u_memory.h"
#include "util/list.h"
#include "util/set.h"
#include "util/u_memory.h"
#include "agx_compiler.h"
/* Liveness analysis is a backwards-may dataflow analysis pass. Within a block,
* we compute live_out from live_in. The intrablock pass is linear-time. It
@ -41,7 +41,7 @@ agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I)
agx_foreach_ssa_src(I, s) {
/* If the source is not live after this instruction, but becomes live
* at this instruction, this is the use that kills the source
* at this instruction, this is the use that kills the source
*/
I->src[s].kill = !BITSET_TEST(live, I->src[s].value);
BITSET_SET(live, I->src[s].value);
@ -77,7 +77,7 @@ agx_compute_liveness(agx_context *ctx)
}
/* Iterate the work list */
while(!u_worklist_is_empty(&worklist)) {
while (!u_worklist_is_empty(&worklist)) {
/* Pop in reverse order since liveness is a backwards pass */
agx_block *blk = agx_worklist_pop_head(&worklist);
@ -90,8 +90,8 @@ agx_compute_liveness(agx_context *ctx)
/* Propagate the live in of the successor (blk) to the live out of
* predecessors.
*
* Phi nodes are logically on the control flow edge and act in parallel. To
* handle when propagating, we kill writes from phis and make live the
* Phi nodes are logically on the control flow edge and act in parallel.
* To handle when propagating, we kill writes from phis and make live the
* corresponding sources.
*/
agx_foreach_predecessor(blk, pred) {

View file

@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "agx_compiler.h"
#include "agx_builder.h"
#include "agx_compiler.h"
/*
* Lower 64-bit moves to 32-bit moves. Although there are not 64-bit moves in

View file

@ -22,8 +22,8 @@
* SOFTWARE.
*/
#include "agx_compiler.h"
#include "agx_builder.h"
#include "agx_compiler.h"
/*
* Emits code for
@ -114,13 +114,10 @@ split_32bit_copy(struct copy_ctx *ctx, struct agx_copy *entry)
}
void
agx_emit_parallel_copies(agx_builder *b,
struct agx_copy *copies,
agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies,
unsigned num_copies)
{
struct copy_ctx _ctx = {
.entry_count = num_copies
};
struct copy_ctx _ctx = {.entry_count = num_copies};
struct copy_ctx *ctx = &_ctx;
@ -274,8 +271,10 @@ agx_emit_parallel_copies(agx_builder *b,
for (unsigned j = 0; j < ctx->entry_count; j++) {
struct agx_copy *blocking = &ctx->entries[j];
if (blocking->src.value >= entry->dest &&
blocking->src.value < entry->dest + agx_size_align_16(entry->src.size)) {
blocking->src.value = entry->src.value + (blocking->src.value - entry->dest);
blocking->src.value <
entry->dest + agx_size_align_16(entry->src.size)) {
blocking->src.value =
entry->src.value + (blocking->src.value - entry->dest);
}
}

View file

@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "agx_compiler.h"
#include "agx_builder.h"
#include "agx_compiler.h"
/* Lower pseudo instructions created during optimization. */
static agx_instr *
@ -46,7 +46,8 @@ lower(agx_builder *b, agx_instr *I)
case AGX_OPCODE_OR:
return agx_bitop_to(b, I->dest[0], I->src[0], I->src[1], AGX_BITOP_OR);
default: return NULL;
default:
return NULL;
}
}

View file

@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "agx_compiler.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "agx_compiler.h"
#define AGX_TEXTURE_DESC_STRIDE 24
@ -53,7 +53,7 @@ static nir_ssa_def *
agx_txs(nir_builder *b, nir_tex_instr *tex)
{
nir_ssa_def *ptr = texture_descriptor_ptr(b, tex);
nir_ssa_def *comp[4] = { NULL };
nir_ssa_def *comp[4] = {NULL};
nir_ssa_def *desc = nir_load_global_constant(b, ptr, 8, 4, 32);
nir_ssa_def *w0 = nir_channel(b, desc, 0);
@ -61,26 +61,26 @@ agx_txs(nir_builder *b, nir_tex_instr *tex)
nir_ssa_def *w3 = nir_channel(b, desc, 3);
/* Width minus 1: bits [28, 42) */
nir_ssa_def *width_m1 = nir_ior(b, nir_ushr_imm(b, w0, 28),
nir_ishl_imm(b, nir_iand_imm(b, w1,
BITFIELD_MASK(14 - 4)), 4));
nir_ssa_def *width_m1 =
nir_ior(b, nir_ushr_imm(b, w0, 28),
nir_ishl_imm(b, nir_iand_imm(b, w1, BITFIELD_MASK(14 - 4)), 4));
/* Height minus 1: bits [42, 56) */
nir_ssa_def *height_m1 = nir_iand_imm(b, nir_ushr_imm(b, w1, 42 - 32),
BITFIELD_MASK(14));
nir_ssa_def *height_m1 =
nir_iand_imm(b, nir_ushr_imm(b, w1, 42 - 32), BITFIELD_MASK(14));
/* Depth minus 1: bits [110, 124) */
nir_ssa_def *depth_m1 = nir_iand_imm(b, nir_ushr_imm(b, w3, 110 - 96),
BITFIELD_MASK(14));
nir_ssa_def *depth_m1 =
nir_iand_imm(b, nir_ushr_imm(b, w3, 110 - 96), BITFIELD_MASK(14));
/* First level: bits [56, 60) */
nir_ssa_def *lod = nir_iand_imm(b, nir_ushr_imm(b, w1, 56 - 32),
BITFIELD_MASK(4));
nir_ssa_def *lod =
nir_iand_imm(b, nir_ushr_imm(b, w1, 56 - 32), BITFIELD_MASK(4));
/* Add LOD offset to first level to get the interesting LOD */
int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
if (lod_idx >= 0)
lod = nir_iadd(b, lod, nir_u2u32(b, nir_ssa_for_src(b,
tex->src[lod_idx].src, 1)));
lod = nir_iadd(
b, lod, nir_u2u32(b, nir_ssa_for_src(b, tex->src[lod_idx].src, 1)));
/* Add 1 to width-1, height-1 to get base dimensions */
nir_ssa_def *width = nir_iadd_imm(b, width_m1, 1);
@ -132,7 +132,6 @@ lower_txs(nir_builder *b, nir_instr *instr, UNUSED void *data)
bool
agx_lower_resinfo(nir_shader *s)
{
return nir_shader_instructions_pass(s, lower_txs,
nir_metadata_block_index |
nir_metadata_dominance, NULL);
return nir_shader_instructions_pass(
s, lower_txs, nir_metadata_block_index | nir_metadata_dominance, NULL);
}

View file

@ -39,9 +39,9 @@ agx_minifloat_decode(uint8_t imm)
unsigned mantissa = (imm & 0xF);
if (exp)
return ldexpf(sign * (float) (mantissa | 0x10), exp - 7);
return ldexpf(sign * (float)(mantissa | 0x10), exp - 7);
else
return ldexpf(sign * ((float) mantissa), -6);
return ldexpf(sign * ((float)mantissa), -6);
}
/* Encodes a float. Results are only valid if the float can be represented

View file

@ -3,8 +3,8 @@
* SPDX-License-Identifier: MIT
*/
#include "agx_compiler.h"
#include "compiler/nir/nir_builder.h"
#include "agx_compiler.h"
/*
* Lower load_interpolated_input instructions with unused components of their
@ -27,7 +27,7 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
b->cursor = nir_before_instr(instr);
unsigned bit_size = nir_dest_bit_size(intr->dest);
nir_ssa_def *comps[4] = { NULL };
nir_ssa_def *comps[4] = {NULL};
for (unsigned c = 0; c < intr->num_components; ++c) {
if (mask & BITFIELD_BIT(c)) {
@ -76,8 +76,6 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
bool
agx_nir_lower_load_mask(nir_shader *shader)
{
return nir_shader_instructions_pass(shader, pass,
nir_metadata_block_index |
nir_metadata_dominance,
NULL);
return nir_shader_instructions_pass(
shader, pass, nir_metadata_block_index | nir_metadata_dominance, NULL);
}

View file

@ -23,10 +23,10 @@
* SOFTWARE.
*/
#include "agx_compiler.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_builtin_builder.h"
#include "agx_compiler.h"
static nir_ssa_def *
steal_tex_src(nir_tex_instr *tex, nir_tex_src_type type_)
@ -89,11 +89,11 @@ lower_array_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
* vec6 16-bit coordinate tuple, which would be inconvenient in NIR for
* little benefit (a minor optimization, I guess).
*/
nir_ssa_def *sample_array =
(ms_idx && layer) ? nir_pack_32_2x16_split(b, ms_idx, layer) :
ms_idx ? nir_u2u32(b, ms_idx) :
layer ? nir_u2u32(b, layer) :
NULL;
nir_ssa_def *sample_array = (ms_idx && layer)
? nir_pack_32_2x16_split(b, ms_idx, layer)
: ms_idx ? nir_u2u32(b, ms_idx)
: layer ? nir_u2u32(b, layer)
: NULL;
/* Combine into the final 32-bit tuple */
if (sample_array != NULL) {
@ -109,7 +109,7 @@ lower_array_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
bool
agx_nir_lower_array_texture(nir_shader *s)
{
return nir_shader_instructions_pass(s, lower_array_texture,
nir_metadata_block_index |
nir_metadata_dominance, NULL);
return nir_shader_instructions_pass(
s, lower_array_texture, nir_metadata_block_index | nir_metadata_dominance,
NULL);
}

View file

@ -4,8 +4,8 @@
*/
#include <assert.h>
#include "agx_compiler.h"
#include "compiler/nir/nir_builder.h"
#include "agx_compiler.h"
#include "agx_internal_formats.h"
static bool
@ -26,13 +26,11 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
unsigned dest_size = nir_dest_bit_size(intr->dest);
assert((dest_size == 16 || dest_size == 32) && "other sizes lowered");
nir_ssa_def *value =
nir_load_constant_agx(b, intr->num_components, dest_size,
nir_load_ubo_base_agx(b, ubo_index),
nir_udiv_imm(b, offset, (dest_size / 8)),
.format = (dest_size == 32) ?
AGX_INTERNAL_FORMAT_I32 :
AGX_INTERNAL_FORMAT_I16);
nir_ssa_def *value = nir_load_constant_agx(
b, intr->num_components, dest_size, nir_load_ubo_base_agx(b, ubo_index),
nir_udiv_imm(b, offset, (dest_size / 8)),
.format =
(dest_size == 32) ? AGX_INTERNAL_FORMAT_I32 : AGX_INTERNAL_FORMAT_I16);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, value);
return true;
@ -41,8 +39,6 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
bool
agx_nir_lower_ubo(nir_shader *shader)
{
return nir_shader_instructions_pass(shader, pass,
nir_metadata_block_index |
nir_metadata_dominance,
NULL);
return nir_shader_instructions_pass(
shader, pass, nir_metadata_block_index | nir_metadata_dominance, NULL);
}

View file

@ -3,13 +3,13 @@
* SPDX-License-Identifier: MIT
*/
#include "agx_compiler.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "agx_compiler.h"
#define ALL_SAMPLES 0xFF
#define BASE_Z 1
#define BASE_S 2
#define BASE_Z 1
#define BASE_S 2
static bool
lower(nir_function_impl *impl, nir_block *block)
@ -26,7 +26,8 @@ lower(nir_function_impl *impl, nir_block *block)
continue;
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
if (sem.location != FRAG_RESULT_DEPTH && sem.location != FRAG_RESULT_STENCIL)
if (sem.location != FRAG_RESULT_DEPTH &&
sem.location != FRAG_RESULT_STENCIL)
continue;
if (zs_emit == NULL) {
@ -34,7 +35,8 @@ lower(nir_function_impl *impl, nir_block *block)
nir_builder_init(&b, impl);
b.cursor = nir_before_instr(instr);
/* Multisampling will get lowered later if needed, default to broadcast */
/* Multisampling will get lowered later if needed, default to broadcast
*/
nir_ssa_def *sample_mask = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
zs_emit = nir_store_zs_agx(&b, sample_mask,
nir_ssa_undef(&b, 1, 32) /* depth */,
@ -45,7 +47,7 @@ lower(nir_function_impl *impl, nir_block *block)
bool z = (sem.location == FRAG_RESULT_DEPTH);
unsigned src_idx = z ? 1 : 2;
unsigned base = z ? BASE_Z : BASE_S;
unsigned base = z ? BASE_Z : BASE_S;
assert((nir_intrinsic_base(zs_emit) & base) == 0 &&
"each of depth/stencil may only be written once");
@ -76,8 +78,8 @@ agx_nir_lower_zs_emit(nir_shader *s)
}
if (progress) {
nir_metadata_preserve(function->impl, nir_metadata_block_index |
nir_metadata_dominance);
nir_metadata_preserve(
function->impl, nir_metadata_block_index | nir_metadata_dominance);
} else {
nir_metadata_preserve(function->impl, nir_metadata_all);
}

View file

@ -26,7 +26,8 @@
#include "agx_compiler.h"
static bool
nir_scalarize_preamble(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
nir_scalarize_preamble(struct nir_builder *b, nir_instr *instr,
UNUSED void *data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
@ -38,8 +39,10 @@ nir_scalarize_preamble(struct nir_builder *b, nir_instr *instr, UNUSED void *dat
bool is_load = (intr->intrinsic == nir_intrinsic_load_preamble);
nir_ssa_def *v = is_load ? &intr->dest.ssa :
nir_ssa_for_src(b, intr->src[0], nir_src_num_components(intr->src[0]));
nir_ssa_def *v = is_load
? &intr->dest.ssa
: nir_ssa_for_src(b, intr->src[0],
nir_src_num_components(intr->src[0]));
if (v->num_components == 1)
return false;
@ -52,12 +55,14 @@ nir_scalarize_preamble(struct nir_builder *b, nir_instr *instr, UNUSED void *dat
if (is_load) {
nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
for (unsigned i = 0; i < v->num_components; ++i)
comps[i] = nir_load_preamble(b, 1, v->bit_size, .base = base + (i * stride));
comps[i] =
nir_load_preamble(b, 1, v->bit_size, .base = base + (i * stride));
nir_ssa_def_rewrite_uses(v, nir_vec(b, comps, v->num_components));
} else {
for (unsigned i = 0; i < v->num_components; ++i)
nir_store_preamble(b, nir_channel(b, v, i), .base = base + (i * stride));
nir_store_preamble(b, nir_channel(b, v, i),
.base = base + (i * stride));
nir_instr_remove(instr);
}
@ -111,17 +116,15 @@ static float
rewrite_cost(nir_ssa_def *def, const void *data)
{
bool mov_needed = false;
nir_foreach_use (use, def) {
nir_foreach_use(use, def) {
nir_instr *parent_instr = use->parent_instr;
if (parent_instr->type != nir_instr_type_alu) {
mov_needed = true;
break;
} else {
nir_alu_instr *alu = nir_instr_as_alu(parent_instr);
if (alu->op == nir_op_vec2 ||
alu->op == nir_op_vec3 ||
alu->op == nir_op_vec4 ||
alu->op == nir_op_mov) {
if (alu->op == nir_op_vec2 || alu->op == nir_op_vec3 ||
alu->op == nir_op_vec4 || alu->op == nir_op_mov) {
mov_needed = true;
break;
} else {
@ -164,9 +167,9 @@ agx_nir_opt_preamble(nir_shader *nir, unsigned *preamble_size)
* scalarized for the backend to process them appropriately.
*/
if (progress) {
nir_shader_instructions_pass(nir, nir_scalarize_preamble,
nir_metadata_block_index |
nir_metadata_dominance, NULL);
nir_shader_instructions_pass(
nir, nir_scalarize_preamble,
nir_metadata_block_index | nir_metadata_dominance, NULL);
}
return progress;

View file

@ -79,14 +79,19 @@ instrs_equal(const void *_i1, const void *_i2)
{
const agx_instr *i1 = _i1, *i2 = _i2;
if (i1->op != i2->op) return false;
if (i1->nr_srcs != i2->nr_srcs) return false;
if (i1->nr_dests != i2->nr_dests) return false;
if (i1->op != i2->op)
return false;
if (i1->nr_srcs != i2->nr_srcs)
return false;
if (i1->nr_dests != i2->nr_dests)
return false;
/* Explicitly skip everything but size and type */
agx_foreach_dest(i1, d) {
if (i1->dest[d].type != i2->dest[d].type) return false;
if (i1->dest[d].size != i2->dest[d].size) return false;
if (i1->dest[d].type != i2->dest[d].type)
return false;
if (i1->dest[d].size != i2->dest[d].size)
return false;
}
agx_foreach_src(i1, s) {
@ -96,15 +101,24 @@ instrs_equal(const void *_i1, const void *_i2)
return false;
}
if (i1->imm != i2->imm) return false;
if (i1->perspective != i2->perspective) return false;
if (i1->invert_cond != i2->invert_cond) return false;
if (i1->dim != i2->dim) return false;
if (i1->offset != i2->offset) return false;
if (i1->shadow != i2->shadow) return false;
if (i1->shift != i2->shift) return false;
if (i1->saturate != i2->saturate) return false;
if (i1->mask != i2->mask) return false;
if (i1->imm != i2->imm)
return false;
if (i1->perspective != i2->perspective)
return false;
if (i1->invert_cond != i2->invert_cond)
return false;
if (i1->dim != i2->dim)
return false;
if (i1->offset != i2->offset)
return false;
if (i1->shadow != i2->shadow)
return false;
if (i1->shift != i2->shift)
return false;
if (i1->saturate != i2->saturate)
return false;
if (i1->mask != i2->mask)
return false;
return true;
}

View file

@ -72,8 +72,8 @@
static bool
agx_is_fmov(agx_instr *def)
{
return (def->op == AGX_OPCODE_FADD)
&& agx_is_equiv(def->src[1], agx_negzero());
return (def->op == AGX_OPCODE_FADD) &&
agx_is_equiv(def->src[1], agx_negzero());
}
/* Compose floating-point modifiers with floating-point sources */
@ -98,39 +98,48 @@ agx_optimizer_fmov(agx_instr **defs, agx_instr *ins)
agx_index src = ins->src[s];
agx_instr *def = defs[src.value];
if (def == NULL) continue; /* happens for phis in loops */
if (!agx_is_fmov(def)) continue;
if (def->saturate) continue;
if (def == NULL)
continue; /* happens for phis in loops */
if (!agx_is_fmov(def))
continue;
if (def->saturate)
continue;
ins->src[s] = agx_compose_float_src(src, def->src[0]);
}
}
static void
agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I,
unsigned srcs, bool is_float)
agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I, unsigned srcs,
bool is_float)
{
for (unsigned s = 0; s < srcs; ++s) {
agx_index src = I->src[s];
if (src.type != AGX_INDEX_NORMAL) continue;
if (src.type != AGX_INDEX_NORMAL)
continue;
agx_instr *def = defs[src.value];
if (def->op != AGX_OPCODE_MOV_IMM) continue;
if (def->op != AGX_OPCODE_MOV_IMM)
continue;
uint8_t value = def->imm;
bool float_src = is_float;
/* cmpselsrc takes integer immediates only */
if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL) float_src = false;
if (I->op == AGX_OPCODE_ST_TILE && s == 0) continue;
if (I->op == AGX_OPCODE_ZS_EMIT && s != 0) continue;
if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL)
float_src = false;
if (I->op == AGX_OPCODE_ST_TILE && s == 0)
continue;
if (I->op == AGX_OPCODE_ZS_EMIT && s != 0)
continue;
if (float_src) {
bool fp16 = (def->dest[0].size == AGX_SIZE_16);
assert(fp16 || (def->dest[0].size == AGX_SIZE_32));
float f = fp16 ? _mesa_half_to_float(def->imm) : uif(def->imm);
if (!agx_minifloat_exact(f)) continue;
if (!agx_minifloat_exact(f))
continue;
I->src[s] = agx_immediate_f(f);
} else if (value == def->imm) {
@ -142,8 +151,10 @@ agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I,
static bool
agx_optimizer_fmov_rev(agx_instr *I, agx_instr *use)
{
if (!agx_is_fmov(use)) return false;
if (use->src[0].neg || use->src[0].abs) return false;
if (!agx_is_fmov(use))
return false;
if (use->src[0].neg || use->src[0].abs)
return false;
/* saturate(saturate(x)) = saturate(x) */
I->saturate |= use->saturate;
@ -158,17 +169,21 @@ agx_optimizer_copyprop(agx_instr **defs, agx_instr *I)
agx_index src = I->src[s];
agx_instr *def = defs[src.value];
if (def == NULL) continue; /* happens for phis in loops */
if (def->op != AGX_OPCODE_MOV) continue;
if (def == NULL)
continue; /* happens for phis in loops */
if (def->op != AGX_OPCODE_MOV)
continue;
/* At the moment, not all instructions support size conversions. Notably
* RA pseudo instructions don't handle size conversions. This should be
* refined in the future.
*/
if (def->src[0].size != src.size) continue;
if (def->src[0].size != src.size)
continue;
/* Immediate inlining happens elsewhere */
if (def->src[0].type == AGX_INDEX_IMMEDIATE) continue;
if (def->src[0].type == AGX_INDEX_IMMEDIATE)
continue;
/* Not all instructions can take uniforms. Memory instructions can take
* uniforms, but only for their base (first) source and only in the
@ -179,15 +194,12 @@ agx_optimizer_copyprop(agx_instr **defs, agx_instr *I)
I->op == AGX_OPCODE_TEXTURE_SAMPLE ||
(I->op == AGX_OPCODE_DEVICE_LOAD &&
(s != 0 || def->src[0].value >= 256)) ||
I->op == AGX_OPCODE_PHI ||
I->op == AGX_OPCODE_ZS_EMIT ||
I->op == AGX_OPCODE_ST_TILE ||
I->op == AGX_OPCODE_LD_TILE ||
I->op == AGX_OPCODE_PHI || I->op == AGX_OPCODE_ZS_EMIT ||
I->op == AGX_OPCODE_ST_TILE || I->op == AGX_OPCODE_LD_TILE ||
I->op == AGX_OPCODE_BLOCK_IMAGE_STORE ||
/*I->op == AGX_OPCODE_DEVICE_STORE ||*/
I->op == AGX_OPCODE_UNIFORM_STORE ||
I->op == AGX_OPCODE_ST_VARY))
continue;
I->op == AGX_OPCODE_UNIFORM_STORE || I->op == AGX_OPCODE_ST_VARY))
continue;
/* ALU instructions cannot take 64-bit */
if (def->src[0].size == AGX_SIZE_64 &&
@ -218,8 +230,7 @@ agx_optimizer_forward(agx_context *ctx)
agx_optimizer_fmov(defs, I);
/* Inline immediates if we can. TODO: systematic */
if (I->op != AGX_OPCODE_ST_VARY &&
I->op != AGX_OPCODE_COLLECT &&
if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_COLLECT &&
I->op != AGX_OPCODE_TEXTURE_SAMPLE &&
I->op != AGX_OPCODE_TEXTURE_LOAD &&
I->op != AGX_OPCODE_UNIFORM_STORE &&

View file

@ -38,9 +38,14 @@ assert_register_is_aligned(agx_index reg)
assert(reg.type == AGX_INDEX_REGISTER);
switch (reg.size) {
case AGX_SIZE_16: return;
case AGX_SIZE_32: assert((reg.value & 1) == 0 && "unaligned reg"); return;
case AGX_SIZE_64: assert((reg.value & 3) == 0 && "unaligned reg"); return;
case AGX_SIZE_16:
return;
case AGX_SIZE_32:
assert((reg.value & 1) == 0 && "unaligned reg");
return;
case AGX_SIZE_64:
assert((reg.value & 3) == 0 && "unaligned reg");
return;
}
unreachable("Invalid register size");
@ -161,11 +166,8 @@ agx_pack_alu_dst(agx_index dest)
enum agx_size size = dest.size;
assert(reg < 0x100);
return
(dest.cache ? (1 << 0) : 0) |
((size >= AGX_SIZE_32) ? (1 << 1) : 0) |
((size == AGX_SIZE_64) ? (1 << 2) : 0) |
((reg << 2));
return (dest.cache ? (1 << 0) : 0) | ((size >= AGX_SIZE_32) ? (1 << 1) : 0) |
((size == AGX_SIZE_64) ? (1 << 2) : 0) | ((reg << 2));
}
static unsigned
@ -178,34 +180,27 @@ agx_pack_alu_src(agx_index src)
/* Flags 0 for an 8-bit immediate */
assert(value < 0x100);
return
(value & BITFIELD_MASK(6)) |
((value >> 6) << 10);
return (value & BITFIELD_MASK(6)) | ((value >> 6) << 10);
} else if (src.type == AGX_INDEX_UNIFORM) {
assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);
assert(value < AGX_NUM_UNIFORMS);
return
(value & BITFIELD_MASK(6)) |
((value & BITFIELD_BIT(8)) ? (1 << 6) : 0) |
((size == AGX_SIZE_32) ? (1 << 7) : 0) |
(0x1 << 8) |
(((value >> 6) & BITFIELD_MASK(2)) << 10);
return (value & BITFIELD_MASK(6)) |
((value & BITFIELD_BIT(8)) ? (1 << 6) : 0) |
((size == AGX_SIZE_32) ? (1 << 7) : 0) | (0x1 << 8) |
(((value >> 6) & BITFIELD_MASK(2)) << 10);
} else {
assert_register_is_aligned(src);
assert(!(src.cache && src.discard));
unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
unsigned size_flag =
(size == AGX_SIZE_64) ? 0x3 :
(size == AGX_SIZE_32) ? 0x2 :
(size == AGX_SIZE_16) ? 0x0 : 0x0;
unsigned size_flag = (size == AGX_SIZE_64) ? 0x3
: (size == AGX_SIZE_32) ? 0x2
: (size == AGX_SIZE_16) ? 0x0
: 0x0;
return
(value & BITFIELD_MASK(6)) |
(hint << 6) |
(size_flag << 8) |
(((value >> 6) & BITFIELD_MASK(2)) << 10);
return (value & BITFIELD_MASK(6)) | (hint << 6) | (size_flag << 8) |
(((value >> 6) & BITFIELD_MASK(2)) << 10);
}
}
@ -219,20 +214,14 @@ agx_pack_cmpsel_src(agx_index src, enum agx_size dest_size)
/* Flags 0x4 for an 8-bit immediate */
assert(value < 0x100);
return
(value & BITFIELD_MASK(6)) |
(0x4 << 6) |
((value >> 6) << 10);
return (value & BITFIELD_MASK(6)) | (0x4 << 6) | ((value >> 6) << 10);
} else if (src.type == AGX_INDEX_UNIFORM) {
assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);
assert(size == dest_size);
assert(value < 0x200);
return
(value & BITFIELD_MASK(6)) |
((value >> 8) << 6) |
(0x3 << 7) |
(((value >> 6) & BITFIELD_MASK(2)) << 10);
return (value & BITFIELD_MASK(6)) | ((value >> 8) << 6) | (0x3 << 7) |
(((value >> 6) & BITFIELD_MASK(2)) << 10);
} else {
assert(src.type == AGX_INDEX_REGISTER);
assert(!(src.cache && src.discard));
@ -242,10 +231,8 @@ agx_pack_cmpsel_src(agx_index src, enum agx_size dest_size)
unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
return
(value & BITFIELD_MASK(6)) |
(hint << 6) |
(((value >> 6) & BITFIELD_MASK(2)) << 10);
return (value & BITFIELD_MASK(6)) | (hint << 6) |
(((value >> 6) & BITFIELD_MASK(2)) << 10);
}
}
@ -254,8 +241,7 @@ agx_pack_sample_mask_src(agx_index src)
{
unsigned value = src.value;
unsigned packed_value =
(value & BITFIELD_MASK(6)) |
(((value >> 6) & BITFIELD_MASK(2)) << 10);
(value & BITFIELD_MASK(6)) | (((value >> 6) & BITFIELD_MASK(2)) << 10);
if (src.type == AGX_INDEX_IMMEDIATE) {
assert(value < 0x100);
@ -272,8 +258,7 @@ agx_pack_sample_mask_src(agx_index src)
static unsigned
agx_pack_float_mod(agx_index src)
{
return (src.abs ? (1 << 0) : 0)
| (src.neg ? (1 << 1) : 0);
return (src.abs ? (1 << 0) : 0) | (src.neg ? (1 << 1) : 0);
}
static bool
@ -299,8 +284,7 @@ agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
{
struct agx_opcode_info info = agx_opcodes_info[I->op];
bool is_16 = agx_all_16(I) && info.encoding_16.exact;
struct agx_encoding encoding = is_16 ?
info.encoding_16 : info.encoding;
struct agx_encoding encoding = is_16 ? info.encoding_16 : info.encoding;
assert(encoding.exact && "invalid encoding");
@ -314,7 +298,7 @@ agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
if (info.nr_dests) {
assert(info.nr_dests == 1);
unsigned D = agx_pack_alu_dst(I->dest[0]);
unsigned extend_offset = (sizeof(extend)*8) - 4;
unsigned extend_offset = (sizeof(extend) * 8) - 4;
raw |= (D & BITFIELD_MASK(8)) << 7;
extend |= ((D >> 8) << extend_offset);
@ -325,12 +309,11 @@ agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
}
for (unsigned s = 0; s < info.nr_srcs; ++s) {
bool is_cmpsel = (s >= 2) &&
(I->op == AGX_OPCODE_ICMPSEL || I->op == AGX_OPCODE_FCMPSEL);
bool is_cmpsel = (s >= 2) && (I->op == AGX_OPCODE_ICMPSEL ||
I->op == AGX_OPCODE_FCMPSEL);
unsigned src = is_cmpsel ?
agx_pack_cmpsel_src(I->src[s], I->dest[0].size) :
agx_pack_alu_src(I->src[s]);
unsigned src = is_cmpsel ? agx_pack_cmpsel_src(I->src[s], I->dest[0].size)
: agx_pack_alu_src(I->src[s]);
unsigned src_short = (src & BITFIELD_MASK(10));
unsigned src_extend = (src >> 10);
@ -355,10 +338,10 @@ agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
/* Sources come at predictable offsets */
unsigned offset = 16 + (12 * s);
raw |= (((uint64_t) src_short) << offset);
raw |= (((uint64_t)src_short) << offset);
/* Destination and each source get extended in reverse order */
unsigned extend_offset = (sizeof(extend)*8) - ((s + 3) * 2);
unsigned extend_offset = (sizeof(extend) * 8) - ((s + 3) * 2);
extend |= (src_extend << extend_offset);
}
@ -367,25 +350,25 @@ agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
if (info.immediates & AGX_IMMEDIATE_TRUTH_TABLE) {
raw |= (I->truth_table & 0x3) << 26;
raw |= (uint64_t) (I->truth_table >> 2) << 38;
raw |= (uint64_t)(I->truth_table >> 2) << 38;
} else if (info.immediates & AGX_IMMEDIATE_SHIFT) {
raw |= (uint64_t) (I->shift & 1) << 39;
raw |= (uint64_t) (I->shift >> 2) << 52;
raw |= (uint64_t)(I->shift & 1) << 39;
raw |= (uint64_t)(I->shift >> 2) << 52;
} else if (info.immediates & AGX_IMMEDIATE_BFI_MASK) {
raw |= (uint64_t) (I->bfi_mask & 0x3) << 38;
raw |= (uint64_t) ((I->bfi_mask >> 2) & 0x3) << 50;
raw |= (uint64_t) ((I->bfi_mask >> 4) & 0x1) << 63;
raw |= (uint64_t)(I->bfi_mask & 0x3) << 38;
raw |= (uint64_t)((I->bfi_mask >> 2) & 0x3) << 50;
raw |= (uint64_t)((I->bfi_mask >> 4) & 0x1) << 63;
} else if (info.immediates & AGX_IMMEDIATE_SR) {
raw |= (uint64_t) (I->sr & 0x3F) << 16;
raw |= (uint64_t) (I->sr >> 6) << 26;
raw |= (uint64_t)(I->sr & 0x3F) << 16;
raw |= (uint64_t)(I->sr >> 6) << 26;
} else if (info.immediates & AGX_IMMEDIATE_WRITEOUT)
raw |= (uint64_t) (I->imm) << 8;
raw |= (uint64_t)(I->imm) << 8;
else if (info.immediates & AGX_IMMEDIATE_IMM)
raw |= (uint64_t) (I->imm) << 16;
raw |= (uint64_t)(I->imm) << 16;
else if (info.immediates & AGX_IMMEDIATE_ROUND)
raw |= (uint64_t) (I->imm) << 26;
raw |= (uint64_t)(I->imm) << 26;
else if (info.immediates & (AGX_IMMEDIATE_FCOND | AGX_IMMEDIATE_ICOND))
raw |= (uint64_t) (I->fcond) << 61;
raw |= (uint64_t)(I->fcond) << 61;
/* Determine length bit */
unsigned length = encoding.length_short;
@ -405,25 +388,26 @@ agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
if (I->op == AGX_OPCODE_IADD)
extend_offset -= 16;
raw |= (uint64_t) extend << extend_offset;
raw |= (uint64_t)extend << extend_offset;
memcpy(util_dynarray_grow_bytes(emission, 1, length), &raw, length);
} else {
/* So far, >8 byte ALU is only to store the extend bits */
unsigned extend_offset = (((length - sizeof(extend)) * 8) - 64);
unsigned hi = ((uint64_t) extend) << extend_offset;
unsigned hi = ((uint64_t)extend) << extend_offset;
memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
memcpy(util_dynarray_grow_bytes(emission, 1, length - 8), &hi, length - 8);
memcpy(util_dynarray_grow_bytes(emission, 1, length - 8), &hi,
length - 8);
}
}
static void
agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx_instr *I)
agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
agx_instr *I)
{
switch (I->op) {
case AGX_OPCODE_LD_TILE:
case AGX_OPCODE_ST_TILE:
{
case AGX_OPCODE_ST_TILE: {
bool load = (I->op == AGX_OPCODE_LD_TILE);
unsigned D = agx_pack_alu_dst(load ? I->dest[0] : I->src[0]);
assert(I->mask < 0x10);
@ -437,33 +421,24 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
unsigned S = sample_index.value;
assert(S < 0x100);
uint64_t raw =
agx_opcodes_info[I->op].encoding.exact |
((uint64_t) (D & BITFIELD_MASK(8)) << 7) |
(St << 22) |
((uint64_t) (I->format) << 24) |
((uint64_t) (I->pixel_offset & BITFIELD_MASK(7)) << 28) |
(load ? (1ull << 35) : 0) |
((uint64_t) (I->mask) << 36) |
((uint64_t) (I->pixel_offset >> 7) << 40) |
((uint64_t) (S & BITFIELD_MASK(6)) << 42) |
((uint64_t) (S >> 6) << 56) |
(((uint64_t) (D >> 8)) << 60);
uint64_t raw = agx_opcodes_info[I->op].encoding.exact |
((uint64_t)(D & BITFIELD_MASK(8)) << 7) | (St << 22) |
((uint64_t)(I->format) << 24) |
((uint64_t)(I->pixel_offset & BITFIELD_MASK(7)) << 28) |
(load ? (1ull << 35) : 0) | ((uint64_t)(I->mask) << 36) |
((uint64_t)(I->pixel_offset >> 7) << 40) |
((uint64_t)(S & BITFIELD_MASK(6)) << 42) |
((uint64_t)(S >> 6) << 56) | (((uint64_t)(D >> 8)) << 60);
unsigned size = 8;
memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
break;
}
case AGX_OPCODE_SAMPLE_MASK:
{
case AGX_OPCODE_SAMPLE_MASK: {
unsigned S = agx_pack_sample_mask_src(I->src[0]);
uint64_t raw =
0x7fc1 |
((S & 0xff) << 16) |
(0x3 << 24) |
((S >> 8) << 26) |
(0x158ull << 32);
uint64_t raw = 0x7fc1 | ((S & 0xff) << 16) | (0x3 << 24) |
((S >> 8) << 26) | (0x158ull << 32);
unsigned size = 8;
memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
@ -471,8 +446,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
}
case AGX_OPCODE_ITER:
case AGX_OPCODE_LDCF:
{
case AGX_OPCODE_LDCF: {
bool flat = (I->op == AGX_OPCODE_LDCF);
unsigned D = agx_pack_alu_dst(I->dest[0]);
unsigned channels = (I->channels & 0x3);
@ -497,26 +471,20 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
bool kill = false; // TODO: optimize
uint64_t raw =
0x21 | (flat ? (1 << 7) : 0) |
(I->perspective ? (1 << 6) : 0) |
((D & 0xFF) << 7) |
(1ull << 15) | /* XXX */
((cf_I & BITFIELD_MASK(6)) << 16) |
((cf_J & BITFIELD_MASK(6)) << 24) |
(((uint64_t) channels) << 30) |
(!flat ? (1ull << 46) : 0) | /* XXX */
(kill ? (1ull << 52) : 0) | /* XXX */
(((uint64_t) (D >> 8)) << 56) |
((uint64_t) (cf_I >> 6) << 58) |
((uint64_t) (cf_J >> 6) << 60);
0x21 | (flat ? (1 << 7) : 0) | (I->perspective ? (1 << 6) : 0) |
((D & 0xFF) << 7) | (1ull << 15) | /* XXX */
((cf_I & BITFIELD_MASK(6)) << 16) | ((cf_J & BITFIELD_MASK(6)) << 24) |
(((uint64_t)channels) << 30) | (!flat ? (1ull << 46) : 0) | /* XXX */
(kill ? (1ull << 52) : 0) | /* XXX */
(((uint64_t)(D >> 8)) << 56) | ((uint64_t)(cf_I >> 6) << 58) |
((uint64_t)(cf_J >> 6) << 60);
unsigned size = 8;
memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
break;
}
case AGX_OPCODE_ST_VARY:
{
case AGX_OPCODE_ST_VARY: {
agx_index index_src = I->src[0];
agx_index value = I->src[1];
@ -526,14 +494,10 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
assert(value.size == AGX_SIZE_32);
uint64_t raw =
0x11 |
(I->last ? (1 << 7) : 0) |
((value.value & 0x3F) << 9) |
(((uint64_t) (index_src.value & 0x3F)) << 16) |
(0x80 << 16) | /* XXX */
((value.value >> 6) << 24) |
((index_src.value >> 6) << 26) |
(0x8u << 28); /* XXX */
0x11 | (I->last ? (1 << 7) : 0) | ((value.value & 0x3F) << 9) |
(((uint64_t)(index_src.value & 0x3F)) << 16) | (0x80 << 16) | /* XXX */
((value.value >> 6) << 24) | ((index_src.value >> 6) << 26) |
(0x8u << 28); /* XXX */
unsigned size = 4;
memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
@ -541,8 +505,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
}
case AGX_OPCODE_DEVICE_LOAD:
case AGX_OPCODE_UNIFORM_STORE:
{
case AGX_OPCODE_UNIFORM_STORE: {
bool is_uniform_store = I->op == AGX_OPCODE_UNIFORM_STORE;
bool is_store = is_uniform_store;
bool has_base = !is_uniform_store;
@ -563,7 +526,8 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
bool Rt, At = false, Ot;
unsigned R = agx_pack_memory_reg(reg, &Rt);
unsigned A = has_base ? agx_pack_memory_base(I->src[0], &At) : 0;
unsigned O = agx_pack_memory_index(I->src[(has_base ? 1 : 0) + (is_store ? 1 : 0)], &Ot);
unsigned O = agx_pack_memory_index(
I->src[(has_base ? 1 : 0) + (is_store ? 1 : 0)], &Ot);
unsigned u1 = is_uniform_store ? 0 : 1; // XXX
unsigned u3 = 0;
unsigned u4 = is_uniform_store ? 0 : 4; // XXX
@ -574,29 +538,19 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
assert(format <= 0x10);
uint64_t raw =
agx_opcodes_info[I->op].encoding.exact |
((format & BITFIELD_MASK(3)) << 7) |
((R & BITFIELD_MASK(6)) << 10) |
((A & BITFIELD_MASK(4)) << 16) |
((O & BITFIELD_MASK(4)) << 20) |
(Ot ? (1 << 24) : 0) |
(I->src[1].abs ? (1 << 25) : 0) |
(is_uniform_store ? (2 << 25) : 0) |
(u1 << 26) |
(At << 27) |
(u3 << 28) |
(I->scoreboard << 30) |
(((uint64_t) ((O >> 4) & BITFIELD_MASK(4))) << 32) |
(((uint64_t) ((A >> 4) & BITFIELD_MASK(4))) << 36) |
(((uint64_t) ((R >> 6) & BITFIELD_MASK(2))) << 40) |
(((uint64_t) I->shift) << 42) |
(((uint64_t) u4) << 44) |
(L ? (1ull << 47) : 0) |
(((uint64_t) (format >> 3)) << 48) |
(((uint64_t) Rt) << 49) |
(((uint64_t) u5) << 50) |
(((uint64_t) mask) << 52) |
(((uint64_t) (O >> 8)) << 56);
agx_opcodes_info[I->op].encoding.exact |
((format & BITFIELD_MASK(3)) << 7) | ((R & BITFIELD_MASK(6)) << 10) |
((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) |
(Ot ? (1 << 24) : 0) | (I->src[1].abs ? (1 << 25) : 0) |
(is_uniform_store ? (2 << 25) : 0) | (u1 << 26) | (At << 27) |
(u3 << 28) | (I->scoreboard << 30) |
(((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
(((uint64_t)((A >> 4) & BITFIELD_MASK(4))) << 36) |
(((uint64_t)((R >> 6) & BITFIELD_MASK(2))) << 40) |
(((uint64_t)I->shift) << 42) | (((uint64_t)u4) << 44) |
(L ? (1ull << 47) : 0) | (((uint64_t)(format >> 3)) << 48) |
(((uint64_t)Rt) << 49) | (((uint64_t)u5) << 50) |
(((uint64_t)mask) << 52) | (((uint64_t)(O >> 8)) << 56);
unsigned size = L ? 8 : 6;
memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
@ -604,8 +558,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
}
case AGX_OPCODE_TEXTURE_LOAD:
case AGX_OPCODE_TEXTURE_SAMPLE:
{
case AGX_OPCODE_TEXTURE_SAMPLE: {
assert(I->mask != 0);
assert(I->format <= 0x10);
@ -621,49 +574,33 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
unsigned U = 0; // TODO: what is sampler ureg?
unsigned q1 = I->shadow;
unsigned q2 = 0; // XXX
unsigned q3 = 12; // XXX
unsigned q2 = 0; // XXX
unsigned q3 = 12; // XXX
unsigned kill = 0; // helper invocation kill bit
unsigned q5 = 0; // XXX
unsigned q6 = 0; // XXX
unsigned q5 = 0; // XXX
unsigned q6 = 0; // XXX
uint32_t extend =
((U & BITFIELD_MASK(5)) << 0) |
(kill << 5) |
((I->dim >> 3) << 7) |
((R >> 6) << 8) |
((C >> 6) << 10) |
((D >> 6) << 12) |
((T >> 6) << 14) |
((O & BITFIELD_MASK(6)) << 16) |
(q6 << 22) |
(I->offset << 27) |
((S >> 6) << 28) |
((O >> 6) << 30);
uint32_t extend = ((U & BITFIELD_MASK(5)) << 0) | (kill << 5) |
((I->dim >> 3) << 7) | ((R >> 6) << 8) |
((C >> 6) << 10) | ((D >> 6) << 12) | ((T >> 6) << 14) |
((O & BITFIELD_MASK(6)) << 16) | (q6 << 22) |
(I->offset << 27) | ((S >> 6) << 28) | ((O >> 6) << 30);
bool L = (extend != 0);
assert(I->scoreboard == 0 && "todo");
uint64_t raw =
0x31 |
((I->op == AGX_OPCODE_TEXTURE_LOAD) ? (1 << 6) : 0) |
(Rt ? (1 << 8) : 0) |
((R & BITFIELD_MASK(6)) << 9) |
(L ? (1 << 15) : 0) |
((C & BITFIELD_MASK(6)) << 16) |
(Ct ? (1 << 22) : 0) |
(q1 << 23) |
((D & BITFIELD_MASK(6)) << 24) |
(q2 << 30) |
(((uint64_t) (T & BITFIELD_MASK(6))) << 32) |
(((uint64_t) Tt) << 38) |
(((uint64_t) (I->dim & BITFIELD_MASK(3))) << 40) |
(((uint64_t) q3) << 43) |
(((uint64_t) I->mask) << 48) |
(((uint64_t) I->lod_mode) << 52) |
(((uint64_t) (S & BITFIELD_MASK(6))) << 56) |
(((uint64_t) St) << 62) |
(((uint64_t) q5) << 63);
0x31 | ((I->op == AGX_OPCODE_TEXTURE_LOAD) ? (1 << 6) : 0) |
(Rt ? (1 << 8) : 0) | ((R & BITFIELD_MASK(6)) << 9) |
(L ? (1 << 15) : 0) | ((C & BITFIELD_MASK(6)) << 16) |
(Ct ? (1 << 22) : 0) | (q1 << 23) | ((D & BITFIELD_MASK(6)) << 24) |
(q2 << 30) | (((uint64_t)(T & BITFIELD_MASK(6))) << 32) |
(((uint64_t)Tt) << 38) |
(((uint64_t)(I->dim & BITFIELD_MASK(3))) << 40) |
(((uint64_t)q3) << 43) | (((uint64_t)I->mask) << 48) |
(((uint64_t)I->lod_mode) << 52) |
(((uint64_t)(S & BITFIELD_MASK(6))) << 56) | (((uint64_t)St) << 62) |
(((uint64_t)q5) << 63);
memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
if (L)
@ -672,8 +609,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
break;
}
case AGX_OPCODE_BLOCK_IMAGE_STORE:
{
case AGX_OPCODE_BLOCK_IMAGE_STORE: {
enum agx_format F = I->format;
assert(F < 0x10);
@ -695,23 +631,15 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
unsigned unk2 = msaa ? 38 : 37; /* XXX */
unsigned unk3 = 1;
uint32_t word0 =
agx_opcodes_info[I->op].encoding.exact |
(1 << 15) /* we always set length bit for now */ |
((F & 1) << 8) |
((R & BITFIELD_MASK(6)) << 9) |
(unk1 ? (1u << 31) : 0);
uint32_t word0 = agx_opcodes_info[I->op].encoding.exact |
(1 << 15) /* we always set length bit for now */ |
((F & 1) << 8) | ((R & BITFIELD_MASK(6)) << 9) |
(unk1 ? (1u << 31) : 0);
uint32_t word1 =
(T & BITFIELD_MASK(6)) |
(Tt << 2) |
(unk2 << 9) |
((R >> 6) << 24);
(T & BITFIELD_MASK(6)) | (Tt << 2) | (unk2 << 9) | ((R >> 6) << 24);
uint32_t word2 =
(F >> 1) |
(unk3 ? (1 << 3) : 0) |
((T >> 6) << 14);
uint32_t word2 = (F >> 1) | (unk3 ? (1 << 3) : 0) | ((T >> 6) << 14);
memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word0, 4);
memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word1, 4);
@ -719,8 +647,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
break;
}
case AGX_OPCODE_ZS_EMIT:
{
case AGX_OPCODE_ZS_EMIT: {
agx_index S = I->src[0];
if (S.type == AGX_INDEX_IMMEDIATE)
assert(S.value < BITFIELD_BIT(8));
@ -732,30 +659,25 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
assert(I->zs >= 1 && I->zs <= 3);
uint32_t word0 =
agx_opcodes_info[I->op].encoding.exact |
((S.type == AGX_INDEX_IMMEDIATE) ? (1 << 8) : 0) |
((S.value & BITFIELD_MASK(6)) << 9) |
((T.value & BITFIELD_MASK(6)) << 16) |
((T.value >> 6) << 26) |
((S.value >> 6) << 24) |
(I->zs << 29);
uint32_t word0 = agx_opcodes_info[I->op].encoding.exact |
((S.type == AGX_INDEX_IMMEDIATE) ? (1 << 8) : 0) |
((S.value & BITFIELD_MASK(6)) << 9) |
((T.value & BITFIELD_MASK(6)) << 16) |
((T.value >> 6) << 26) | ((S.value >> 6) << 24) |
(I->zs << 29);
memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word0, 4);
break;
}
case AGX_OPCODE_JMP_EXEC_ANY:
case AGX_OPCODE_JMP_EXEC_NONE:
{
case AGX_OPCODE_JMP_EXEC_NONE: {
/* We don't implement indirect branches */
assert(I->target != NULL);
/* We'll fix the offset later. */
struct agx_branch_fixup fixup = {
.block = I->target,
.offset = emission->size
};
struct agx_branch_fixup fixup = {.block = I->target,
.offset = emission->size};
util_dynarray_append(fixups, struct agx_branch_fixup, fixup);
@ -779,10 +701,10 @@ static void
agx_fixup_branch(struct util_dynarray *emission, struct agx_branch_fixup fix)
{
/* Branch offset is 2 bytes into the jump instruction */
uint8_t *location = ((uint8_t *) emission->data) + fix.offset + 2;
uint8_t *location = ((uint8_t *)emission->data) + fix.offset + 2;
/* Offsets are relative to the jump instruction */
int32_t patch = (int32_t) fix.block->offset - (int32_t) fix.offset;
int32_t patch = (int32_t)fix.block->offset - (int32_t)fix.offset;
/* Patch the binary */
memcpy(location, &patch, sizeof(patch));

View file

@ -37,8 +37,7 @@ agx_print_sized(char prefix, unsigned value, enum agx_size size, FILE *fp)
return;
case AGX_SIZE_64:
assert((value & 1) == 0);
fprintf(fp, "%c%u:%c%u", prefix, value >> 1,
prefix, (value >> 1) + 1);
fprintf(fp, "%c%u:%c%u", prefix, value >> 1, prefix, (value >> 1) + 1);
return;
}
@ -143,9 +142,9 @@ agx_print_instr(agx_instr *I, FILE *fp)
print_comma = true;
agx_print_index(I->src[s],
agx_opcodes_info[I->op].is_float &&
!(s >= 2 && I->op == AGX_OPCODE_FCMPSEL),
fp);
agx_opcodes_info[I->op].is_float &&
!(s >= 2 && I->op == AGX_OPCODE_FCMPSEL),
fp);
}
if (I->mask) {

View file

@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "agx_compiler.h"
#include "agx_builder.h"
#include "agx_compiler.h"
/* SSA-based register allocator */
@ -119,15 +119,15 @@ find_regs(BITSET_WORD *used_regs, unsigned count, unsigned align, unsigned max)
static void
reserve_live_in(struct ra_ctx *rctx)
{
int i;
BITSET_FOREACH_SET(i, rctx->block->live_in, rctx->shader->alloc) {
/* Skip values defined in loops when processing the loop header */
if (!BITSET_TEST(rctx->visited, i))
continue;
int i;
BITSET_FOREACH_SET(i, rctx->block->live_in, rctx->shader->alloc) {
/* Skip values defined in loops when processing the loop header */
if (!BITSET_TEST(rctx->visited, i))
continue;
for (unsigned j = 0; j < rctx->ncomps[i]; ++j)
BITSET_SET(rctx->used_regs, rctx->ssa_to_reg[i] + j);
}
for (unsigned j = 0; j < rctx->ncomps[i]; ++j)
BITSET_SET(rctx->used_regs, rctx->ssa_to_reg[i] + j);
}
}
static void
@ -251,7 +251,7 @@ pick_regs(struct ra_ctx *rctx, agx_instr *I, unsigned d)
static void
agx_ra_assign_local(struct ra_ctx *rctx)
{
BITSET_DECLARE(used_regs, AGX_NUM_REGS) = { 0 };
BITSET_DECLARE(used_regs, AGX_NUM_REGS) = {0};
agx_block *block = rctx->block;
uint8_t *ssa_to_reg = rctx->ssa_to_reg;
@ -365,7 +365,7 @@ agx_insert_parallel_copies(agx_context *ctx, agx_block *block)
assert(src.type == AGX_INDEX_REGISTER);
assert(dest.size == src.size);
copies[i++] = (struct agx_copy) {
copies[i++] = (struct agx_copy){
.dest = dest.value,
.src = src,
};
@ -407,7 +407,7 @@ agx_ra(agx_context *ctx)
* to a NIR invariant, so we do not need special handling for this.
*/
agx_foreach_block(ctx, block) {
agx_ra_assign_local(&(struct ra_ctx) {
agx_ra_assign_local(&(struct ra_ctx){
.shader = ctx,
.block = block,
.ssa_to_reg = ssa_to_reg,
@ -437,7 +437,8 @@ agx_ra(agx_context *ctx)
agx_foreach_ssa_dest(ins, d) {
unsigned v = ssa_to_reg[ins->dest[d].value];
ins->dest[d] = agx_replace_index(ins->dest[d], agx_register(v, ins->dest[d].size));
ins->dest[d] =
agx_replace_index(ins->dest[d], agx_register(v, ins->dest[d].size));
}
}
@ -455,10 +456,11 @@ agx_ra(agx_context *ctx)
/* Move the sources */
agx_foreach_src(ins, i) {
if (agx_is_null(ins->src[i])) continue;
if (agx_is_null(ins->src[i]))
continue;
assert(ins->src[i].size == ins->src[0].size);
copies[n++] = (struct agx_copy) {
copies[n++] = (struct agx_copy){
.dest = base + (i * width),
.src = ins->src[i],
};
@ -482,7 +484,7 @@ agx_ra(agx_context *ctx)
if (ins->dest[i].type != AGX_INDEX_REGISTER)
continue;
copies[n++] = (struct agx_copy) {
copies[n++] = (struct agx_copy){
.dest = ins->dest[i].value,
.src = agx_register(base + (i * width), ins->dest[i].size),
};
@ -494,8 +496,6 @@ agx_ra(agx_context *ctx)
agx_remove_instruction(ins);
continue;
}
}
/* Insert parallel copies lowering phi nodes */

View file

@ -32,7 +32,7 @@
agx_index
agx_indexed_sysval(agx_context *ctx, enum agx_push_type type,
enum agx_size size, unsigned index, unsigned length)
enum agx_size size, unsigned index, unsigned length)
{
/* Check if we already pushed */
for (unsigned i = 0; i < ctx->out->push_ranges; ++i) {
@ -48,18 +48,18 @@ agx_indexed_sysval(agx_context *ctx, enum agx_push_type type,
/* Otherwise, push */
assert(ctx->out->push_ranges < AGX_MAX_PUSH_RANGES);
ctx->out->push_count = ALIGN_POT(ctx->out->push_count, agx_size_align_16(size));
ctx->out->push_count =
ALIGN_POT(ctx->out->push_count, agx_size_align_16(size));
unsigned base = ctx->out->push_count;
ctx->out->push_count += length;
assert(ctx->out->push_count <= AGX_NUM_UNIFORMS);
ctx->out->push[ctx->out->push_ranges++] = (struct agx_push) {
.type = type,
.base = base,
.length = length,
.indirect = false
};
ctx->out->push[ctx->out->push_ranges++] =
(struct agx_push){.type = type,
.base = base,
.length = length,
.indirect = false};
return agx_uniform(base + index, size);
}
@ -85,7 +85,7 @@ agx_vbo_base(agx_context *ctx, unsigned vbo)
ctx->out->push_count += 4;
assert(ctx->out->push_count <= AGX_NUM_UNIFORMS);
ctx->out->push[ctx->out->push_ranges++] = (struct agx_push) {
ctx->out->push[ctx->out->push_ranges++] = (struct agx_push){
.type = AGX_PUSH_VBO_BASE,
.base = base,
.length = 4,

View file

@ -27,7 +27,10 @@
/* Validatation doesn't make sense in release builds */
#ifndef NDEBUG
#define agx_validate_assert(stmt) if (!(stmt)) { return false; }
#define agx_validate_assert(stmt) \
if (!(stmt)) { \
return false; \
}
/*
* If a block contains phi nodes, they must come at the start of the block. If a
@ -99,9 +102,8 @@ agx_validate_sources(agx_instr *I)
agx_validate_assert(!src.cache);
agx_validate_assert(!src.discard);
bool ldst =
(I->op == AGX_OPCODE_DEVICE_LOAD) ||
(I->op == AGX_OPCODE_UNIFORM_STORE);
bool ldst = (I->op == AGX_OPCODE_DEVICE_LOAD) ||
(I->op == AGX_OPCODE_UNIFORM_STORE);
/* Immediates are encoded as 8-bit (16-bit for memory load/store). For
* integers, they extend to 16-bit. For floating point, they are 8-bit
@ -131,7 +133,7 @@ agx_validate_defs(agx_instr *I, BITSET_WORD *defs)
}
agx_foreach_ssa_dest(I, d) {
/* Static single assignment */
/* Static single assignment */
if (BITSET_TEST(defs, I->dest[d].value))
return false;
@ -162,9 +164,9 @@ agx_validate(agx_context *ctx, const char *after)
agx_foreach_instr_global(ctx, I) {
if (!agx_validate_defs(I, defs)) {
fprintf(stderr, "Invalid defs after %s\n", after);
agx_print_instr(I, stdout);
fail = true;
fprintf(stderr, "Invalid defs after %s\n", after);
agx_print_instr(I, stdout);
fail = true;
}
}
@ -173,9 +175,9 @@ agx_validate(agx_context *ctx, const char *after)
agx_foreach_instr_global(ctx, I) {
if (!agx_validate_sources(I)) {
fprintf(stderr, "Invalid sources form after %s\n", after);
agx_print_instr(I, stdout);
fail = true;
fprintf(stderr, "Invalid sources form after %s\n", after);
agx_print_instr(I, stdout);
fail = true;
}
}

View file

@ -27,10 +27,10 @@
#ifndef __AGX_TEST_H
#define __AGX_TEST_H
#include <stdio.h>
#include <inttypes.h>
#include "agx_compiler.h"
#include <stdio.h>
#include "agx_builder.h"
#include "agx_compiler.h"
/* Helper to generate a agx_builder suitable for creating test instructions */
static inline agx_builder *
@ -72,8 +72,7 @@ agx_instr_equal(agx_instr *A, agx_instr *B)
if (memcmp(A->dest, B->dest, A->nr_dests * sizeof(agx_index)))
return false;
return memcmp((uint8_t *) A + pointers,
(uint8_t *) B + pointers,
return memcmp((uint8_t *)A + pointers, (uint8_t *)B + pointers,
sizeof(agx_instr) - pointers) == 0;
}
@ -83,8 +82,9 @@ agx_block_equal(agx_block *A, agx_block *B)
if (list_length(&A->instructions) != list_length(&B->instructions))
return false;
list_pair_for_each_entry(agx_instr, insA, insB,
&A->instructions, &B->instructions, link) {
list_pair_for_each_entry(agx_instr, insA, insB, &A->instructions,
&B->instructions, link)
{
if (!agx_instr_equal(insA, insB))
return false;
}
@ -98,8 +98,9 @@ agx_shader_equal(agx_context *A, agx_context *B)
if (list_length(&A->blocks) != list_length(&B->blocks))
return false;
list_pair_for_each_entry(agx_block, blockA, blockB,
&A->blocks, &B->blocks, link) {
list_pair_for_each_entry(agx_block, blockA, blockB, &A->blocks, &B->blocks,
link)
{
if (!agx_block_equal(blockA, blockB))
return false;
}
@ -107,30 +108,31 @@ agx_shader_equal(agx_context *A, agx_context *B)
return true;
}
#define ASSERT_SHADER_EQUAL(A, B) \
if (!agx_shader_equal(A, B)) { \
ADD_FAILURE(); \
fprintf(stderr, "Pass produced unexpected results"); \
fprintf(stderr, " Actual:\n"); \
agx_print_shader(A, stderr); \
fprintf(stderr, " Expected:\n"); \
agx_print_shader(B, stderr); \
fprintf(stderr, "\n"); \
} \
#define ASSERT_SHADER_EQUAL(A, B) \
if (!agx_shader_equal(A, B)) { \
ADD_FAILURE(); \
fprintf(stderr, "Pass produced unexpected results"); \
fprintf(stderr, " Actual:\n"); \
agx_print_shader(A, stderr); \
fprintf(stderr, " Expected:\n"); \
agx_print_shader(B, stderr); \
fprintf(stderr, "\n"); \
}
#define INSTRUCTION_CASE(instr, expected, pass) do { \
agx_builder *A = agx_test_builder(mem_ctx); \
agx_builder *B = agx_test_builder(mem_ctx); \
{ \
agx_builder *b = A; \
instr; \
} \
{ \
agx_builder *b = B; \
expected; \
} \
pass(A->shader); \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while(0)
#define INSTRUCTION_CASE(instr, expected, pass) \
do { \
agx_builder *A = agx_test_builder(mem_ctx); \
agx_builder *B = agx_test_builder(mem_ctx); \
{ \
agx_builder *b = A; \
instr; \
} \
{ \
agx_builder *b = B; \
expected; \
} \
pass(A->shader); \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while (0)
#endif

View file

@ -25,19 +25,20 @@
#include <gtest/gtest.h>
#define CASE(copies, expected) do { \
agx_builder *A = agx_test_builder(mem_ctx); \
agx_builder *B = agx_test_builder(mem_ctx); \
\
agx_emit_parallel_copies(A, copies, ARRAY_SIZE(copies)); \
\
{ \
agx_builder *b = B; \
expected; \
} \
\
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while(0)
#define CASE(copies, expected) \
do { \
agx_builder *A = agx_test_builder(mem_ctx); \
agx_builder *B = agx_test_builder(mem_ctx); \
\
agx_emit_parallel_copies(A, copies, ARRAY_SIZE(copies)); \
\
{ \
agx_builder *b = B; \
expected; \
} \
\
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while (0)
static inline void
xor_swap(agx_builder *b, agx_index x, agx_index y)
@ -48,163 +49,169 @@ xor_swap(agx_builder *b, agx_index x, agx_index y)
}
class LowerParallelCopy : public testing::Test {
protected:
LowerParallelCopy() {
protected:
LowerParallelCopy()
{
mem_ctx = ralloc_context(NULL);
}
~LowerParallelCopy() {
~LowerParallelCopy()
{
ralloc_free(mem_ctx);
}
void *mem_ctx;
};
TEST_F(LowerParallelCopy, UnrelatedCopies) {
TEST_F(LowerParallelCopy, UnrelatedCopies)
{
struct agx_copy test_1[] = {
{ .dest = 0, .src = agx_register(2, AGX_SIZE_32) },
{ .dest = 4, .src = agx_register(6, AGX_SIZE_32) },
{.dest = 0, .src = agx_register(2, AGX_SIZE_32)},
{.dest = 4, .src = agx_register(6, AGX_SIZE_32)},
};
CASE(test_1, {
agx_mov_to(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
agx_mov_to(b, agx_register(4, AGX_SIZE_32), agx_register(6, AGX_SIZE_32));
agx_mov_to(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
agx_mov_to(b, agx_register(4, AGX_SIZE_32), agx_register(6, AGX_SIZE_32));
});
struct agx_copy test_2[] = {
{ .dest = 0, .src = agx_register(1, AGX_SIZE_16) },
{ .dest = 4, .src = agx_register(5, AGX_SIZE_16) },
{.dest = 0, .src = agx_register(1, AGX_SIZE_16)},
{.dest = 4, .src = agx_register(5, AGX_SIZE_16)},
};
CASE(test_2, {
agx_mov_to(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
agx_mov_to(b, agx_register(4, AGX_SIZE_16), agx_register(5, AGX_SIZE_16));
agx_mov_to(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
agx_mov_to(b, agx_register(4, AGX_SIZE_16), agx_register(5, AGX_SIZE_16));
});
}
TEST_F(LowerParallelCopy, RelatedSource)
{
struct agx_copy test_1[] = {
{ .dest = 0, .src = agx_register(2, AGX_SIZE_32) },
{ .dest = 4, .src = agx_register(2, AGX_SIZE_32) },
{.dest = 0, .src = agx_register(2, AGX_SIZE_32)},
{.dest = 4, .src = agx_register(2, AGX_SIZE_32)},
};
CASE(test_1, {
agx_mov_to(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
agx_mov_to(b, agx_register(4, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
agx_mov_to(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
agx_mov_to(b, agx_register(4, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
});
struct agx_copy test_2[] = {
{ .dest = 0, .src = agx_register(1, AGX_SIZE_16) },
{ .dest = 4, .src = agx_register(1, AGX_SIZE_16) },
{.dest = 0, .src = agx_register(1, AGX_SIZE_16)},
{.dest = 4, .src = agx_register(1, AGX_SIZE_16)},
};
CASE(test_2, {
agx_mov_to(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
agx_mov_to(b, agx_register(4, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
agx_mov_to(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
agx_mov_to(b, agx_register(4, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
});
}
TEST_F(LowerParallelCopy, DependentCopies)
{
struct agx_copy test_1[] = {
{ .dest = 0, .src = agx_register(2, AGX_SIZE_32) },
{ .dest = 4, .src = agx_register(0, AGX_SIZE_32) },
{.dest = 0, .src = agx_register(2, AGX_SIZE_32)},
{.dest = 4, .src = agx_register(0, AGX_SIZE_32)},
};
CASE(test_1, {
agx_mov_to(b, agx_register(4, AGX_SIZE_32), agx_register(0, AGX_SIZE_32));
agx_mov_to(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
agx_mov_to(b, agx_register(4, AGX_SIZE_32), agx_register(0, AGX_SIZE_32));
agx_mov_to(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
});
struct agx_copy test_2[] = {
{ .dest = 0, .src = agx_register(1, AGX_SIZE_16) },
{ .dest = 4, .src = agx_register(0, AGX_SIZE_16) },
{.dest = 0, .src = agx_register(1, AGX_SIZE_16)},
{.dest = 4, .src = agx_register(0, AGX_SIZE_16)},
};
CASE(test_2, {
agx_mov_to(b, agx_register(4, AGX_SIZE_16), agx_register(0, AGX_SIZE_16));
agx_mov_to(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
agx_mov_to(b, agx_register(4, AGX_SIZE_16), agx_register(0, AGX_SIZE_16));
agx_mov_to(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
});
}
TEST_F(LowerParallelCopy, ManyDependentCopies)
{
struct agx_copy test_1[] = {
{ .dest = 0, .src = agx_register(2, AGX_SIZE_32) },
{ .dest = 4, .src = agx_register(0, AGX_SIZE_32) },
{ .dest = 8, .src = agx_register(6, AGX_SIZE_32) },
{ .dest = 6, .src = agx_register(4, AGX_SIZE_32) },
{.dest = 0, .src = agx_register(2, AGX_SIZE_32)},
{.dest = 4, .src = agx_register(0, AGX_SIZE_32)},
{.dest = 8, .src = agx_register(6, AGX_SIZE_32)},
{.dest = 6, .src = agx_register(4, AGX_SIZE_32)},
};
CASE(test_1, {
agx_mov_to(b, agx_register(8, AGX_SIZE_32), agx_register(6, AGX_SIZE_32));
agx_mov_to(b, agx_register(6, AGX_SIZE_32), agx_register(4, AGX_SIZE_32));
agx_mov_to(b, agx_register(4, AGX_SIZE_32), agx_register(0, AGX_SIZE_32));
agx_mov_to(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
agx_mov_to(b, agx_register(8, AGX_SIZE_32), agx_register(6, AGX_SIZE_32));
agx_mov_to(b, agx_register(6, AGX_SIZE_32), agx_register(4, AGX_SIZE_32));
agx_mov_to(b, agx_register(4, AGX_SIZE_32), agx_register(0, AGX_SIZE_32));
agx_mov_to(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
});
struct agx_copy test_2[] = {
{ .dest = 0, .src = agx_register(1, AGX_SIZE_16) },
{ .dest = 2, .src = agx_register(0, AGX_SIZE_16) },
{ .dest = 4, .src = agx_register(3, AGX_SIZE_16) },
{ .dest = 3, .src = agx_register(2, AGX_SIZE_16) },
{.dest = 0, .src = agx_register(1, AGX_SIZE_16)},
{.dest = 2, .src = agx_register(0, AGX_SIZE_16)},
{.dest = 4, .src = agx_register(3, AGX_SIZE_16)},
{.dest = 3, .src = agx_register(2, AGX_SIZE_16)},
};
CASE(test_2, {
agx_mov_to(b, agx_register(4, AGX_SIZE_16), agx_register(3, AGX_SIZE_16));
agx_mov_to(b, agx_register(3, AGX_SIZE_16), agx_register(2, AGX_SIZE_16));
agx_mov_to(b, agx_register(2, AGX_SIZE_16), agx_register(0, AGX_SIZE_16));
agx_mov_to(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
agx_mov_to(b, agx_register(4, AGX_SIZE_16), agx_register(3, AGX_SIZE_16));
agx_mov_to(b, agx_register(3, AGX_SIZE_16), agx_register(2, AGX_SIZE_16));
agx_mov_to(b, agx_register(2, AGX_SIZE_16), agx_register(0, AGX_SIZE_16));
agx_mov_to(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
});
}
TEST_F(LowerParallelCopy, Swap) {
TEST_F(LowerParallelCopy, Swap)
{
struct agx_copy test_1[] = {
{ .dest = 0, .src = agx_register(2, AGX_SIZE_32) },
{ .dest = 2, .src = agx_register(0, AGX_SIZE_32) },
{.dest = 0, .src = agx_register(2, AGX_SIZE_32)},
{.dest = 2, .src = agx_register(0, AGX_SIZE_32)},
};
CASE(test_1, {
xor_swap(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
xor_swap(b, agx_register(0, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
});
struct agx_copy test_2[] = {
{ .dest = 0, .src = agx_register(1, AGX_SIZE_16) },
{ .dest = 1, .src = agx_register(0, AGX_SIZE_16) },
{.dest = 0, .src = agx_register(1, AGX_SIZE_16)},
{.dest = 1, .src = agx_register(0, AGX_SIZE_16)},
};
CASE(test_2, {
xor_swap(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
xor_swap(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
});
}
TEST_F(LowerParallelCopy, Cycle3) {
TEST_F(LowerParallelCopy, Cycle3)
{
struct agx_copy test[] = {
{ .dest = 0, .src = agx_register(1, AGX_SIZE_16) },
{ .dest = 1, .src = agx_register(2, AGX_SIZE_16) },
{ .dest = 2, .src = agx_register(0, AGX_SIZE_16) },
{.dest = 0, .src = agx_register(1, AGX_SIZE_16)},
{.dest = 1, .src = agx_register(2, AGX_SIZE_16)},
{.dest = 2, .src = agx_register(0, AGX_SIZE_16)},
};
/* XXX: requires 6 instructions. if we had a temp free, could do it in 4 */
CASE(test, {
xor_swap(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
xor_swap(b, agx_register(1, AGX_SIZE_16), agx_register(2, AGX_SIZE_16));
xor_swap(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
xor_swap(b, agx_register(1, AGX_SIZE_16), agx_register(2, AGX_SIZE_16));
});
}
/* Test case from Hack et al */
TEST_F(LowerParallelCopy, TwoSwaps) {
TEST_F(LowerParallelCopy, TwoSwaps)
{
struct agx_copy test[] = {
{ .dest = 4, .src = agx_register(2, AGX_SIZE_32) },
{ .dest = 6, .src = agx_register(4, AGX_SIZE_32) },
{ .dest = 2, .src = agx_register(6, AGX_SIZE_32) },
{ .dest = 8, .src = agx_register(8, AGX_SIZE_32) },
{.dest = 4, .src = agx_register(2, AGX_SIZE_32)},
{.dest = 6, .src = agx_register(4, AGX_SIZE_32)},
{.dest = 2, .src = agx_register(6, AGX_SIZE_32)},
{.dest = 8, .src = agx_register(8, AGX_SIZE_32)},
};
CASE(test, {
xor_swap(b, agx_register(4, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
xor_swap(b, agx_register(6, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
xor_swap(b, agx_register(4, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
xor_swap(b, agx_register(6, AGX_SIZE_32), agx_register(2, AGX_SIZE_32));
});
}

View file

@ -25,12 +25,14 @@
#include <gtest/gtest.h>
#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, agx_lower_pseudo)
#define CASE(instr, expected) \
INSTRUCTION_CASE(instr, expected, agx_lower_pseudo)
#define NEGCASE(instr) CASE(instr, instr)
class LowerPseudo : public testing::Test {
protected:
LowerPseudo() {
protected:
LowerPseudo()
{
mem_ctx = ralloc_context(NULL);
wx = agx_register(0, AGX_SIZE_32);
@ -38,7 +40,8 @@ protected:
wz = agx_register(4, AGX_SIZE_32);
}
~LowerPseudo() {
~LowerPseudo()
{
ralloc_free(mem_ctx);
}
@ -46,16 +49,19 @@ protected:
agx_index wx, wy, wz;
};
TEST_F(LowerPseudo, Move) {
TEST_F(LowerPseudo, Move)
{
CASE(agx_mov_to(b, wx, wy), agx_bitop_to(b, wx, wy, agx_zero(), 0xA));
}
TEST_F(LowerPseudo, Not) {
TEST_F(LowerPseudo, Not)
{
CASE(agx_not_to(b, wx, wy), agx_bitop_to(b, wx, wy, agx_zero(), 0x5));
}
TEST_F(LowerPseudo, BinaryBitwise) {
TEST_F(LowerPseudo, BinaryBitwise)
{
CASE(agx_and_to(b, wx, wy, wz), agx_bitop_to(b, wx, wy, wz, 0x8));
CASE(agx_xor_to(b, wx, wy, wz), agx_bitop_to(b, wx, wy, wz, 0x6));
CASE(agx_or_to(b, wx, wy, wz), agx_bitop_to(b, wx, wy, wz, 0xE));
CASE(agx_or_to(b, wx, wy, wz), agx_bitop_to(b, wx, wy, wz, 0xE));
}

View file

@ -32,12 +32,19 @@ agx_optimize_and_dce(agx_context *ctx)
agx_dce(ctx);
}
#define CASE(instr, expected, size) INSTRUCTION_CASE(\
{ UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_ ## size); \
instr; agx_unit_test(b, out); }, \
{ UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_ ## size); \
expected; agx_unit_test(b, out); }, \
agx_optimize_and_dce)
#define CASE(instr, expected, size) \
INSTRUCTION_CASE( \
{ \
UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_##size); \
instr; \
agx_unit_test(b, out); \
}, \
{ \
UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_##size); \
expected; \
agx_unit_test(b, out); \
}, \
agx_optimize_and_dce)
#define NEGCASE(instr, size) CASE(instr, instr, size)
@ -50,26 +57,28 @@ agx_optimize_and_dce(agx_context *ctx)
static inline agx_index
agx_fmov(agx_builder *b, agx_index s0)
{
agx_index tmp = agx_temp(b->shader, s0.size);
agx_fmov_to(b, tmp, s0);
return tmp;
agx_index tmp = agx_temp(b->shader, s0.size);
agx_fmov_to(b, tmp, s0);
return tmp;
}
class Optimizer : public testing::Test {
protected:
Optimizer() {
protected:
Optimizer()
{
mem_ctx = ralloc_context(NULL);
wx = agx_register(0, AGX_SIZE_32);
wy = agx_register(2, AGX_SIZE_32);
wz = agx_register(4, AGX_SIZE_32);
wx = agx_register(0, AGX_SIZE_32);
wy = agx_register(2, AGX_SIZE_32);
wz = agx_register(4, AGX_SIZE_32);
hx = agx_register(0, AGX_SIZE_16);
hy = agx_register(1, AGX_SIZE_16);
hz = agx_register(2, AGX_SIZE_16);
hx = agx_register(0, AGX_SIZE_16);
hy = agx_register(1, AGX_SIZE_16);
hz = agx_register(2, AGX_SIZE_16);
}
~Optimizer() {
~Optimizer()
{
ralloc_free(mem_ctx);
}
@ -113,29 +122,30 @@ TEST_F(Optimizer, FusedFnegCancel)
TEST_F(Optimizer, FmulFsatF2F16)
{
CASE16({
CASE16(
{
agx_index tmp = agx_temp(b->shader, AGX_SIZE_32);
agx_fmov_to(b, tmp, agx_fmul(b, wx, wy))->saturate = true;
agx_fmov_to(b, out, tmp);
}, {
agx_fmul_to(b, out, wx, wy)->saturate = true;
});
},
{ agx_fmul_to(b, out, wx, wy)->saturate = true; });
}
TEST_F(Optimizer, Copyprop)
{
CASE32(agx_fmul_to(b, out, wx, agx_mov(b, wy)), agx_fmul_to(b, out, wx, wy));
CASE32(agx_fmul_to(b, out, agx_mov(b, wx), agx_mov(b, wy)), agx_fmul_to(b, out, wx, wy));
CASE32(agx_fmul_to(b, out, agx_mov(b, wx), agx_mov(b, wy)),
agx_fmul_to(b, out, wx, wy));
}
TEST_F(Optimizer, InlineHazards)
{
NEGCASE32({
agx_instr *I = agx_collect_to(b, out, 4);
I->src[0] = agx_mov_imm(b, AGX_SIZE_32, 0);
I->src[1] = wy;
I->src[2] = wz;
I->src[3] = wz;
agx_instr *I = agx_collect_to(b, out, 4);
I->src[0] = agx_mov_imm(b, AGX_SIZE_32, 0);
I->src[1] = wy;
I->src[2] = wz;
I->src[3] = wz;
});
}
@ -153,23 +163,22 @@ TEST_F(Optimizer, CopypropRespectsAbsNeg)
TEST_F(Optimizer, IntCopyprop)
{
CASE32(agx_xor_to(b, out, agx_mov(b, wx), wy),
agx_xor_to(b, out, wx, wy));
CASE32(agx_xor_to(b, out, agx_mov(b, wx), wy), agx_xor_to(b, out, wx, wy));
}
TEST_F(Optimizer, IntCopypropDoesntConvert)
{
NEGCASE32({
agx_index cvt = agx_temp(b->shader, AGX_SIZE_32);
agx_mov_to(b, cvt, hx);
agx_xor_to(b, out, cvt, wy);
agx_index cvt = agx_temp(b->shader, AGX_SIZE_32);
agx_mov_to(b, cvt, hx);
agx_xor_to(b, out, cvt, wy);
});
}
TEST_F(Optimizer, SkipPreloads)
{
NEGCASE32({
agx_index preload = agx_preload(b, agx_register(0, AGX_SIZE_32));
agx_xor_to(b, out, preload, wy);
agx_index preload = agx_preload(b, agx_register(0, AGX_SIZE_32));
agx_xor_to(b, out, preload, wy);
});
}

View file

@ -39,9 +39,9 @@ drm_shim_driver_init(void)
shim_device.driver_ioctl_count = ARRAY_SIZE(driver_ioctls);
drm_shim_override_file("DRIVER=asahi\n"
"OF_FULLNAME=/soc/agx\n"
"OF_COMPATIBLE_0=apple,gpu-g13g\n"
"OF_COMPATIBLE_N=1\n",
"/sys/dev/char/%d:%d/device/uevent", DRM_MAJOR,
render_node_minor);
"OF_FULLNAME=/soc/agx\n"
"OF_COMPATIBLE_0=apple,gpu-g13g\n"
"OF_COMPATIBLE_N=1\n",
"/sys/dev/char/%d:%d/device/uevent", DRM_MAJOR,
render_node_minor);
}

View file

@ -112,9 +112,8 @@ ail_initialize_twiddled(struct ail_layout *layout)
* power-of-two miptree is used when either the width or the height is
* smaller than a single large tile.
*/
unsigned pot_level =
MIN2(ail_min_mip_below(w_el, tilesize_el.width_el),
ail_min_mip_below(h_el, tilesize_el.height_el));
unsigned pot_level = MIN2(ail_min_mip_below(w_el, tilesize_el.width_el),
ail_min_mip_below(h_el, tilesize_el.height_el));
/* First allocate the large miptree. All tiles in the large miptree are of
* size tilesize_el and have their dimensions given by stx/sty/sarea.
@ -158,19 +157,23 @@ ail_initialize_twiddled(struct ail_layout *layout)
layout->level_offsets_B[l] = offset_B;
offset_B = ALIGN_POT(offset_B + (blocksize_B * size_el), AIL_CACHELINE);
/* The tilesize is based on the true mipmap level size, not the POT rounded size */
unsigned tilesize_el = util_next_power_of_two(u_minify(MIN2(w_el, h_el), l));
layout->tilesize_el[l] = (struct ail_tile) { tilesize_el, tilesize_el };
/* The tilesize is based on the true mipmap level size, not the POT
* rounded size */
unsigned tilesize_el =
util_next_power_of_two(u_minify(MIN2(w_el, h_el), l));
layout->tilesize_el[l] = (struct ail_tile){tilesize_el, tilesize_el};
potw_el = u_minify(potw_el, 1);
poth_el = u_minify(poth_el, 1);
}
/* Align layer size if we have mipmaps and one miptree is larger than one page */
/* Align layer size if we have mipmaps and one miptree is larger than one
* page */
layout->page_aligned_layers = layout->levels != 1 && offset_B > AIL_PAGESIZE;
/* Single-layer images are not padded unless they are Z/S */
if (layout->depth_px == 1 && !util_format_is_depth_or_stencil(layout->format))
if (layout->depth_px == 1 &&
!util_format_is_depth_or_stencil(layout->format))
layout->page_aligned_layers = false;
if (layout->page_aligned_layers)
@ -184,7 +187,8 @@ ail_initialize_twiddled(struct ail_layout *layout)
static void
ail_initialize_compression(struct ail_layout *layout)
{
assert(!util_format_is_compressed(layout->format) && "Compressed pixel formats not supported");
assert(!util_format_is_compressed(layout->format) &&
"Compressed pixel formats not supported");
assert(util_format_get_blockwidth(layout->format) == 1);
assert(util_format_get_blockheight(layout->format) == 1);
assert(layout->width_px >= 16 && "Small textures are never compressed");
@ -227,9 +231,9 @@ ail_make_miptree(struct ail_layout *layout)
assert(layout->sample_count_sa == 1 &&
"Multisampled linear layouts not supported");
assert(util_format_get_blockwidth(layout->format) == 1 &&
"Strided linear block formats unsupported");
"Strided linear block formats unsupported");
assert(util_format_get_blockheight(layout->format) == 1 &&
"Strided linear block formats unsupported");
"Strided linear block formats unsupported");
} else {
assert(layout->linear_stride_B == 0 && "Invalid nonlinear layout");
assert(layout->depth_px >= 1 && "Invalid dimensions");
@ -241,12 +245,12 @@ ail_make_miptree(struct ail_layout *layout)
* allocate them all.
*/
if (layout->levels > 1) {
layout->levels = util_logbase2(MAX2(layout->width_px,
layout->height_px)) + 1;
layout->levels =
util_logbase2(MAX2(layout->width_px, layout->height_px)) + 1;
}
assert(util_format_get_blockdepth(layout->format) == 1 &&
"Deep formats unsupported");
"Deep formats unsupported");
switch (layout->tiling) {
case AIL_TILING_LINEAR:

View file

@ -26,15 +26,15 @@
#define __AIL_LAYOUT_H_
#include "util/format/u_format.h"
#include "util/u_math.h"
#include "util/macros.h"
#include "util/u_math.h"
#ifdef __cplusplus
extern "C" {
#endif
#define AIL_CACHELINE 0x80
#define AIL_PAGESIZE 0x4000
#define AIL_CACHELINE 0x80
#define AIL_PAGESIZE 0x4000
#define AIL_MAX_MIP_LEVELS 16
enum ail_tiling {
@ -183,9 +183,9 @@ ail_get_linear_pixel_B(struct ail_layout *layout, ASSERTED unsigned level,
assert(level == 0 && "Strided linear mipmapped textures are unsupported");
assert(z_px == 0 && "Strided linear 3D textures are unsupported");
assert(util_format_get_blockwidth(layout->format) == 1 &&
"Strided linear block formats unsupported");
"Strided linear block formats unsupported");
assert(util_format_get_blockheight(layout->format) == 1 &&
"Strided linear block formats unsupported");
"Strided linear block formats unsupported");
assert(layout->sample_count_sa == 1 &&
"Strided linear multisampling unsupported");
@ -201,17 +201,13 @@ ail_is_compressed(struct ail_layout *layout)
void ail_make_miptree(struct ail_layout *layout);
void
ail_detile(void *_tiled, void *_linear,
struct ail_layout *tiled_layout, unsigned level,
unsigned linear_pitch_B, unsigned sx_px, unsigned sy_px,
unsigned width_px, unsigned height_px);
void ail_detile(void *_tiled, void *_linear, struct ail_layout *tiled_layout,
unsigned level, unsigned linear_pitch_B, unsigned sx_px,
unsigned sy_px, unsigned width_px, unsigned height_px);
void
ail_tile(void *_tiled, void *_linear,
struct ail_layout *tiled_layout, unsigned level,
unsigned linear_pitch_B,
unsigned sx_px, unsigned sy_px, unsigned width_px, unsigned height_px);
void ail_tile(void *_tiled, void *_linear, struct ail_layout *tiled_layout,
unsigned level, unsigned linear_pitch_B, unsigned sx_px,
unsigned sy_px, unsigned width_px, unsigned height_px);
#ifdef __cplusplus
} /* extern C */

View file

@ -1277,12 +1277,11 @@ TEST(Miptree, Tests2D)
ail_make_miptree(&layout);
for (unsigned l = 0; l < test.levels; ++l) {
EXPECT_EQ(ail_get_level_offset_B(&layout, l), test.offsets[l]) <<
test.width << "x" << test.height << " " <<
util_format_short_name(test.format) <<
" texture has wrong offset at level " << l <<
", off by " <<
test.offsets[l] - ail_get_level_offset_B(&layout, l);
}
EXPECT_EQ(ail_get_level_offset_B(&layout, l), test.offsets[l])
<< test.width << "x" << test.height << " "
<< util_format_short_name(test.format)
<< " texture has wrong offset at level " << l << ", off by "
<< test.offsets[l] - ail_get_level_offset_B(&layout, l);
}
}
}

View file

@ -22,11 +22,11 @@
* SOFTWARE.
*/
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "util/macros.h"
#include "layout.h"
@ -55,11 +55,11 @@
* applying the two's complement identity, we are left with (X - mask) & mask
*/
#define MOD_POT(x, y) (x) & ((y) - 1)
#define MOD_POT(x, y) (x) & ((y)-1)
typedef struct {
uint64_t lo;
uint64_t hi;
uint64_t lo;
uint64_t hi;
} __attribute__((packed)) ail_uint128_t;
static uint32_t
@ -67,9 +67,8 @@ ail_space_bits(unsigned x)
{
assert(x < 128 && "offset must be inside the tile");
return ((x & 1) << 0) | ((x & 2) << 1) | ((x & 4) << 2) |
((x & 8) << 3) | ((x & 16) << 4) | ((x & 32) << 5) |
((x & 64) << 6);
return ((x & 1) << 0) | ((x & 2) << 1) | ((x & 4) << 2) | ((x & 8) << 3) |
((x & 16) << 4) | ((x & 32) << 5) | ((x & 64) << 6);
}
/*
@ -85,74 +84,78 @@ ail_space_mask(unsigned x)
return MOD_POT(0x55555555, x * x);
}
#define TILED_UNALIGNED_TYPE(element_t, is_store) { \
enum pipe_format format = tiled_layout->format; \
unsigned linear_pitch_el = linear_pitch_B / blocksize_B; \
unsigned width_el = util_format_get_nblocksx(format, width_px); \
unsigned sx_el = util_format_get_nblocksx(format, sx_px); \
unsigned sy_el = util_format_get_nblocksy(format, sy_px); \
unsigned swidth_el = util_format_get_nblocksx(format, swidth_px); \
unsigned sheight_el = util_format_get_nblocksy(format, sheight_px); \
unsigned sx_end_el = sx_el + swidth_el; \
unsigned sy_end_el = sy_el + sheight_el; \
\
struct ail_tile tile_size = tiled_layout->tilesize_el[level]; \
unsigned tile_area_el = tile_size.width_el * tile_size.height_el; \
unsigned tiles_per_row = DIV_ROUND_UP(width_el, tile_size.width_el); \
unsigned y_offs_el = ail_space_bits(MOD_POT(sy_el, tile_size.height_el)) << 1; \
unsigned x_offs_start_el = ail_space_bits(MOD_POT(sx_el, tile_size.width_el)); \
unsigned space_mask_x = ail_space_mask(tile_size.width_el); \
unsigned space_mask_y = ail_space_mask(tile_size.height_el) << 1; \
unsigned log2_tile_width_el = util_logbase2(tile_size.width_el); \
unsigned log2_tile_height_el = util_logbase2(tile_size.height_el); \
\
element_t *linear = _linear; \
element_t *tiled = _tiled; \
\
for (unsigned y_el = sy_el; y_el < sy_end_el; ++y_el) {\
unsigned y_rowtile = y_el >> log2_tile_height_el; \
unsigned y_tile = y_rowtile * tiles_per_row;\
unsigned x_offs_el = x_offs_start_el;\
\
element_t *linear_row = linear;\
\
for (unsigned x_el = sx_el; x_el < sx_end_el; ++x_el) {\
unsigned tile_idx = (y_tile + (x_el >> log2_tile_width_el));\
unsigned tile_offset_el = tile_idx * tile_area_el;\
\
element_t *ptiled = &tiled[tile_offset_el + y_offs_el + x_offs_el];\
element_t *plinear = (linear_row++);\
element_t *outp = (element_t *) (is_store ? ptiled : plinear); \
element_t *inp = (element_t *) (is_store ? plinear : ptiled); \
*outp = *inp;\
x_offs_el = (x_offs_el - space_mask_x) & space_mask_x;\
}\
\
y_offs_el = (y_offs_el - space_mask_y) & space_mask_y;\
linear += linear_pitch_el;\
}\
}
#define TILED_UNALIGNED_TYPE(element_t, is_store) \
{ \
enum pipe_format format = tiled_layout->format; \
unsigned linear_pitch_el = linear_pitch_B / blocksize_B; \
unsigned width_el = util_format_get_nblocksx(format, width_px); \
unsigned sx_el = util_format_get_nblocksx(format, sx_px); \
unsigned sy_el = util_format_get_nblocksy(format, sy_px); \
unsigned swidth_el = util_format_get_nblocksx(format, swidth_px); \
unsigned sheight_el = util_format_get_nblocksy(format, sheight_px); \
unsigned sx_end_el = sx_el + swidth_el; \
unsigned sy_end_el = sy_el + sheight_el; \
\
struct ail_tile tile_size = tiled_layout->tilesize_el[level]; \
unsigned tile_area_el = tile_size.width_el * tile_size.height_el; \
unsigned tiles_per_row = DIV_ROUND_UP(width_el, tile_size.width_el); \
unsigned y_offs_el = ail_space_bits(MOD_POT(sy_el, tile_size.height_el)) \
<< 1; \
unsigned x_offs_start_el = \
ail_space_bits(MOD_POT(sx_el, tile_size.width_el)); \
unsigned space_mask_x = ail_space_mask(tile_size.width_el); \
unsigned space_mask_y = ail_space_mask(tile_size.height_el) << 1; \
unsigned log2_tile_width_el = util_logbase2(tile_size.width_el); \
unsigned log2_tile_height_el = util_logbase2(tile_size.height_el); \
\
element_t *linear = _linear; \
element_t *tiled = _tiled; \
\
for (unsigned y_el = sy_el; y_el < sy_end_el; ++y_el) { \
unsigned y_rowtile = y_el >> log2_tile_height_el; \
unsigned y_tile = y_rowtile * tiles_per_row; \
unsigned x_offs_el = x_offs_start_el; \
\
element_t *linear_row = linear; \
\
for (unsigned x_el = sx_el; x_el < sx_end_el; ++x_el) { \
unsigned tile_idx = (y_tile + (x_el >> log2_tile_width_el)); \
unsigned tile_offset_el = tile_idx * tile_area_el; \
\
element_t *ptiled = \
&tiled[tile_offset_el + y_offs_el + x_offs_el]; \
element_t *plinear = (linear_row++); \
element_t *outp = (element_t *)(is_store ? ptiled : plinear); \
element_t *inp = (element_t *)(is_store ? plinear : ptiled); \
*outp = *inp; \
x_offs_el = (x_offs_el - space_mask_x) & space_mask_x; \
} \
\
y_offs_el = (y_offs_el - space_mask_y) & space_mask_y; \
linear += linear_pitch_el; \
} \
}
#define TILED_UNALIGNED_TYPES(blocksize_B, store) { \
if (blocksize_B == 1) \
TILED_UNALIGNED_TYPE(uint8_t, store) \
else if (blocksize_B == 2) \
TILED_UNALIGNED_TYPE(uint16_t, store) \
else if (blocksize_B == 4) \
TILED_UNALIGNED_TYPE(uint32_t, store) \
else if (blocksize_B == 8) \
TILED_UNALIGNED_TYPE(uint64_t, store) \
else if (blocksize_B == 16) \
TILED_UNALIGNED_TYPE(ail_uint128_t, store) \
else \
unreachable("Invalid block size"); \
}
#define TILED_UNALIGNED_TYPES(blocksize_B, store) \
{ \
if (blocksize_B == 1) \
TILED_UNALIGNED_TYPE(uint8_t, store) \
else if (blocksize_B == 2) \
TILED_UNALIGNED_TYPE(uint16_t, store) \
else if (blocksize_B == 4) \
TILED_UNALIGNED_TYPE(uint32_t, store) \
else if (blocksize_B == 8) \
TILED_UNALIGNED_TYPE(uint64_t, store) \
else if (blocksize_B == 16) \
TILED_UNALIGNED_TYPE(ail_uint128_t, store) \
else \
unreachable("Invalid block size"); \
}
void
ail_detile(void *_tiled, void *_linear,
struct ail_layout *tiled_layout, unsigned level,
unsigned linear_pitch_B, unsigned sx_px, unsigned sy_px,
unsigned swidth_px, unsigned sheight_px)
ail_detile(void *_tiled, void *_linear, struct ail_layout *tiled_layout,
unsigned level, unsigned linear_pitch_B, unsigned sx_px,
unsigned sy_px, unsigned swidth_px, unsigned sheight_px)
{
unsigned width_px = u_minify(tiled_layout->width_px, level);
unsigned height_px = u_minify(tiled_layout->height_px, level);
@ -167,10 +170,9 @@ ail_detile(void *_tiled, void *_linear,
}
void
ail_tile(void *_tiled, void *_linear,
struct ail_layout *tiled_layout, unsigned level,
unsigned linear_pitch_B, unsigned sx_px, unsigned sy_px,
unsigned swidth_px, unsigned sheight_px)
ail_tile(void *_tiled, void *_linear, struct ail_layout *tiled_layout,
unsigned level, unsigned linear_pitch_B, unsigned sx_px,
unsigned sy_px, unsigned swidth_px, unsigned sheight_px)
{
unsigned width_px = u_minify(tiled_layout->width_px, level);
unsigned height_px = u_minify(tiled_layout->height_px, level);

View file

@ -42,7 +42,7 @@ enum agx_alloc_type {
/* BO is shared across processes (imported or exported) and therefore cannot be
* cached locally */
#define AGX_BO_SHARED (1 << 0)
#define AGX_BO_SHARED (1 << 0)
struct agx_ptr {
/* If CPU mapped, CPU address. NULL if not mapped */
@ -99,9 +99,8 @@ struct agx_bo {
const char *label;
};
struct agx_bo *
agx_bo_create(struct agx_device *dev, unsigned size, unsigned flags,
const char *label);
struct agx_bo *agx_bo_create(struct agx_device *dev, unsigned size,
unsigned flags, const char *label);
void agx_bo_reference(struct agx_bo *bo);
void agx_bo_unreference(struct agx_bo *bo);

View file

@ -22,8 +22,8 @@
* SOFTWARE.
*/
#include <inttypes.h>
#include "agx_device.h"
#include <inttypes.h>
#include "agx_bo.h"
#include "decode.h"
@ -37,9 +37,8 @@ agx_bo_free(struct agx_device *dev, struct agx_bo *bo)
#if __APPLE__
const uint64_t handle = bo->handle;
kern_return_t ret = IOConnectCallScalarMethod(dev->fd,
AGX_SELECTOR_FREE_MEM,
&handle, 1, NULL, NULL);
kern_return_t ret = IOConnectCallScalarMethod(dev->fd, AGX_SELECTOR_FREE_MEM,
&handle, 1, NULL, NULL);
if (ret)
fprintf(stderr, "error freeing BO mem: %u\n", ret);
@ -55,10 +54,9 @@ void
agx_shmem_free(struct agx_device *dev, unsigned handle)
{
#if __APPLE__
const uint64_t input = handle;
kern_return_t ret = IOConnectCallScalarMethod(dev->fd,
AGX_SELECTOR_FREE_SHMEM,
&input, 1, NULL, NULL);
const uint64_t input = handle;
kern_return_t ret = IOConnectCallScalarMethod(
dev->fd, AGX_SELECTOR_FREE_SHMEM, &input, 1, NULL, NULL);
if (ret)
fprintf(stderr, "error freeing shmem: %u\n", ret);
@ -80,16 +78,16 @@ agx_shmem_alloc(struct agx_device *dev, size_t size, bool cmdbuf)
cmdbuf ? 1 : 0 // 2 - error reporting, 1 - no error reporting
};
kern_return_t ret = IOConnectCallMethod(dev->fd,
AGX_SELECTOR_CREATE_SHMEM, inputs, 2, NULL, 0, NULL,
NULL, &out, &out_sz);
kern_return_t ret =
IOConnectCallMethod(dev->fd, AGX_SELECTOR_CREATE_SHMEM, inputs, 2, NULL,
0, NULL, NULL, &out, &out_sz);
assert(ret == 0);
assert(out_sz == sizeof(out));
assert(out.size == size);
assert(out.map != 0);
bo = (struct agx_bo) {
bo = (struct agx_bo){
.type = cmdbuf ? AGX_ALLOC_CMDBUF : AGX_ALLOC_MEMMAP,
.handle = out.id,
.ptr.cpu = out.map,
@ -97,7 +95,7 @@ agx_shmem_alloc(struct agx_device *dev, size_t size, bool cmdbuf)
.guid = 0, /* TODO? */
};
#else
bo = (struct agx_bo) {
bo = (struct agx_bo){
.type = cmdbuf ? AGX_ALLOC_CMDBUF : AGX_ALLOC_MEMMAP,
.handle = AGX_FAKE_HANDLE++,
.ptr.cpu = calloc(1, size),
@ -113,8 +111,7 @@ agx_shmem_alloc(struct agx_device *dev, size_t size, bool cmdbuf)
}
static struct agx_bo *
agx_bo_alloc(struct agx_device *dev, size_t size,
uint32_t flags)
agx_bo_alloc(struct agx_device *dev, size_t size, uint32_t flags)
{
struct agx_bo *bo;
unsigned handle = 0;
@ -122,18 +119,18 @@ agx_bo_alloc(struct agx_device *dev, size_t size,
#if __APPLE__
uint32_t mode = 0x430; // shared, ?
uint32_t args_in[24] = { 0 };
args_in[4] = 0x4000101; //0x1000101; // unk
uint32_t args_in[24] = {0};
args_in[4] = 0x4000101; // 0x1000101; // unk
args_in[5] = mode;
args_in[16] = size;
args_in[20] = flags;
uint64_t out[10] = { 0 };
uint64_t out[10] = {0};
size_t out_sz = sizeof(out);
kern_return_t ret = IOConnectCallMethod(dev->fd,
AGX_SELECTOR_ALLOCATE_MEM, NULL, 0, args_in,
sizeof(args_in), NULL, 0, out, &out_sz);
kern_return_t ret =
IOConnectCallMethod(dev->fd, AGX_SELECTOR_ALLOCATE_MEM, NULL, 0, args_in,
sizeof(args_in), NULL, 0, out, &out_sz);
assert(ret == 0);
assert(out_sz == sizeof(out));
@ -148,7 +145,7 @@ agx_bo_alloc(struct agx_device *dev, size_t size,
pthread_mutex_unlock(&dev->bo_map_lock);
/* Fresh handle */
assert(!memcmp(bo, &((struct agx_bo) {}), sizeof(*bo)));
assert(!memcmp(bo, &((struct agx_bo){}), sizeof(*bo)));
bo->type = AGX_ALLOC_REGULAR;
bo->size = size;
@ -160,7 +157,7 @@ agx_bo_alloc(struct agx_device *dev, size_t size,
#if __APPLE__
bo->ptr.gpu = out[0];
bo->ptr.cpu = (void *) out[1];
bo->ptr.cpu = (void *)out[1];
bo->guid = out[5];
#else
if (lo) {
@ -171,7 +168,7 @@ agx_bo_alloc(struct agx_device *dev, size_t size,
AGX_FAKE_HI += bo->size;
}
bo->ptr.gpu = (((uint64_t) bo->handle) << (lo ? 16 : 24));
bo->ptr.gpu = (((uint64_t)bo->handle) << (lo ? 16 : 24));
bo->ptr.cpu = calloc(1, bo->size);
#endif
@ -222,8 +219,8 @@ agx_bo_cache_remove_locked(struct agx_device *dev, struct agx_bo *bo)
* BO. */
static struct agx_bo *
agx_bo_cache_fetch(struct agx_device *dev, size_t size, uint32_t flags, const
bool dontwait)
agx_bo_cache_fetch(struct agx_device *dev, size_t size, uint32_t flags,
const bool dontwait)
{
simple_mtx_lock(&dev->bo_cache.lock);
struct list_head *bucket = agx_bucket(dev, size);
@ -255,8 +252,8 @@ agx_bo_cache_evict_stale_bos(struct agx_device *dev)
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
list_for_each_entry_safe(struct agx_bo, entry,
&dev->bo_cache.lru, lru_link) {
list_for_each_entry_safe(struct agx_bo, entry, &dev->bo_cache.lru,
lru_link) {
/* We want all entries that have been used more than 1 sec ago to be
* dropped, others can be kept. Note the <= 2 check and not <= 1. It's
* here to account for the fact that we're only testing ->tv_sec, not
@ -291,11 +288,10 @@ agx_bo_cache_put_locked(struct agx_bo *bo)
dev->bo_cache.size += bo->size;
if (0) {
printf("BO cache: %zu KiB (+%zu KiB from %s, hit/miss %" PRIu64 "/%" PRIu64 ")\n",
printf("BO cache: %zu KiB (+%zu KiB from %s, hit/miss %" PRIu64
"/%" PRIu64 ")\n",
DIV_ROUND_UP(dev->bo_cache.size, 1024),
DIV_ROUND_UP(bo->size, 1024),
bo->label,
dev->bo_cache.hits,
DIV_ROUND_UP(bo->size, 1024), bo->label, dev->bo_cache.hits,
dev->bo_cache.misses);
}
@ -330,8 +326,7 @@ agx_bo_cache_evict_all(struct agx_device *dev)
for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) {
struct list_head *bucket = &dev->bo_cache.buckets[i];
list_for_each_entry_safe(struct agx_bo, entry, bucket,
bucket_link) {
list_for_each_entry_safe(struct agx_bo, entry, bucket, bucket_link) {
agx_bo_cache_remove_locked(dev, entry);
agx_bo_free(dev, entry);
}
@ -443,9 +438,8 @@ agx_get_global_ids(struct agx_device *dev)
uint64_t out[2] = {};
size_t out_sz = sizeof(out);
ASSERTED kern_return_t ret = IOConnectCallStructMethod(dev->fd,
AGX_SELECTOR_GET_GLOBAL_IDS,
NULL, 0, &out, &out_sz);
ASSERTED kern_return_t ret = IOConnectCallStructMethod(
dev->fd, AGX_SELECTOR_GET_GLOBAL_IDS, NULL, 0, &out, &out_sz);
assert(ret == 0);
assert(out_sz == sizeof(out));
@ -490,7 +484,7 @@ agx_open_device(void *memctx, struct agx_device *dev)
return false;
const char *api = "Equestria";
char in[16] = { 0 };
char in[16] = {0};
assert(strlen(api) < sizeof(in));
memcpy(in, api, strlen(api));
@ -512,7 +506,8 @@ agx_open_device(void *memctx, struct agx_device *dev)
list_inithead(&dev->bo_cache.buckets[i]);
dev->queue = agx_create_command_queue(dev);
dev->cmdbuf = agx_shmem_alloc(dev, 0x4000, true); // length becomes kernelCommandDataSize
dev->cmdbuf = agx_shmem_alloc(dev, 0x4000,
true); // length becomes kernelCommandDataSize
dev->memmap = agx_shmem_alloc(dev, 0x10000, false);
agx_get_global_ids(dev);
@ -541,9 +536,9 @@ agx_create_notification_queue(mach_port_t connection)
size_t resp_size = sizeof(resp);
assert(resp_size == 0x10);
ASSERTED kern_return_t ret = IOConnectCallStructMethod(connection,
AGX_SELECTOR_CREATE_NOTIFICATION_QUEUE,
NULL, 0, &resp, &resp_size);
ASSERTED kern_return_t ret = IOConnectCallStructMethod(
connection, AGX_SELECTOR_CREATE_NOTIFICATION_QUEUE, NULL, 0, &resp,
&resp_size);
assert(resp_size == sizeof(resp));
assert(ret == 0);
@ -551,11 +546,9 @@ agx_create_notification_queue(mach_port_t connection)
mach_port_t notif_port = IODataQueueAllocateNotificationPort();
IOConnectSetNotificationPort(connection, 0, notif_port, resp.unk2);
return (struct agx_notification_queue) {
.port = notif_port,
.queue = resp.queue,
.id = resp.unk2
};
return (struct agx_notification_queue){.port = notif_port,
.queue = resp.queue,
.id = resp.unk2};
}
#endif
@ -566,7 +559,7 @@ agx_create_command_queue(struct agx_device *dev)
struct agx_command_queue queue = {};
{
uint8_t buffer[1024 + 8] = { 0 };
uint8_t buffer[1024 + 8] = {0};
const char *path = "/tmp/a.out";
assert(strlen(path) < 1022);
memcpy(buffer + 0, path, strlen(path));
@ -582,10 +575,9 @@ agx_create_command_queue(struct agx_device *dev)
struct agx_create_command_queue_resp out = {};
size_t out_sz = sizeof(out);
ASSERTED kern_return_t ret = IOConnectCallStructMethod(dev->fd,
AGX_SELECTOR_CREATE_COMMAND_QUEUE,
buffer, sizeof(buffer),
&out, &out_sz);
ASSERTED kern_return_t ret =
IOConnectCallStructMethod(dev->fd, AGX_SELECTOR_CREATE_COMMAND_QUEUE,
buffer, sizeof(buffer), &out, &out_sz);
assert(ret == 0);
assert(out_sz == sizeof(out));
@ -597,41 +589,32 @@ agx_create_command_queue(struct agx_device *dev)
queue.notif = agx_create_notification_queue(dev->fd);
{
uint64_t scalars[2] = {
queue.id,
queue.notif.id
};
uint64_t scalars[2] = {queue.id, queue.notif.id};
ASSERTED kern_return_t ret = IOConnectCallScalarMethod(dev->fd,
0x1D,
scalars, 2, NULL, NULL);
ASSERTED kern_return_t ret =
IOConnectCallScalarMethod(dev->fd, 0x1D, scalars, 2, NULL, NULL);
assert(ret == 0);
}
{
uint64_t scalars[2] = {
queue.id,
0x1ffffffffull
};
uint64_t scalars[2] = {queue.id, 0x1ffffffffull};
ASSERTED kern_return_t ret = IOConnectCallScalarMethod(dev->fd,
0x31,
scalars, 2, NULL, NULL);
ASSERTED kern_return_t ret =
IOConnectCallScalarMethod(dev->fd, 0x31, scalars, 2, NULL, NULL);
assert(ret == 0);
}
return queue;
#else
return (struct agx_command_queue) {
0
};
return (struct agx_command_queue){0};
#endif
}
void
agx_submit_cmdbuf(struct agx_device *dev, unsigned cmdbuf, unsigned mappings, uint64_t scalar)
agx_submit_cmdbuf(struct agx_device *dev, unsigned cmdbuf, unsigned mappings,
uint64_t scalar)
{
#if __APPLE__
struct agx_submit_cmdbuf_req req = {
@ -642,11 +625,9 @@ agx_submit_cmdbuf(struct agx_device *dev, unsigned cmdbuf, unsigned mappings, ui
.notify_2 = 0x1234,
};
ASSERTED kern_return_t ret = IOConnectCallMethod(dev->fd,
AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS,
&scalar, 1,
&req, sizeof(req),
NULL, 0, NULL, 0);
ASSERTED kern_return_t ret =
IOConnectCallMethod(dev->fd, AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS, &scalar,
1, &req, sizeof(req), NULL, 0, NULL, 0);
assert(ret == 0);
return;
#endif
@ -670,17 +651,19 @@ agx_wait_queue(struct agx_command_queue queue)
uint64_t data[4];
unsigned sz = sizeof(data);
unsigned message_id = 0;
uint64_t magic_numbers[2] = { 0xABCD, 0x1234 };
uint64_t magic_numbers[2] = {0xABCD, 0x1234};
while (message_id < 2) {
IOReturn ret = IODataQueueWaitForAvailableData(queue.notif.queue, queue.notif.port);
IOReturn ret =
IODataQueueWaitForAvailableData(queue.notif.queue, queue.notif.port);
if (ret) {
fprintf(stderr, "Error waiting for available data\n");
return;
}
while (IODataQueueDequeue(queue.notif.queue, data, &sz) == kIOReturnSuccess) {
while (IODataQueueDequeue(queue.notif.queue, data, &sz) ==
kIOReturnSuccess) {
assert(sz == sizeof(data));
assert(data[0] == magic_numbers[message_id]);
message_id++;

View file

@ -24,23 +24,23 @@
#ifndef __AGX_DEVICE_H
#define __AGX_DEVICE_H
#include "util/sparse_array.h"
#include "util/simple_mtx.h"
#include "io.h"
#include "util/sparse_array.h"
#include "agx_formats.h"
#include "io.h"
#if __APPLE__
#include <mach/mach.h>
#include <IOKit/IOKitLib.h>
#include <mach/mach.h>
#endif
enum agx_dbg {
AGX_DBG_TRACE = BITFIELD_BIT(0),
AGX_DBG_DEQP = BITFIELD_BIT(1),
AGX_DBG_NO16 = BITFIELD_BIT(2),
AGX_DBG_DIRTY = BITFIELD_BIT(3),
AGX_DBG_PRECOMPILE = BITFIELD_BIT(4),
AGX_DBG_PERF = BITFIELD_BIT(5),
AGX_DBG_DEQP = BITFIELD_BIT(1),
AGX_DBG_NO16 = BITFIELD_BIT(2),
AGX_DBG_DIRTY = BITFIELD_BIT(3),
AGX_DBG_PRECOMPILE = BITFIELD_BIT(4),
AGX_DBG_PERF = BITFIELD_BIT(5),
AGX_DBG_NOCOMPRESS = BITFIELD_BIT(6),
};
@ -94,11 +94,9 @@ struct agx_device {
} bo_cache;
};
bool
agx_open_device(void *memctx, struct agx_device *dev);
bool agx_open_device(void *memctx, struct agx_device *dev);
void
agx_close_device(struct agx_device *dev);
void agx_close_device(struct agx_device *dev);
static inline struct agx_bo *
agx_lookup_bo(struct agx_device *dev, uint32_t handle)
@ -106,22 +104,17 @@ agx_lookup_bo(struct agx_device *dev, uint32_t handle)
return util_sparse_array_get(&dev->bo_map, handle);
}
struct agx_bo
agx_shmem_alloc(struct agx_device *dev, size_t size, bool cmdbuf);
struct agx_bo agx_shmem_alloc(struct agx_device *dev, size_t size, bool cmdbuf);
void
agx_shmem_free(struct agx_device *dev, unsigned handle);
void agx_shmem_free(struct agx_device *dev, unsigned handle);
uint64_t
agx_get_global_id(struct agx_device *dev);
uint64_t agx_get_global_id(struct agx_device *dev);
struct agx_command_queue
agx_create_command_queue(struct agx_device *dev);
struct agx_command_queue agx_create_command_queue(struct agx_device *dev);
void
agx_submit_cmdbuf(struct agx_device *dev, unsigned cmdbuf, unsigned mappings, uint64_t scalar);
void agx_submit_cmdbuf(struct agx_device *dev, unsigned cmdbuf,
unsigned mappings, uint64_t scalar);
void
agx_wait_queue(struct agx_command_queue queue);
void agx_wait_queue(struct agx_command_queue queue);
#endif

View file

@ -21,20 +21,20 @@
* SOFTWARE.
*/
#include "agx_pack.h"
#include "agx_formats.h"
#include "agx_internal_formats.h"
#include "agx_pack.h"
#define T true
#define F false
#define T true
#define F false
#define AGX_INTERNAL_FORMAT__ PIPE_FORMAT_NONE
#define AGX_FMT(pipe, channels_, type_, is_renderable, internal_fmt) \
[PIPE_FORMAT_ ## pipe] = { \
.channels = AGX_CHANNELS_ ## channels_, \
.type = AGX_TEXTURE_TYPE_ ## type_, \
.renderable = is_renderable, \
.internal = (enum pipe_format) AGX_INTERNAL_FORMAT_ ## internal_fmt,\
#define AGX_FMT(pipe, channels_, type_, is_renderable, internal_fmt) \
[PIPE_FORMAT_##pipe] = { \
.channels = AGX_CHANNELS_##channels_, \
.type = AGX_TEXTURE_TYPE_##type_, \
.renderable = is_renderable, \
.internal = (enum pipe_format)AGX_INTERNAL_FORMAT_##internal_fmt, \
}
/* clang-format off */

View file

@ -3,11 +3,11 @@
* SPDX-License-Identifier: MIT
*/
#include "agx_meta.h"
#include "agx_compile.h"
#include "agx_device.h" /* for AGX_MEMORY_TYPE_SHADER */
#include "agx_tilebuffer.h"
#include "nir_builder.h"
#include "agx_meta.h"
#include "agx_device.h" /* for AGX_MEMORY_TYPE_SHADER */
static struct agx_meta_shader *
agx_compile_meta_shader(struct agx_meta_cache *cache, nir_shader *shader,
@ -25,8 +25,7 @@ agx_compile_meta_shader(struct agx_meta_cache *cache, nir_shader *shader,
agx_compile_shader_nir(shader, key, NULL, &binary, &res->info);
res->ptr = agx_pool_upload_aligned_with_bo(&cache->pool, binary.data,
binary.size, 128,
&res->bo);
binary.size, 128, &res->bo);
util_dynarray_fini(&binary);
return res;
@ -77,9 +76,8 @@ static struct agx_meta_shader *
agx_build_background_shader(struct agx_meta_cache *cache,
struct agx_meta_key *key)
{
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
&agx_nir_options,
"agx_background");
nir_builder b = nir_builder_init_simple_shader(
MESA_SHADER_FRAGMENT, &agx_nir_options, "agx_background");
b.shader->info.fs.untyped_color_outputs = true;
struct agx_shader_key compiler_key = {
@ -94,11 +92,13 @@ agx_build_background_shader(struct agx_meta_cache *cache,
bool msaa = key->tib.nr_samples > 1;
assert(nr > 0);
nir_variable *out = nir_variable_create(b.shader, nir_var_shader_out,
glsl_vector_type(GLSL_TYPE_UINT, nr), "output");
nir_variable *out =
nir_variable_create(b.shader, nir_var_shader_out,
glsl_vector_type(GLSL_TYPE_UINT, nr), "output");
out->data.location = FRAG_RESULT_DATA0 + rt;
nir_store_var(&b, out, build_background_op(&b, key->op[rt], rt, nr, msaa), 0xFF);
nir_store_var(&b, out, build_background_op(&b, key->op[rt], rt, nr, msaa),
0xFF);
}
return agx_compile_meta_shader(cache, b.shader, &compiler_key, &key->tib);
@ -109,25 +109,23 @@ agx_build_end_of_tile_shader(struct agx_meta_cache *cache,
struct agx_meta_key *key)
{
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
&agx_nir_options,
"agx_eot");
&agx_nir_options, "agx_eot");
enum glsl_sampler_dim dim = (key->tib.nr_samples > 1) ?
GLSL_SAMPLER_DIM_MS :
GLSL_SAMPLER_DIM_2D;
enum glsl_sampler_dim dim =
(key->tib.nr_samples > 1) ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
for (unsigned rt = 0; rt < ARRAY_SIZE(key->op); ++rt) {
if (key->op[rt] == AGX_META_OP_NONE)
continue;
assert(key->op[rt] == AGX_META_OP_STORE);
nir_block_image_store_agx(&b, nir_imm_int(&b, rt),
nir_imm_intN_t(&b, key->tib.offset_B[rt], 16),
.format = agx_tilebuffer_physical_format(&key->tib, rt),
.image_dim = dim);
nir_block_image_store_agx(
&b, nir_imm_int(&b, rt), nir_imm_intN_t(&b, key->tib.offset_B[rt], 16),
.format = agx_tilebuffer_physical_format(&key->tib, rt),
.image_dim = dim);
}
struct agx_shader_key compiler_key = { 0 };
struct agx_shader_key compiler_key = {0};
return agx_compile_meta_shader(cache, b.shader, &compiler_key, NULL);
}
@ -168,8 +166,7 @@ key_compare(const void *a, const void *b)
}
void
agx_meta_init(struct agx_meta_cache *cache,
struct agx_device *dev,
agx_meta_init(struct agx_meta_cache *cache, struct agx_device *dev,
void *memctx)
{
agx_pool_init(&cache->pool, dev, AGX_MEMORY_TYPE_SHADER, true);

View file

@ -36,11 +36,10 @@ struct agx_meta_shader {
uint32_t ptr;
};
struct agx_meta_shader *
agx_get_meta_shader(struct agx_meta_cache *cache, struct agx_meta_key *key);
struct agx_meta_shader *agx_get_meta_shader(struct agx_meta_cache *cache,
struct agx_meta_key *key);
void
agx_meta_init(struct agx_meta_cache *cache, struct agx_device *dev,
void *memctx);
void agx_meta_init(struct agx_meta_cache *cache, struct agx_device *dev,
void *memctx);
#endif

View file

@ -6,9 +6,9 @@
#ifndef __AGX_NIR_FORMAT_HELPERS_H
#define __AGX_NIR_FORMAT_HELPERS_H
#include "util/format/u_formats.h"
#include "nir_builder.h"
#include "nir_format_convert.h"
#include "util/format/u_formats.h"
static inline nir_ssa_def *
nir_sign_extend_if_sint(nir_builder *b, nir_ssa_def *x, enum pipe_format format)
@ -17,7 +17,7 @@ nir_sign_extend_if_sint(nir_builder *b, nir_ssa_def *x, enum pipe_format format)
return x;
const struct util_format_description *desc = util_format_description(format);
unsigned bits[4] = { 0 };
unsigned bits[4] = {0};
for (unsigned i = 0; i < desc->nr_channels; ++i) {
assert(desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED);

View file

@ -3,10 +3,10 @@
* SPDX-License-Identifier: MIT
*/
#include "agx_nir_format_helpers.h"
#include "agx_tilebuffer.h"
#include "nir.h"
#include "nir_builder.h"
#include "agx_nir_format_helpers.h"
#define ALL_SAMPLES 0xFF
@ -52,12 +52,10 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
/* Trim to format as required by hardware */
value = nir_trim_vector(b, intr->src[0].ssa, comps);
nir_store_local_pixel_agx(b, value,
sample_mask,
.base = tib->offset_B[rt],
.write_mask = nir_intrinsic_write_mask(intr) &
BITFIELD_MASK(comps),
.format = format);
nir_store_local_pixel_agx(
b, value, sample_mask, .base = tib->offset_B[rt],
.write_mask = nir_intrinsic_write_mask(intr) & BITFIELD_MASK(comps),
.format = format);
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
} else {
@ -75,11 +73,9 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
if (f16)
format = PIPE_FORMAT_R16_UINT;
nir_ssa_def *res = nir_load_local_pixel_agx(b, MIN2(intr->num_components, comps),
f16 ? 16 : bit_size,
sample_mask,
.base = tib->offset_B[rt],
.format = format);
nir_ssa_def *res = nir_load_local_pixel_agx(
b, MIN2(intr->num_components, comps), f16 ? 16 : bit_size, sample_mask,
.base = tib->offset_B[rt], .format = format);
/* Extend floats */
if (f16 && nir_dest_bit_size(intr->dest) != 16) {

View file

@ -11,7 +11,7 @@
static bool
is_rgb10_a2(const struct util_format_description *desc)
{
return desc->channel[0].shift == 0 && desc->channel[0].size == 10 &&
return desc->channel[0].shift == 0 && desc->channel[0].size == 10 &&
desc->channel[1].shift == 10 && desc->channel[1].size == 10 &&
desc->channel[2].shift == 20 && desc->channel[2].size == 10 &&
desc->channel[3].shift == 30 && desc->channel[3].size == 2;
@ -60,10 +60,14 @@ agx_vbo_internal_format(enum pipe_format format)
/* Otherwise map to the corresponding integer format */
switch (chan.size) {
case 32: return PIPE_FORMAT_R32_UINT;
case 16: return PIPE_FORMAT_R16_UINT;
case 8: return PIPE_FORMAT_R8_UINT;
default: return PIPE_FORMAT_NONE;
case 32:
return PIPE_FORMAT_R32_UINT;
case 16:
return PIPE_FORMAT_R16_UINT;
case 8:
return PIPE_FORMAT_R8_UINT;
default:
return PIPE_FORMAT_NONE;
}
}
@ -74,18 +78,25 @@ agx_vbo_supports_format(enum pipe_format format)
}
static nir_ssa_def *
apply_swizzle_channel(nir_builder *b, nir_ssa_def *vec,
unsigned swizzle, bool is_int)
apply_swizzle_channel(nir_builder *b, nir_ssa_def *vec, unsigned swizzle,
bool is_int)
{
switch (swizzle) {
case PIPE_SWIZZLE_X: return nir_channel(b, vec, 0);
case PIPE_SWIZZLE_Y: return nir_channel(b, vec, 1);
case PIPE_SWIZZLE_Z: return nir_channel(b, vec, 2);
case PIPE_SWIZZLE_W: return nir_channel(b, vec, 3);
case PIPE_SWIZZLE_0: return nir_imm_intN_t(b, 0, vec->bit_size);
case PIPE_SWIZZLE_1: return is_int ? nir_imm_intN_t(b, 1, vec->bit_size) :
nir_imm_floatN_t(b, 1.0, vec->bit_size);
default: unreachable("Invalid swizzle channel");
case PIPE_SWIZZLE_X:
return nir_channel(b, vec, 0);
case PIPE_SWIZZLE_Y:
return nir_channel(b, vec, 1);
case PIPE_SWIZZLE_Z:
return nir_channel(b, vec, 2);
case PIPE_SWIZZLE_W:
return nir_channel(b, vec, 3);
case PIPE_SWIZZLE_0:
return nir_imm_intN_t(b, 0, vec->bit_size);
case PIPE_SWIZZLE_1:
return is_int ? nir_imm_intN_t(b, 1, vec->bit_size)
: nir_imm_floatN_t(b, 1.0, vec->bit_size);
default:
unreachable("Invalid swizzle channel");
}
}
@ -115,11 +126,11 @@ pass(struct nir_builder *b, nir_instr *instr, void *data)
int chan = util_format_get_first_non_void_channel(attrib.format);
assert(chan >= 0);
bool is_float = desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;
bool is_float = desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;
bool is_unsigned = desc->channel[chan].type == UTIL_FORMAT_TYPE_UNSIGNED;
bool is_signed = desc->channel[chan].type == UTIL_FORMAT_TYPE_SIGNED;
bool is_fixed = desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED;
bool is_int = util_format_is_pure_integer(attrib.format);
bool is_signed = desc->channel[chan].type == UTIL_FORMAT_TYPE_SIGNED;
bool is_fixed = desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED;
bool is_int = util_format_is_pure_integer(attrib.format);
assert((is_float ^ is_unsigned ^ is_signed ^ is_fixed) && "Invalid format");
@ -134,9 +145,10 @@ pass(struct nir_builder *b, nir_instr *instr, void *data)
* zero extended.
*/
unsigned interchange_register_size =
util_format_is_pure_uint(interchange_format) && !util_format_is_pure_uint(attrib.format) ?
(interchange_align * 8):
nir_dest_bit_size(intr->dest);
util_format_is_pure_uint(interchange_format) &&
!util_format_is_pure_uint(attrib.format)
? (interchange_align * 8)
: nir_dest_bit_size(intr->dest);
/* Non-UNORM R10G10B10A2 loaded as a scalar and unpacked */
if (interchange_format == PIPE_FORMAT_R32_UINT && !desc->is_array)
@ -145,9 +157,10 @@ pass(struct nir_builder *b, nir_instr *instr, void *data)
/* Calculate the element to fetch the vertex for. Divide the instance ID by
* the divisor for per-instance data. Divisor=0 specifies per-vertex data.
*/
nir_ssa_def *el = (attrib.divisor == 0) ?
nir_load_vertex_id(b) :
nir_udiv_imm(b, nir_load_instance_id(b), attrib.divisor);
nir_ssa_def *el =
(attrib.divisor == 0)
? nir_load_vertex_id(b)
: nir_udiv_imm(b, nir_load_instance_id(b), attrib.divisor);
nir_ssa_def *base = nir_load_vbo_base_agx(b, nir_imm_int(b, attrib.buf));
@ -161,18 +174,15 @@ pass(struct nir_builder *b, nir_instr *instr, void *data)
nir_iadd_imm(b, nir_imul_imm(b, el, stride_el), offset_el);
/* Load the raw vector */
nir_ssa_def *memory =
nir_load_constant_agx(b, interchange_comps,
interchange_register_size,
base,
stride_offset_el,
.format = interchange_format);
nir_ssa_def *memory = nir_load_constant_agx(
b, interchange_comps, interchange_register_size, base, stride_offset_el,
.format = interchange_format);
unsigned dest_size = nir_dest_bit_size(intr->dest);
/* Unpack but do not convert non-native non-array formats */
if (is_rgb10_a2(desc) && interchange_format == PIPE_FORMAT_R32_UINT) {
unsigned bits[] = { 10, 10, 10, 2 };
unsigned bits[] = {10, 10, 10, 2};
if (is_signed)
memory = nir_format_unpack_sint(b, memory, bits, 4);
@ -183,11 +193,11 @@ pass(struct nir_builder *b, nir_instr *instr, void *data)
if (desc->channel[chan].normalized) {
/* 8/16-bit normalized formats are native, others converted here */
if (is_rgb10_a2(desc) && is_signed) {
unsigned bits[] = { 10, 10, 10, 2 };
unsigned bits[] = {10, 10, 10, 2};
memory = nir_format_snorm_to_float(b, memory, bits);
} else if (desc->channel[chan].size == 32) {
assert(desc->is_array && "no non-array 32-bit norm formats");
unsigned bits[] = { 32, 32, 32, 32 };
unsigned bits[] = {32, 32, 32, 32};
if (is_signed)
memory = nir_format_snorm_to_float(b, memory, bits);
@ -217,7 +227,7 @@ pass(struct nir_builder *b, nir_instr *instr, void *data)
/* We now have a properly formatted vector of the components in memory. Apply
* the format swizzle forwards to trim/pad/reorder as needed.
*/
nir_ssa_def *channels[4] = { NULL };
nir_ssa_def *channels[4] = {NULL};
assert(nir_intrinsic_component(intr) == 0 && "unimplemented");
for (unsigned i = 0; i < intr->num_components; ++i)
@ -232,8 +242,6 @@ bool
agx_nir_lower_vbo(nir_shader *shader, struct agx_vbufs *vbufs)
{
assert(shader->info.stage == MESA_SHADER_VERTEX);
return nir_shader_instructions_pass(shader, pass,
nir_metadata_block_index |
nir_metadata_dominance,
vbufs);
return nir_shader_instructions_pass(
shader, pass, nir_metadata_block_index | nir_metadata_dominance, vbufs);
}

View file

@ -6,17 +6,17 @@
#ifndef __AGX_NIR_LOWER_VBO_H
#define __AGX_NIR_LOWER_VBO_H
#include <stdint.h>
#include <stdbool.h>
#include "nir.h"
#include <stdint.h>
#include "util/format/u_formats.h"
#include "nir.h"
#ifdef __cplusplus
extern "C" {
#endif
#define AGX_MAX_ATTRIBS (16)
#define AGX_MAX_VBUFS (16)
#define AGX_MAX_VBUFS (16)
/* See pipe_vertex_element for justification on the sizes. This structure should
* be small so it can be embedded into a shader key.

View file

@ -41,7 +41,9 @@ agx_ppp_update_size(struct AGX_PPP_HEADER *present)
{
size_t size = AGX_PPP_HEADER_LENGTH;
#define PPP_CASE(x, y) if (present->x) size += AGX_ ## y ##_LENGTH;
#define PPP_CASE(x, y) \
if (present->x) \
size += AGX_##y##_LENGTH;
PPP_CASE(fragment_control, FRAGMENT_CONTROL);
PPP_CASE(fragment_control_2, FRAGMENT_CONTROL_2);
PPP_CASE(fragment_front_face, FRAGMENT_FACE);
@ -84,16 +86,17 @@ agx_ppp_validate(struct agx_ppp_update *ppp, size_t size)
return true;
}
#define agx_ppp_push(ppp, T, name) \
for (bool it = agx_ppp_validate((ppp), AGX_##T##_LENGTH); it; it = false, \
(ppp)->head += AGX_##T##_LENGTH) \
#define agx_ppp_push(ppp, T, name) \
for (bool it = agx_ppp_validate((ppp), AGX_##T##_LENGTH); it; \
it = false, (ppp)->head += AGX_##T##_LENGTH) \
agx_pack((ppp)->head, T, name)
#define agx_ppp_push_packed(ppp, src, T) do { \
agx_ppp_validate((ppp), AGX_##T##_LENGTH); \
memcpy((ppp)->head, src, AGX_##T##_LENGTH); \
(ppp)->head += AGX_##T##_LENGTH; \
} while(0) \
#define agx_ppp_push_packed(ppp, src, T) \
do { \
agx_ppp_validate((ppp), AGX_##T##_LENGTH); \
memcpy((ppp)->head, src, AGX_##T##_LENGTH); \
(ppp)->head += AGX_##T##_LENGTH; \
} while (0)
static inline struct agx_ppp_update
agx_new_ppp_update(struct agx_pool *pool, struct AGX_PPP_HEADER present)
@ -110,7 +113,9 @@ agx_new_ppp_update(struct agx_pool *pool, struct AGX_PPP_HEADER present)
#endif
};
agx_ppp_push(&ppp, PPP_HEADER, cfg) { cfg = present; }
agx_ppp_push(&ppp, PPP_HEADER, cfg) {
cfg = present;
}
return ppp;
}
@ -131,7 +136,7 @@ agx_ppp_fini(uint8_t **out, struct agx_ppp_update *ppp)
agx_pack(*out, PPP_STATE, cfg) {
cfg.pointer_hi = (ppp->gpu_base >> 32);
cfg.pointer_lo = (uint32_t) ppp->gpu_base;
cfg.pointer_lo = (uint32_t)ppp->gpu_base;
cfg.size_words = size_words;
};

View file

@ -3,9 +3,9 @@
* SPDX-License-Identifier: MIT
*/
#include "agx_tilebuffer.h"
#include <assert.h>
#include "util/format/u_format.h"
#include "agx_tilebuffer.h"
#include "agx_formats.h"
#include "agx_usc.h"
@ -33,17 +33,14 @@ agx_select_tile_size(unsigned bytes_per_pixel)
return size;
}
unreachable("No supported tile size meets the bytes per pixel requirement");
unreachable("No supported tile size meets the bytes per pixel requirement");
}
struct agx_tilebuffer_layout
agx_build_tilebuffer_layout(enum pipe_format *formats,
uint8_t nr_cbufs,
agx_build_tilebuffer_layout(enum pipe_format *formats, uint8_t nr_cbufs,
uint8_t nr_samples)
{
struct agx_tilebuffer_layout tib = {
.nr_samples = nr_samples
};
struct agx_tilebuffer_layout tib = {.nr_samples = nr_samples};
uint32_t offset_B = 0;
@ -59,8 +56,9 @@ agx_build_tilebuffer_layout(enum pipe_format *formats,
tib.offset_B[rt] = offset_B;
unsigned nr = util_format_get_nr_components(physical_fmt) == 1 ?
util_format_get_nr_components(formats[rt]) : 1;
unsigned nr = util_format_get_nr_components(physical_fmt) == 1
? util_format_get_nr_components(formats[rt])
: 1;
unsigned size_B = align_B * nr;
offset_B += size_B;
@ -95,8 +93,8 @@ agx_shared_layout_from_tile_size(struct agx_tile_size t)
uint32_t
agx_tilebuffer_total_size(struct agx_tilebuffer_layout *tib)
{
return tib->sample_size_B * tib->nr_samples *
tib->tile_size.width * tib->tile_size.height;
return tib->sample_size_B * tib->nr_samples * tib->tile_size.width *
tib->tile_size.height;
}
void

View file

@ -6,8 +6,8 @@
#ifndef __AGX_TILEBUFFER_H
#define __AGX_TILEBUFFER_H
#include <stdint.h>
#include <stdbool.h>
#include <stdint.h>
#include "util/format/u_formats.h"
#ifdef __cplusplus
@ -43,16 +43,16 @@ struct agx_tilebuffer_layout {
};
struct agx_tilebuffer_layout
agx_build_tilebuffer_layout(enum pipe_format *formats, uint8_t nr_cbufs, uint8_t nr_samples);
agx_build_tilebuffer_layout(enum pipe_format *formats, uint8_t nr_cbufs,
uint8_t nr_samples);
bool
agx_nir_lower_tilebuffer(struct nir_shader *shader, struct agx_tilebuffer_layout *tib);
bool agx_nir_lower_tilebuffer(struct nir_shader *shader,
struct agx_tilebuffer_layout *tib);
void
agx_usc_tilebuffer(struct agx_usc_builder *b, struct agx_tilebuffer_layout *tib);
void agx_usc_tilebuffer(struct agx_usc_builder *b,
struct agx_tilebuffer_layout *tib);
uint32_t
agx_tilebuffer_total_size(struct agx_tilebuffer_layout *tib);
uint32_t agx_tilebuffer_total_size(struct agx_tilebuffer_layout *tib);
enum pipe_format
agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt);

View file

@ -20,8 +20,7 @@ struct agx_usc_builder {
};
static struct agx_usc_builder
agx_alloc_usc_control(struct agx_pool *pool,
unsigned num_reg_bindings)
agx_alloc_usc_control(struct agx_pool *pool, unsigned num_reg_bindings)
{
STATIC_ASSERT(AGX_USC_UNIFORM_HIGH_LENGTH == AGX_USC_UNIFORM_LENGTH);
STATIC_ASSERT(AGX_USC_TEXTURE_LENGTH == AGX_USC_UNIFORM_LENGTH);
@ -43,7 +42,7 @@ agx_alloc_usc_control(struct agx_pool *pool,
#endif
};
b.head = (uint8_t *) b.T.cpu;
b.head = (uint8_t *)b.T.cpu;
return b;
}
@ -52,15 +51,16 @@ static bool
agx_usc_builder_validate(struct agx_usc_builder *b, size_t size)
{
#ifndef NDEBUG
assert(((b->head - (uint8_t *) b->T.cpu) + size) <= b->size);
assert(((b->head - (uint8_t *)b->T.cpu) + size) <= b->size);
#endif
return true;
}
#define agx_usc_pack(b, struct_name, template) \
for (bool it = agx_usc_builder_validate((b), AGX_USC_##struct_name##_LENGTH); \
it; it = false, (b)->head += AGX_USC_##struct_name##_LENGTH) \
#define agx_usc_pack(b, struct_name, template) \
for (bool it = \
agx_usc_builder_validate((b), AGX_USC_##struct_name##_LENGTH); \
it; it = false, (b)->head += AGX_USC_##struct_name##_LENGTH) \
agx_pack((b)->head, USC_##struct_name, template)
static void

View file

@ -23,20 +23,20 @@
* SOFTWARE.
*/
#include <agx_pack.h>
#include <ctype.h>
#include <memory.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <stdbool.h>
#include <stdarg.h>
#include <ctype.h>
#include <sys/mman.h>
#include <agx_pack.h>
#include "decode.h"
#include "io.h"
#include "hexdump.h"
#include "io.h"
static const char *agx_alloc_types[AGX_NUM_ALLOC] = { "mem", "map", "cmd" };
static const char *agx_alloc_types[AGX_NUM_ALLOC] = {"mem", "map", "cmd"};
static void
agx_disassemble(void *_code, size_t maxlen, FILE *fp)
@ -58,7 +58,9 @@ static struct agx_bo *
agxdecode_find_mapped_gpu_mem_containing_rw(uint64_t addr)
{
for (unsigned i = 0; i < mmap_count; ++i) {
if (mmap_array[i].type == AGX_ALLOC_REGULAR && addr >= mmap_array[i].ptr.gpu && (addr - mmap_array[i].ptr.gpu) < mmap_array[i].size)
if (mmap_array[i].type == AGX_ALLOC_REGULAR &&
addr >= mmap_array[i].ptr.gpu &&
(addr - mmap_array[i].ptr.gpu) < mmap_array[i].size)
return mmap_array + i;
}
@ -78,7 +80,10 @@ agxdecode_find_mapped_gpu_mem_containing(uint64_t addr)
}
if (mem && !mem->mapped) {
fprintf(stderr, "[ERROR] access to memory not mapped (GPU %" PRIx64 ", handle %u)\n", mem->ptr.gpu, mem->handle);
fprintf(stderr,
"[ERROR] access to memory not mapped (GPU %" PRIx64
", handle %u)\n",
mem->ptr.gpu, mem->handle);
}
return mem;
@ -132,16 +137,18 @@ agxdecode_decode_segment_list(void *segment_list)
if (hdr->segment_count != 1) {
fprintf(agxdecode_dump_stream, "ERROR - can't handle segment count %u\n",
hdr->segment_count);
hdr->segment_count);
}
fprintf(agxdecode_dump_stream, "Segment list:\n");
fprintf(agxdecode_dump_stream, " Command buffer shmem ID: %" PRIx64 "\n", hdr->cmdbuf_id);
fprintf(agxdecode_dump_stream, " Encoder ID: %" PRIx64 "\n", hdr->encoder_id);
fprintf(agxdecode_dump_stream, " Command buffer shmem ID: %" PRIx64 "\n",
hdr->cmdbuf_id);
fprintf(agxdecode_dump_stream, " Encoder ID: %" PRIx64 "\n",
hdr->encoder_id);
fprintf(agxdecode_dump_stream, " Kernel commands start offset: %u\n",
hdr->kernel_commands_start_offset);
hdr->kernel_commands_start_offset);
fprintf(agxdecode_dump_stream, " Kernel commands end offset: %u\n",
hdr->kernel_commands_end_offset);
hdr->kernel_commands_end_offset);
fprintf(agxdecode_dump_stream, " Unknown: 0x%X\n", hdr->unk);
/* Expected structure: header followed by resource groups */
@ -157,7 +164,7 @@ agxdecode_decode_segment_list(void *segment_list)
fprintf(agxdecode_dump_stream, "ERROR - padding tripped\n");
/* Check the entries */
struct agx_map_entry *groups = ((void *) hdr) + sizeof(*hdr);
struct agx_map_entry *groups = ((void *)hdr) + sizeof(*hdr);
for (unsigned i = 0; i < hdr->resource_group_count; ++i) {
struct agx_map_entry group = groups[i];
unsigned count = group.resource_count;
@ -170,21 +177,23 @@ agxdecode_decode_segment_list(void *segment_list)
fprintf(agxdecode_dump_stream, "ERROR - invalid count %u\n", count);
continue;
}
for (unsigned j = 0; j < count; ++j) {
unsigned handle = group.resource_id[j];
unsigned unk = group.resource_unk[j];
unsigned flags = group.resource_flags[j];
if (!handle) {
fprintf(agxdecode_dump_stream, "ERROR - invalid handle %u\n", handle);
fprintf(agxdecode_dump_stream, "ERROR - invalid handle %u\n",
handle);
continue;
}
agxdecode_mark_mapped(handle);
nr_handles++;
fprintf(agxdecode_dump_stream, "%u (0x%X, 0x%X)\n", handle, unk, flags);
fprintf(agxdecode_dump_stream, "%u (0x%X, 0x%X)\n", handle, unk,
flags);
}
if (group.unka)
@ -196,22 +205,22 @@ agxdecode_decode_segment_list(void *segment_list)
/* Check the handle count */
if (nr_handles != hdr->total_resources) {
fprintf(agxdecode_dump_stream, "ERROR - wrong handle count, got %u, expected %u (%u entries)\n",
nr_handles, hdr->total_resources, hdr->resource_group_count);
fprintf(agxdecode_dump_stream,
"ERROR - wrong handle count, got %u, expected %u (%u entries)\n",
nr_handles, hdr->total_resources, hdr->resource_group_count);
}
}
static inline void *
__agxdecode_fetch_gpu_mem(const struct agx_bo *mem,
uint64_t gpu_va, size_t size,
int line, const char *filename)
__agxdecode_fetch_gpu_mem(const struct agx_bo *mem, uint64_t gpu_va,
size_t size, int line, const char *filename)
{
if (!mem)
mem = agxdecode_find_mapped_gpu_mem_containing(gpu_va);
if (!mem) {
fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n",
gpu_va, filename, line);
fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n", gpu_va,
filename, line);
fflush(agxdecode_dump_stream);
assert(0);
}
@ -222,8 +231,8 @@ __agxdecode_fetch_gpu_mem(const struct agx_bo *mem,
return mem->ptr.cpu + gpu_va - mem->ptr.gpu;
}
#define agxdecode_fetch_gpu_mem(gpu_va, size) \
__agxdecode_fetch_gpu_mem(NULL, gpu_va, size, __LINE__, __FILE__)
#define agxdecode_fetch_gpu_mem(gpu_va, size) \
__agxdecode_fetch_gpu_mem(NULL, gpu_va, size, __LINE__, __FILE__)
static void
agxdecode_map_read_write(void)
@ -239,15 +248,17 @@ agxdecode_map_read_write(void)
/* Helpers for parsing the cmdstream */
#define DUMP_UNPACKED(T, var, str) { \
agxdecode_log(str); \
agx_print(agxdecode_dump_stream, T, var, (agxdecode_indent + 1) * 2); \
}
#define DUMP_UNPACKED(T, var, str) \
{ \
agxdecode_log(str); \
agx_print(agxdecode_dump_stream, T, var, (agxdecode_indent + 1) * 2); \
}
#define DUMP_CL(T, cl, str) {\
agx_unpack(agxdecode_dump_stream, cl, T, temp); \
DUMP_UNPACKED(T, temp, str "\n"); \
}
#define DUMP_CL(T, cl, str) \
{ \
agx_unpack(agxdecode_dump_stream, cl, T, temp); \
DUMP_UNPACKED(T, temp, str "\n"); \
}
#define agxdecode_log(str) fputs(str, agxdecode_dump_stream)
#define agxdecode_msg(str) fprintf(agxdecode_dump_stream, "// %s", str)
@ -257,26 +268,30 @@ unsigned agxdecode_indent = 0;
static void
agxdecode_dump_bo(struct agx_bo *bo, const char *name)
{
fprintf(agxdecode_dump_stream, "%s %s (%u)\n", name, bo->name ?: "", bo->handle);
fprintf(agxdecode_dump_stream, "%s %s (%u)\n", name, bo->name ?: "",
bo->handle);
hexdump(agxdecode_dump_stream, bo->ptr.cpu, bo->size, false);
}
/* Abstraction for command stream parsing */
typedef unsigned (*decode_cmd)(const uint8_t *map, uint64_t *link, bool verbose);
typedef unsigned (*decode_cmd)(const uint8_t *map, uint64_t *link,
bool verbose);
#define STATE_DONE (0xFFFFFFFFu)
#define STATE_LINK (0xFFFFFFFEu)
static void
agxdecode_stateful(uint64_t va, const char *label, decode_cmd decoder, bool verbose)
agxdecode_stateful(uint64_t va, const char *label, decode_cmd decoder,
bool verbose)
{
struct agx_bo *alloc = agxdecode_find_mapped_gpu_mem_containing(va);
assert(alloc != NULL && "nonexistant object");
fprintf(agxdecode_dump_stream, "%s (%" PRIx64 ", handle %u)\n", label, va, alloc->handle);
fprintf(agxdecode_dump_stream, "%s (%" PRIx64 ", handle %u)\n", label, va,
alloc->handle);
fflush(agxdecode_dump_stream);
uint8_t *map = agxdecode_fetch_gpu_mem(va, 64);
uint8_t *end = (uint8_t *) alloc->ptr.cpu + alloc->size;
uint8_t *end = (uint8_t *)alloc->ptr.cpu + alloc->size;
uint64_t link = 0;
if (verbose)
@ -300,7 +315,7 @@ agxdecode_stateful(uint64_t va, const char *label, decode_cmd decoder, bool verb
} else if (count == STATE_LINK) {
alloc = agxdecode_find_mapped_gpu_mem_containing(link);
map = agxdecode_fetch_gpu_mem(link, 64);
end = (uint8_t *) alloc->ptr.cpu + alloc->size;
end = (uint8_t *)alloc->ptr.cpu + alloc->size;
}
}
}
@ -310,10 +325,10 @@ agxdecode_usc(const uint8_t *map, UNUSED uint64_t *link, UNUSED bool verbose)
{
enum agx_usc_control type = map[0];
#define USC_CASE(name, human) \
case AGX_USC_CONTROL_##name: { \
DUMP_CL(USC_##name, map, human); \
return AGX_USC_##name##_LENGTH; \
#define USC_CASE(name, human) \
case AGX_USC_CONTROL_##name: { \
DUMP_CL(USC_##name, map, human); \
return AGX_USC_##name##_LENGTH; \
}
switch (type) {
@ -326,8 +341,8 @@ agxdecode_usc(const uint8_t *map, UNUSED uint64_t *link, UNUSED bool verbose)
agx_unpack(agxdecode_dump_stream, map, USC_PRESHADER, ctrl);
DUMP_UNPACKED(USC_PRESHADER, ctrl, "Preshader\n");
agx_disassemble(agxdecode_fetch_gpu_mem(ctrl.code, 2048),
8192, agxdecode_dump_stream);
agx_disassemble(agxdecode_fetch_gpu_mem(ctrl.code, 2048), 8192,
agxdecode_dump_stream);
return STATE_DONE;
}
@ -337,8 +352,8 @@ agxdecode_usc(const uint8_t *map, UNUSED uint64_t *link, UNUSED bool verbose)
DUMP_UNPACKED(USC_SHADER, ctrl, "Shader\n");
agxdecode_log("\n");
agx_disassemble(agxdecode_fetch_gpu_mem(ctrl.code, 2048),
8192, agxdecode_dump_stream);
agx_disassemble(agxdecode_fetch_gpu_mem(ctrl.code, 2048), 8192,
agxdecode_dump_stream);
agxdecode_log("\n");
return AGX_USC_SHADER_LENGTH;
@ -348,8 +363,8 @@ agxdecode_usc(const uint8_t *map, UNUSED uint64_t *link, UNUSED bool verbose)
agx_unpack(agxdecode_dump_stream, map, USC_SAMPLER, temp);
DUMP_UNPACKED(USC_SAMPLER, temp, "Sampler state\n");
uint8_t *samp = agxdecode_fetch_gpu_mem(temp.buffer,
AGX_SAMPLER_LENGTH * temp.count);
uint8_t *samp =
agxdecode_fetch_gpu_mem(temp.buffer, AGX_SAMPLER_LENGTH * temp.count);
for (unsigned i = 0; i < temp.count; ++i) {
DUMP_CL(SAMPLER, samp, "Sampler");
@ -363,8 +378,8 @@ agxdecode_usc(const uint8_t *map, UNUSED uint64_t *link, UNUSED bool verbose)
agx_unpack(agxdecode_dump_stream, map, USC_TEXTURE, temp);
DUMP_UNPACKED(USC_TEXTURE, temp, "Texture state\n");
uint8_t *tex = agxdecode_fetch_gpu_mem(temp.buffer,
AGX_TEXTURE_LENGTH * temp.count);
uint8_t *tex =
agxdecode_fetch_gpu_mem(temp.buffer, AGX_TEXTURE_LENGTH * temp.count);
/* Note: samplers only need 8 byte alignment? */
for (unsigned i = 0; i < temp.count; ++i) {
@ -378,15 +393,14 @@ agxdecode_usc(const uint8_t *map, UNUSED uint64_t *link, UNUSED bool verbose)
return AGX_USC_TEXTURE_LENGTH;
}
USC_CASE(FRAGMENT_PROPERTIES, "Fragment properties");
USC_CASE(UNIFORM, "Uniform");
USC_CASE(UNIFORM_HIGH, "Uniform high");
USC_CASE(SHARED, "Shared");
USC_CASE(REGISTERS, "Registers");
USC_CASE(FRAGMENT_PROPERTIES, "Fragment properties");
USC_CASE(UNIFORM, "Uniform");
USC_CASE(UNIFORM_HIGH, "Uniform high");
USC_CASE(SHARED, "Shared");
USC_CASE(REGISTERS, "Registers");
default:
fprintf(agxdecode_dump_stream, "Unknown USC control type: %u\n",
type);
fprintf(agxdecode_dump_stream, "Unknown USC control type: %u\n", type);
hexdump(agxdecode_dump_stream, map, 8, false);
return 8;
}
@ -394,12 +408,12 @@ agxdecode_usc(const uint8_t *map, UNUSED uint64_t *link, UNUSED bool verbose)
#undef USC_CASE
}
#define PPP_PRINT(map, header_name, struct_name, human) \
if (hdr.header_name) { \
assert(((map + AGX_##struct_name##_LENGTH) <= (base + size)) && \
"buffer overrun in PPP update"); \
DUMP_CL(struct_name, map, human); \
map += AGX_##struct_name##_LENGTH; \
#define PPP_PRINT(map, header_name, struct_name, human) \
if (hdr.header_name) { \
assert(((map + AGX_##struct_name##_LENGTH) <= (base + size)) && \
"buffer overrun in PPP update"); \
DUMP_CL(struct_name, map, human); \
map += AGX_##struct_name##_LENGTH; \
}
static void
@ -431,7 +445,8 @@ agxdecode_record(uint64_t va, size_t size, bool verbose)
if (hdr.fragment_shader) {
agx_unpack(agxdecode_dump_stream, map, FRAGMENT_SHADER, frag);
agxdecode_stateful(frag.pipeline, "Fragment pipeline", agxdecode_usc, verbose);
agxdecode_stateful(frag.pipeline, "Fragment pipeline", agxdecode_usc,
verbose);
if (frag.cf_bindings) {
uint8_t *cf = agxdecode_fetch_gpu_mem(frag.cf_bindings, 128);
@ -451,7 +466,8 @@ agxdecode_record(uint64_t va, size_t size, bool verbose)
}
PPP_PRINT(map, occlusion_query, FRAGMENT_OCCLUSION_QUERY, "Occlusion query");
PPP_PRINT(map, occlusion_query_2, FRAGMENT_OCCLUSION_QUERY_2, "Occlusion query 2");
PPP_PRINT(map, occlusion_query_2, FRAGMENT_OCCLUSION_QUERY_2,
"Occlusion query 2");
PPP_PRINT(map, output_unknown, OUTPUT_UNKNOWN, "Output unknown");
PPP_PRINT(map, output_size, OUTPUT_SIZE, "Output size");
PPP_PRINT(map, varying_word_2, VARYING_2, "Varying word 2");
@ -477,7 +493,7 @@ agxdecode_cdm(const uint8_t *map, uint64_t *link, bool verbose)
case AGX_CDM_BLOCK_TYPE_STREAM_LINK: {
agx_unpack(agxdecode_dump_stream, map, CDM_STREAM_LINK, hdr);
DUMP_UNPACKED(CDM_STREAM_LINK, hdr, "Stream Link\n");
*link = hdr.target_lo | (((uint64_t) hdr.target_hi) << 32);
*link = hdr.target_lo | (((uint64_t)hdr.target_hi) << 32);
return STATE_LINK;
}
@ -504,7 +520,7 @@ agxdecode_vdm(const uint8_t *map, uint64_t *link, bool verbose)
case AGX_VDM_BLOCK_TYPE_PPP_STATE_UPDATE: {
agx_unpack(agxdecode_dump_stream, map, PPP_STATE, cmd);
uint64_t address = (((uint64_t) cmd.pointer_hi) << 32) | cmd.pointer_lo;
uint64_t address = (((uint64_t)cmd.pointer_hi) << 32) | cmd.pointer_lo;
struct agx_bo *mem = agxdecode_find_mapped_gpu_mem_containing(address);
if (mem)
@ -520,24 +536,28 @@ agxdecode_vdm(const uint8_t *map, uint64_t *link, bool verbose)
agx_unpack(agxdecode_dump_stream, map, VDM_STATE, hdr);
map += AGX_VDM_STATE_LENGTH;
#define VDM_PRINT(header_name, STRUCT_NAME, human) \
if (hdr.header_name##_present) { \
DUMP_CL(VDM_STATE_##STRUCT_NAME, map, human); \
map += AGX_VDM_STATE_##STRUCT_NAME##_LENGTH; \
length += AGX_VDM_STATE_##STRUCT_NAME##_LENGTH; \
}
#define VDM_PRINT(header_name, STRUCT_NAME, human) \
if (hdr.header_name##_present) { \
DUMP_CL(VDM_STATE_##STRUCT_NAME, map, human); \
map += AGX_VDM_STATE_##STRUCT_NAME##_LENGTH; \
length += AGX_VDM_STATE_##STRUCT_NAME##_LENGTH; \
}
VDM_PRINT(restart_index, RESTART_INDEX, "Restart index");
VDM_PRINT(vertex_shader_word_0, VERTEX_SHADER_WORD_0, "Vertex shader word 0");
VDM_PRINT(vertex_shader_word_0, VERTEX_SHADER_WORD_0,
"Vertex shader word 0");
if (hdr.vertex_shader_word_1_present) {
agx_unpack(agxdecode_dump_stream, map, VDM_STATE_VERTEX_SHADER_WORD_1,
word_1);
fprintf(agxdecode_dump_stream, "Pipeline %X\n", (uint32_t) word_1.pipeline);
agxdecode_stateful(word_1.pipeline, "Pipeline", agxdecode_usc, verbose);
fprintf(agxdecode_dump_stream, "Pipeline %X\n",
(uint32_t)word_1.pipeline);
agxdecode_stateful(word_1.pipeline, "Pipeline", agxdecode_usc,
verbose);
}
VDM_PRINT(vertex_shader_word_1, VERTEX_SHADER_WORD_1, "Vertex shader word 1");
VDM_PRINT(vertex_shader_word_1, VERTEX_SHADER_WORD_1,
"Vertex shader word 1");
VDM_PRINT(vertex_outputs, VERTEX_OUTPUTS, "Vertex outputs");
VDM_PRINT(vertex_unknown, VERTEX_UNKNOWN, "Vertex unknown");
@ -551,12 +571,12 @@ agxdecode_vdm(const uint8_t *map, uint64_t *link, bool verbose)
DUMP_UNPACKED(INDEX_LIST, hdr, "Index List\n");
map += AGX_INDEX_LIST_LENGTH;
#define IDX_PRINT(header_name, STRUCT_NAME, human) \
if (hdr.header_name##_present) { \
DUMP_CL(INDEX_LIST_##STRUCT_NAME, map, human); \
map += AGX_INDEX_LIST_##STRUCT_NAME##_LENGTH; \
length += AGX_INDEX_LIST_##STRUCT_NAME##_LENGTH; \
}
#define IDX_PRINT(header_name, STRUCT_NAME, human) \
if (hdr.header_name##_present) { \
DUMP_CL(INDEX_LIST_##STRUCT_NAME, map, human); \
map += AGX_INDEX_LIST_##STRUCT_NAME##_LENGTH; \
length += AGX_INDEX_LIST_##STRUCT_NAME##_LENGTH; \
}
IDX_PRINT(index_buffer, BUFFER_LO, "Index buffer");
IDX_PRINT(index_count, COUNT, "Index count");
@ -571,7 +591,7 @@ agxdecode_vdm(const uint8_t *map, uint64_t *link, bool verbose)
case AGX_VDM_BLOCK_TYPE_STREAM_LINK: {
agx_unpack(agxdecode_dump_stream, map, VDM_STREAM_LINK, hdr);
DUMP_UNPACKED(VDM_STREAM_LINK, hdr, "Stream Link\n");
*link = hdr.target_lo | (((uint64_t) hdr.target_hi) << 32);
*link = hdr.target_lo | (((uint64_t)hdr.target_hi) << 32);
return STATE_LINK;
}
@ -607,25 +627,25 @@ agxdecode_gfx(uint32_t *cmdbuf, uint64_t encoder, bool verbose)
if (gfx.clear_pipeline_unk) {
fprintf(agxdecode_dump_stream, "Unk: %X\n", gfx.clear_pipeline_unk);
agxdecode_stateful(gfx.clear_pipeline, "Clear pipeline",
agxdecode_usc, verbose);
agxdecode_stateful(gfx.clear_pipeline, "Clear pipeline", agxdecode_usc,
verbose);
}
if (gfx.store_pipeline_unk) {
assert(gfx.store_pipeline_unk == 0x4);
agxdecode_stateful(gfx.store_pipeline, "Store pipeline",
agxdecode_usc, verbose);
agxdecode_stateful(gfx.store_pipeline, "Store pipeline", agxdecode_usc,
verbose);
}
assert((gfx.partial_reload_pipeline_unk & 0xF) == 0x4);
if (gfx.partial_reload_pipeline) {
agxdecode_stateful(gfx.partial_reload_pipeline,
"Partial reload pipeline", agxdecode_usc, verbose);
agxdecode_stateful(gfx.partial_reload_pipeline, "Partial reload pipeline",
agxdecode_usc, verbose);
}
if (gfx.partial_store_pipeline) {
agxdecode_stateful(gfx.partial_store_pipeline,
"Partial store pipeline", agxdecode_usc, verbose);
agxdecode_stateful(gfx.partial_store_pipeline, "Partial store pipeline",
agxdecode_usc, verbose);
}
}
@ -634,7 +654,8 @@ agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose)
{
agxdecode_dump_file_open();
struct agx_bo *cmdbuf = agxdecode_find_handle(cmdbuf_handle, AGX_ALLOC_CMDBUF);
struct agx_bo *cmdbuf =
agxdecode_find_handle(cmdbuf_handle, AGX_ALLOC_CMDBUF);
struct agx_bo *map = agxdecode_find_handle(map_handle, AGX_ALLOC_MEMMAP);
assert(cmdbuf != NULL && "nonexistant command buffer");
assert(map != NULL && "nonexistant mapping");
@ -646,10 +667,12 @@ agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose)
agx_unpack(agxdecode_dump_stream, cmdbuf->ptr.cpu, IOGPU_HEADER, cmd);
DUMP_UNPACKED(IOGPU_HEADER, cmd, "IOGPU Header\n");
DUMP_CL(IOGPU_ATTACHMENT_COUNT, ((uint8_t *) cmdbuf->ptr.cpu +
cmd.attachment_offset), "Attachment count");
DUMP_CL(IOGPU_ATTACHMENT_COUNT,
((uint8_t *)cmdbuf->ptr.cpu + cmd.attachment_offset),
"Attachment count");
uint32_t *attachments = (uint32_t *) ((uint8_t *) cmdbuf->ptr.cpu + cmd.attachment_offset);
uint32_t *attachments =
(uint32_t *)((uint8_t *)cmdbuf->ptr.cpu + cmd.attachment_offset);
unsigned attachment_count = attachments[3];
for (unsigned i = 0; i < attachment_count; ++i) {
uint32_t *ptr = attachments + 4 + (i * AGX_IOGPU_ATTACHMENT_LENGTH / 4);
@ -657,9 +680,9 @@ agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose)
}
if (cmd.unk_5 == 3)
agxdecode_cs((uint32_t *) cmdbuf->ptr.cpu, cmd.encoder, verbose);
agxdecode_cs((uint32_t *)cmdbuf->ptr.cpu, cmd.encoder, verbose);
else
agxdecode_gfx((uint32_t *) cmdbuf->ptr.cpu, cmd.encoder, verbose);
agxdecode_gfx((uint32_t *)cmdbuf->ptr.cpu, cmd.encoder, verbose);
agxdecode_map_read_write();
}
@ -674,16 +697,19 @@ agxdecode_dump_mappings(unsigned map_handle)
agxdecode_decode_segment_list(map->ptr.cpu);
for (unsigned i = 0; i < mmap_count; ++i) {
if (!mmap_array[i].ptr.cpu || !mmap_array[i].size || !mmap_array[i].mapped)
if (!mmap_array[i].ptr.cpu || !mmap_array[i].size ||
!mmap_array[i].mapped)
continue;
assert(mmap_array[i].type < AGX_NUM_ALLOC);
fprintf(agxdecode_dump_stream, "Buffer: type %s, gpu %" PRIx64 ", handle %u.bin:\n\n",
agx_alloc_types[mmap_array[i].type],
mmap_array[i].ptr.gpu, mmap_array[i].handle);
fprintf(agxdecode_dump_stream,
"Buffer: type %s, gpu %" PRIx64 ", handle %u.bin:\n\n",
agx_alloc_types[mmap_array[i].type], mmap_array[i].ptr.gpu,
mmap_array[i].handle);
hexdump(agxdecode_dump_stream, mmap_array[i].ptr.cpu, mmap_array[i].size, false);
hexdump(agxdecode_dump_stream, mmap_array[i].ptr.cpu, mmap_array[i].size,
false);
fprintf(agxdecode_dump_stream, "\n");
}
}
@ -708,7 +734,8 @@ agxdecode_track_free(struct agx_bo *bo)
bool found = false;
for (unsigned i = 0; i < mmap_count; ++i) {
if (mmap_array[i].handle == bo->handle && mmap_array[i].type == bo->type) {
if (mmap_array[i].handle == bo->handle &&
mmap_array[i].type == bo->type) {
assert(!found && "mapped multiple times!");
found = true;
@ -730,12 +757,14 @@ agxdecode_dump_file_open(void)
/* This does a getenv every frame, so it is possible to use
* setenv to change the base at runtime.
*/
const char *dump_file_base = getenv("AGXDECODE_DUMP_FILE") ?: "agxdecode.dump";
const char *dump_file_base =
getenv("AGXDECODE_DUMP_FILE") ?: "agxdecode.dump";
if (!strcmp(dump_file_base, "stderr"))
agxdecode_dump_stream = stderr;
else {
char buffer[1024];
snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base, agxdecode_dump_frame_count);
snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base,
agxdecode_dump_frame_count);
printf("agxdecode: dump command stream to file %s\n", buffer);
agxdecode_dump_stream = fopen(buffer, "w");
if (!agxdecode_dump_stream)

View file

@ -32,7 +32,8 @@ void agxdecode_next_frame(void);
void agxdecode_close(void);
void agxdecode_cmdstream(unsigned cmdbuf_index, unsigned map_index, bool verbose);
void agxdecode_cmdstream(unsigned cmdbuf_index, unsigned map_index,
bool verbose);
void agxdecode_dump_file_open(void);

View file

@ -30,8 +30,12 @@
*/
#ifndef DYLD_INTERPOSE
#define DYLD_INTERPOSE(_replacement,_replacee) \
__attribute__((used)) static struct{ const void* replacement; const void* replacee; } _interpose_##_replacee \
__attribute__ ((section ("__DATA,__interpose"))) = { (const void*)(unsigned long)&_replacement, (const void*)(unsigned long)&_replacee };
#define DYLD_INTERPOSE(_replacement, _replacee) \
__attribute__((used)) static struct { \
const void *replacement; \
const void *replacee; \
} _interpose_##_replacee __attribute__((section("__DATA,__interpose"))) = { \
(const void *)(unsigned long)&_replacement, \
(const void *)(unsigned long)&_replacee};
#endif

View file

@ -28,8 +28,8 @@
#include "agx_bo.h"
#if __APPLE__
#include <mach/mach.h>
#include <IOKit/IODataQueueClient.h>
#include <mach/mach.h>
#endif
/*
@ -48,84 +48,83 @@
#define AGX_SERVICE_TYPE 0x100005
enum agx_selector {
AGX_SELECTOR_GET_GLOBAL_IDS = 0x6,
AGX_SELECTOR_SET_API = 0x7,
AGX_SELECTOR_CREATE_COMMAND_QUEUE = 0x8,
AGX_SELECTOR_FREE_COMMAND_QUEUE = 0x9,
AGX_SELECTOR_ALLOCATE_MEM = 0xA,
AGX_SELECTOR_FREE_MEM = 0xB,
AGX_SELECTOR_CREATE_SHMEM = 0xF,
AGX_SELECTOR_FREE_SHMEM = 0x10,
AGX_SELECTOR_CREATE_NOTIFICATION_QUEUE = 0x11,
AGX_SELECTOR_FREE_NOTIFICATION_QUEUE = 0x12,
AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS = 0x1E,
AGX_SELECTOR_GET_VERSION = 0x23,
AGX_NUM_SELECTORS = 0x32
AGX_SELECTOR_GET_GLOBAL_IDS = 0x6,
AGX_SELECTOR_SET_API = 0x7,
AGX_SELECTOR_CREATE_COMMAND_QUEUE = 0x8,
AGX_SELECTOR_FREE_COMMAND_QUEUE = 0x9,
AGX_SELECTOR_ALLOCATE_MEM = 0xA,
AGX_SELECTOR_FREE_MEM = 0xB,
AGX_SELECTOR_CREATE_SHMEM = 0xF,
AGX_SELECTOR_FREE_SHMEM = 0x10,
AGX_SELECTOR_CREATE_NOTIFICATION_QUEUE = 0x11,
AGX_SELECTOR_FREE_NOTIFICATION_QUEUE = 0x12,
AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS = 0x1E,
AGX_SELECTOR_GET_VERSION = 0x23,
AGX_NUM_SELECTORS = 0x32
};
static const char *selector_table[AGX_NUM_SELECTORS] = {
"unk0",
"unk1",
"unk2",
"unk3",
"unk4",
"unk5",
"GET_GLOBAL_IDS",
"SET_API",
"CREATE_COMMAND_QUEUE",
"FREE_COMMAND_QUEUE",
"ALLOCATE_MEM",
"FREE_MEM",
"unkC",
"unkD",
"unkE",
"CREATE_SHMEM",
"FREE_SHMEM",
"CREATE_NOTIFICATION_QUEUE",
"FREE_NOTIFICATION_QUEUE",
"unk13",
"unk14",
"unk15",
"unk16",
"unk17",
"unk18",
"unk19",
"unk1A",
"unk1B",
"unk1C",
"unk1D",
"SUBMIT_COMMAND_BUFFERS",
"unk1F",
"unk20",
"unk21",
"unk22",
"GET_VERSION",
"unk24",
"unk25",
"unk26",
"unk27",
"unk28",
"unk29",
"unk2A",
"unk2B",
"unk2C",
"unk2D",
"unk2E",
"unk2F",
"unk30",
"unk31"
};
"unk0",
"unk1",
"unk2",
"unk3",
"unk4",
"unk5",
"GET_GLOBAL_IDS",
"SET_API",
"CREATE_COMMAND_QUEUE",
"FREE_COMMAND_QUEUE",
"ALLOCATE_MEM",
"FREE_MEM",
"unkC",
"unkD",
"unkE",
"CREATE_SHMEM",
"FREE_SHMEM",
"CREATE_NOTIFICATION_QUEUE",
"FREE_NOTIFICATION_QUEUE",
"unk13",
"unk14",
"unk15",
"unk16",
"unk17",
"unk18",
"unk19",
"unk1A",
"unk1B",
"unk1C",
"unk1D",
"SUBMIT_COMMAND_BUFFERS",
"unk1F",
"unk20",
"unk21",
"unk22",
"GET_VERSION",
"unk24",
"unk25",
"unk26",
"unk27",
"unk28",
"unk29",
"unk2A",
"unk2B",
"unk2C",
"unk2D",
"unk2E",
"unk2F",
"unk30",
"unk31"};
static inline const char *
wrap_selector_name(uint32_t selector)
{
return (selector < AGX_NUM_SELECTORS) ? selector_table[selector] : "unk??";
return (selector < AGX_NUM_SELECTORS) ? selector_table[selector] : "unk??";
}
struct agx_create_command_queue_resp {
uint64_t id;
uint32_t unk2; // 90 0A 08 27
uint32_t unk3; // 0
uint64_t id;
uint32_t unk2; // 90 0A 08 27
uint32_t unk3; // 0
} __attribute__((packed));
struct agx_create_shmem_resp {
@ -137,12 +136,12 @@ struct agx_create_shmem_resp {
struct agx_create_notification_queue_resp {
#ifdef __APPLE__
IODataQueueMemory *queue;
IODataQueueMemory *queue;
#else
void *queue;
#endif
uint32_t unk2; // 1
uint32_t unk3; // 0
uint32_t unk2; // 1
uint32_t unk3; // 0
} __attribute__((packed));
struct agx_submit_cmdbuf_req {
@ -165,26 +164,35 @@ struct agx_submit_cmdbuf_req {
* be be in the first 4GiB */
enum agx_memory_type {
AGX_MEMORY_TYPE_NORMAL = 0x00000000, /* used for user allocations */
AGX_MEMORY_TYPE_UNK = 0x08000000, /* unknown */
AGX_MEMORY_TYPE_CMDBUF_64 = 0x18000000, /* used for command buffer storage */
AGX_MEMORY_TYPE_SHADER = 0x48000000, /* used for shader memory, with VA = 0 */
AGX_MEMORY_TYPE_CMDBUF_32 = 0x58000000, /* used for command buffers, with VA < 32-bit */
AGX_MEMORY_TYPE_FRAMEBUFFER = 0x00888F00, /* used for framebuffer backing */
AGX_MEMORY_TYPE_NORMAL = 0x00000000, /* used for user allocations */
AGX_MEMORY_TYPE_UNK = 0x08000000, /* unknown */
AGX_MEMORY_TYPE_CMDBUF_64 = 0x18000000, /* used for command buffer storage */
AGX_MEMORY_TYPE_SHADER =
0x48000000, /* used for shader memory, with VA = 0 */
AGX_MEMORY_TYPE_CMDBUF_32 =
0x58000000, /* used for command buffers, with VA < 32-bit */
AGX_MEMORY_TYPE_FRAMEBUFFER = 0x00888F00, /* used for framebuffer backing */
};
static inline const char *
agx_memory_type_name(uint32_t type)
{
switch (type) {
case AGX_MEMORY_TYPE_NORMAL: return "normal";
case AGX_MEMORY_TYPE_UNK: return "unk";
case AGX_MEMORY_TYPE_CMDBUF_64: return "cmdbuf_64";
case AGX_MEMORY_TYPE_SHADER: return "shader";
case AGX_MEMORY_TYPE_CMDBUF_32: return "cmdbuf_32";
case AGX_MEMORY_TYPE_FRAMEBUFFER: return "framebuffer";
default: return NULL;
}
switch (type) {
case AGX_MEMORY_TYPE_NORMAL:
return "normal";
case AGX_MEMORY_TYPE_UNK:
return "unk";
case AGX_MEMORY_TYPE_CMDBUF_64:
return "cmdbuf_64";
case AGX_MEMORY_TYPE_SHADER:
return "shader";
case AGX_MEMORY_TYPE_CMDBUF_32:
return "cmdbuf_32";
case AGX_MEMORY_TYPE_FRAMEBUFFER:
return "framebuffer";
default:
return NULL;
}
}
struct agx_allocate_resource_req {
@ -199,7 +207,8 @@ struct agx_allocate_resource_req {
/* Handle of the parent resource when a suballocation is requested.
* Based on an assertion failure, this corresponds to:
*
* -[IOGPUMetalBuffer initWithPrimaryBuffer:heapIndex:bufferIndex:bufferOffset:length:args:argsSize:]
* -[IOGPUMetalBuffer
* initWithPrimaryBuffer:heapIndex:bufferIndex:bufferOffset:length:args:argsSize:]
*/
uint32_t parent;
@ -260,7 +269,7 @@ struct agx_map_header {
uint64_t cmdbuf_id; // GUID
uint32_t segment_count;
uint16_t length;
uint16_t unk; // 0x8000
uint16_t unk; // 0x8000
uint64_t encoder_id; // GUID
/* IOAccelSegmentResourceListHeader */
@ -280,7 +289,6 @@ struct agx_map_entry {
uint16_t resource_count;
} __attribute__((packed));
uint64_t
agx_get_global_id(struct agx_device *dev);
uint64_t agx_get_global_id(struct agx_device *dev);
#endif

View file

@ -23,9 +23,9 @@
*
*/
#include "pool.h"
#include "agx_bo.h"
#include "agx_device.h"
#include "pool.h"
/* Transient command stream pooling: command stream uploads try to simply copy
* into whereever we left off. If there isn't space, we allocate a new entry
@ -36,8 +36,8 @@
static struct agx_bo *
agx_pool_alloc_backing(struct agx_pool *pool, size_t bo_sz)
{
struct agx_bo *bo = agx_bo_create(pool->dev, bo_sz, pool->create_flags,
"Pool");
struct agx_bo *bo =
agx_bo_create(pool->dev, bo_sz, pool->create_flags, "Pool");
util_dynarray_append(&pool->bos, struct agx_bo *, bo);
pool->transient_bo = bo;
@ -48,7 +48,7 @@ agx_pool_alloc_backing(struct agx_pool *pool, size_t bo_sz)
void
agx_pool_init(struct agx_pool *pool, struct agx_device *dev,
unsigned create_flags, bool prealloc)
unsigned create_flags, bool prealloc)
{
memset(pool, 0, sizeof(*pool));
pool->dev = dev;
@ -63,7 +63,7 @@ void
agx_pool_cleanup(struct agx_pool *pool)
{
util_dynarray_foreach(&pool->bos, struct agx_bo *, bo) {
agx_bo_unreference(*bo);
agx_bo_unreference(*bo);
}
util_dynarray_fini(&pool->bos);
@ -91,7 +91,7 @@ agx_pool_alloc_aligned_with_bo(struct agx_pool *pool, size_t sz,
/* If we don't fit, allocate a new backing */
if (unlikely(bo == NULL || (offset + sz) >= POOL_SLAB_SIZE)) {
bo = agx_pool_alloc_backing(pool,
ALIGN_POT(MAX2(POOL_SLAB_SIZE, sz), 4096));
ALIGN_POT(MAX2(POOL_SLAB_SIZE, sz), 4096));
offset = 0;
}

View file

@ -53,12 +53,10 @@ struct agx_pool {
unsigned create_flags;
};
void
agx_pool_init(struct agx_pool *pool, struct agx_device *dev,
unsigned create_flags, bool prealloc);
void agx_pool_init(struct agx_pool *pool, struct agx_device *dev,
unsigned create_flags, bool prealloc);
void
agx_pool_cleanup(struct agx_pool *pool);
void agx_pool_cleanup(struct agx_pool *pool);
static inline unsigned
agx_pool_num_bos(struct agx_pool *pool)
@ -66,15 +64,14 @@ agx_pool_num_bos(struct agx_pool *pool)
return util_dynarray_num_elements(&pool->bos, struct agx_bo *);
}
void
agx_pool_get_bo_handles(struct agx_pool *pool, uint32_t *handles);
void agx_pool_get_bo_handles(struct agx_pool *pool, uint32_t *handles);
/* Represents a fat pointer for GPU-mapped memory, returned from the transient
* allocator and not used for much else */
struct agx_ptr
agx_pool_alloc_aligned_with_bo(struct agx_pool *pool, size_t sz,
unsigned alignment, struct agx_bo **bo);
struct agx_ptr agx_pool_alloc_aligned_with_bo(struct agx_pool *pool, size_t sz,
unsigned alignment,
struct agx_bo **bo);
static inline struct agx_ptr
agx_pool_alloc_aligned(struct agx_pool *pool, size_t sz, unsigned alignment)
@ -82,16 +79,16 @@ agx_pool_alloc_aligned(struct agx_pool *pool, size_t sz, unsigned alignment)
return agx_pool_alloc_aligned_with_bo(pool, sz, alignment, NULL);
}
uint64_t
agx_pool_upload(struct agx_pool *pool, const void *data, size_t sz);
uint64_t agx_pool_upload(struct agx_pool *pool, const void *data, size_t sz);
uint64_t
agx_pool_upload_aligned_with_bo(struct agx_pool *pool, const void *data,
size_t sz, unsigned alignment,
struct agx_bo **bo);
uint64_t agx_pool_upload_aligned_with_bo(struct agx_pool *pool,
const void *data, size_t sz,
unsigned alignment,
struct agx_bo **bo);
static inline uint64_t
agx_pool_upload_aligned(struct agx_pool *pool, const void *data, size_t sz, unsigned alignment)
agx_pool_upload_aligned(struct agx_pool *pool, const void *data, size_t sz,
unsigned alignment)
{
return agx_pool_upload_aligned_with_bo(pool, data, sz, alignment, NULL);
}
@ -102,24 +99,23 @@ struct agx_desc_alloc_info {
unsigned nelems;
};
#define AGX_DESC_ARRAY(count, name) \
{ \
.size = MALI_ ## name ## _LENGTH, \
.align = MALI_ ## name ## _ALIGN, \
.nelems = count, \
}
#define AGX_DESC_ARRAY(count, name) \
{ \
.size = MALI_##name##_LENGTH, .align = MALI_##name##_ALIGN, \
.nelems = count, \
}
#define AGX_DESC(name) AGX_DESC_ARRAY(1, name)
#define AGX_DESC_AGGREGATE(...) \
(struct agx_desc_alloc_info[]) { \
__VA_ARGS__, \
{ 0 }, \
}
#define AGX_DESC_AGGREGATE(...) \
(struct agx_desc_alloc_info[]) \
{ \
__VA_ARGS__, {0}, \
}
static inline struct agx_ptr
agx_pool_alloc_descs(struct agx_pool *pool,
const struct agx_desc_alloc_info *descs)
const struct agx_desc_alloc_info *descs)
{
unsigned size = 0;
unsigned align = descs[0].align;
@ -132,13 +128,13 @@ agx_pool_alloc_descs(struct agx_pool *pool,
return agx_pool_alloc_aligned(pool, size, align);
}
#define agx_pool_alloc_desc(pool, name) \
agx_pool_alloc_descs(pool, AGX_DESC_AGGREGATE(AGX_DESC(name)))
#define agx_pool_alloc_desc(pool, name) \
agx_pool_alloc_descs(pool, AGX_DESC_AGGREGATE(AGX_DESC(name)))
#define agx_pool_alloc_desc_array(pool, count, name) \
agx_pool_alloc_descs(pool, AGX_DESC_AGGREGATE(AGX_DESC_ARRAY(count, name)))
#define agx_pool_alloc_desc_array(pool, count, name) \
agx_pool_alloc_descs(pool, AGX_DESC_AGGREGATE(AGX_DESC_ARRAY(count, name)))
#define agx_pool_alloc_desc_aggregate(pool, ...) \
agx_pool_alloc_descs(pool, AGX_DESC_AGGREGATE(__VA_ARGS__))
#define agx_pool_alloc_desc_aggregate(pool, ...) \
agx_pool_alloc_descs(pool, AGX_DESC_AGGREGATE(__VA_ARGS__))
#endif

View file

@ -31,18 +31,18 @@ const struct {
bool inexact;
} lod_cases[] = {
/* Lower bound clamp */
{ -INFINITY, 0x00, true },
{ -0.1, 0x00, true },
{ -0.0, 0x00, true },
{-INFINITY, 0x00, true},
{-0.1, 0x00, true},
{-0.0, 0x00, true},
/* Exact bounds */
{ 0.0, 0x00 },
{ 14.0, 0x380 },
{0.0, 0x00},
{14.0, 0x380},
/* Upper bound clamp */
{ 14.1, 0x380, true },
{ 18.1, 0x380, true },
{ INFINITY, 0x380, true },
{14.1, 0x380, true},
{18.1, 0x380, true},
{INFINITY, 0x380, true},
};
const struct {
@ -52,18 +52,9 @@ const struct {
uint32_t encoded;
} group_cases[] = {
/* Groups of 16 in a 4-bit word */
{ 16, 4, 0, 0x1 },
{ 16, 4, 1, 0x1 },
{ 16, 4, 16, 0x1 },
{ 16, 4, 17, 0x2 },
{ 16, 4, 31, 0x2 },
{ 16, 4, 32, 0x2 },
{ 16, 4, 33, 0x3 },
{ 16, 4, 239, 0xF },
{ 16, 4, 240, 0xF },
{ 16, 4, 241, 0x0 },
{ 16, 4, 255, 0x0 },
{ 16, 4, 256, 0x0 },
{16, 4, 0, 0x1}, {16, 4, 1, 0x1}, {16, 4, 16, 0x1}, {16, 4, 17, 0x2},
{16, 4, 31, 0x2}, {16, 4, 32, 0x2}, {16, 4, 33, 0x3}, {16, 4, 239, 0xF},
{16, 4, 240, 0xF}, {16, 4, 241, 0x0}, {16, 4, 255, 0x0}, {16, 4, 256, 0x0},
};
TEST(LODClamp, Encode)
@ -78,7 +69,7 @@ TEST(LODClamp, Decode)
if (lod_cases[i].inexact)
continue;
uint8_t cl[4] = { 0 };
uint8_t cl[4] = {0};
memcpy(cl, &lod_cases[i].encoded, sizeof(lod_cases[i].encoded));
ASSERT_EQ(__gen_unpack_lod(cl, 0, 10), lod_cases[i].f);
@ -88,8 +79,7 @@ TEST(LODClamp, Decode)
TEST(Groups, Encode)
{
for (unsigned i = 0; i < ARRAY_SIZE(group_cases); ++i) {
ASSERT_EQ(__gen_to_groups(group_cases[i].value,
group_cases[i].group_size,
ASSERT_EQ(__gen_to_groups(group_cases[i].value, group_cases[i].group_size,
group_cases[i].length),
group_cases[i].encoded);
}
@ -98,16 +88,16 @@ TEST(Groups, Encode)
TEST(Groups, Decode)
{
for (unsigned i = 0; i < ARRAY_SIZE(group_cases); ++i) {
unsigned expected = ALIGN_POT(group_cases[i].value,
group_cases[i].group_size);
unsigned expected =
ALIGN_POT(group_cases[i].value, group_cases[i].group_size);
/* Clamp to minimum encodable */
if (group_cases[i].value == 0)
expected = group_cases[i].group_size;
ASSERT_EQ(__gen_from_groups(group_cases[i].encoded,
group_cases[i].group_size,
group_cases[i].length),
expected);
ASSERT_EQ(
__gen_from_groups(group_cases[i].encoded, group_cases[i].group_size,
group_cases[i].length),
expected);
}
}

View file

@ -137,24 +137,22 @@ TEST(Tilebuffer, Layouts)
for (unsigned i = 0; i < ARRAY_SIZE(tests); ++i) {
unsigned nr_cbufs;
for (nr_cbufs = 0;
nr_cbufs < ARRAY_SIZE(tests[i].formats) &&
tests[i].formats[nr_cbufs] != PIPE_FORMAT_NONE;
++nr_cbufs);
for (nr_cbufs = 0; nr_cbufs < ARRAY_SIZE(tests[i].formats) &&
tests[i].formats[nr_cbufs] != PIPE_FORMAT_NONE;
++nr_cbufs)
;
struct agx_tilebuffer_layout actual =
agx_build_tilebuffer_layout(tests[i].formats, nr_cbufs,
tests[i].nr_samples);
struct agx_tilebuffer_layout actual = agx_build_tilebuffer_layout(
tests[i].formats, nr_cbufs, tests[i].nr_samples);
ASSERT_EQ(tests[i].layout.sample_size_B, actual.sample_size_B) <<
tests[i].name;
ASSERT_EQ(tests[i].layout.sample_size_B, actual.sample_size_B)
<< tests[i].name;
ASSERT_EQ(tests[i].layout.nr_samples, actual.nr_samples) << tests[i].name;
ASSERT_EQ(tests[i].layout.tile_size.width, actual.tile_size.width) <<
tests[i].name;
ASSERT_EQ(tests[i].layout.tile_size.height, actual.tile_size.height) <<
tests[i].name;
ASSERT_EQ(tests[i].total_size,
agx_tilebuffer_total_size(&tests[i].layout)) <<
tests[i].name;
ASSERT_EQ(tests[i].layout.tile_size.width, actual.tile_size.width)
<< tests[i].name;
ASSERT_EQ(tests[i].layout.tile_size.height, actual.tile_size.height)
<< tests[i].name;
ASSERT_EQ(tests[i].total_size, agx_tilebuffer_total_size(&tests[i].layout))
<< tests[i].name;
}
}

View file

@ -20,22 +20,22 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <dlfcn.h>
#include <assert.h>
#include <dlfcn.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <mach/mach.h>
#include <IOKit/IOKitLib.h>
#include <mach/mach.h>
#include "util/compiler.h"
#include "io.h"
#include "decode.h"
#include "util.h"
#include "hexdump.h"
#include "dyld_interpose.h"
#include "hexdump.h"
#include "io.h"
#include "util.h"
/*
* Wrap IOKit entrypoints to intercept communication between the AGX kernel
@ -46,7 +46,7 @@
mach_port_t metal_connection = 0;
kern_return_t
wrap_Method(mach_port_t connection, uint32_t selector, const uint64_t* input,
wrap_Method(mach_port_t connection, uint32_t selector, const uint64_t *input,
uint32_t inputCnt, const void *inputStruct, size_t inputStructCnt,
uint64_t *output, uint32_t *outputCnt, void *outputStruct,
size_t *outputStructCntP)
@ -76,21 +76,21 @@ wrap_Method(mach_port_t connection, uint32_t selector, const uint64_t* input,
case AGX_SELECTOR_SET_API:
assert(input == NULL && output == NULL && outputStruct == NULL);
assert(inputStruct != NULL && inputStructCnt == 16);
assert(((uint8_t *) inputStruct)[15] == 0x0);
assert(((uint8_t *)inputStruct)[15] == 0x0);
printf("%X: SET_API(%s)\n", connection, (const char *) inputStruct);
printf("%X: SET_API(%s)\n", connection, (const char *)inputStruct);
break;
case AGX_SELECTOR_ALLOCATE_MEM: {
const struct agx_allocate_resource_req *req = inputStruct;
struct agx_allocate_resource_req *req2 = (void *) inputStruct;
struct agx_allocate_resource_req *req2 = (void *)inputStruct;
req2->mode = (req->mode & 0x800) | 0x430;
bool suballocated = req->mode & 0x800;
printf("Resource allocation:\n");
printf(" Mode: 0x%X%s\n", req->mode & ~0x800,
suballocated ? " (suballocated) " : "");
suballocated ? " (suballocated) " : "");
printf(" CPU fixed: 0x%" PRIx64 "\n", req->cpu_fixed);
printf(" CPU fixed (parent): 0x%" PRIx64 "\n", req->cpu_fixed_parent);
printf(" Size: 0x%X\n", req->size);
@ -137,7 +137,7 @@ wrap_Method(mach_port_t connection, uint32_t selector, const uint64_t* input,
const struct agx_submit_cmdbuf_req *req = inputStruct;
agxdecode_cmdstream(req->command_buffer_shmem_id,
req->segment_list_shmem_id, true);
req->segment_list_shmem_id, true);
if (getenv("ASAHI_DUMP"))
agxdecode_dump_mappings(req->segment_list_shmem_id);
@ -153,7 +153,7 @@ wrap_Method(mach_port_t connection, uint32_t selector, const uint64_t* input,
for (uint64_t u = 0; u < inputCnt; ++u)
printf(" %llx", input[u]);
if(inputStructCnt) {
if (inputStructCnt) {
printf(", struct:\n");
hexdump(stdout, inputStruct, inputStructCnt, true);
} else {
@ -164,116 +164,112 @@ wrap_Method(mach_port_t connection, uint32_t selector, const uint64_t* input,
}
/* Invoke the real method */
kern_return_t ret =
IOConnectCallMethod(connection, selector, input, inputCnt, inputStruct,
inputStructCnt, output, outputCnt, outputStruct,
outputStructCntP);
kern_return_t ret = IOConnectCallMethod(
connection, selector, input, inputCnt, inputStruct, inputStructCnt,
output, outputCnt, outputStruct, outputStructCntP);
if (ret != 0)
printf("return %u\n", ret);
/* Track allocations for later analysis (dumping, disassembly, etc) */
switch (selector) {
case AGX_SELECTOR_CREATE_SHMEM: {
assert(inputCnt == 2);
assert((*outputStructCntP) == 0x10);
uint64_t *inp = (uint64_t *) input;
case AGX_SELECTOR_CREATE_SHMEM: {
assert(inputCnt == 2);
assert((*outputStructCntP) == 0x10);
uint64_t *inp = (uint64_t *)input;
uint8_t type = inp[1];
uint8_t type = inp[1];
assert(type <= 2);
if (type == 2)
printf("(cmdbuf with error reporting)\n");
assert(type <= 2);
if (type == 2)
printf("(cmdbuf with error reporting)\n");
uint64_t *ptr = (uint64_t *) outputStruct;
uint32_t *words = (uint32_t *) (ptr + 1);
uint64_t *ptr = (uint64_t *)outputStruct;
uint32_t *words = (uint32_t *)(ptr + 1);
agxdecode_track_alloc(&(struct agx_bo) {
.handle = words[1],
.ptr.cpu = (void *) *ptr,
.size = words[0],
.type = inp[1] ? AGX_ALLOC_CMDBUF : AGX_ALLOC_MEMMAP
});
agxdecode_track_alloc(&(struct agx_bo){
.handle = words[1],
.ptr.cpu = (void *)*ptr,
.size = words[0],
.type = inp[1] ? AGX_ALLOC_CMDBUF : AGX_ALLOC_MEMMAP});
break;
break;
}
case AGX_SELECTOR_ALLOCATE_MEM: {
assert((*outputStructCntP) == 0x50);
const struct agx_allocate_resource_req *req = inputStruct;
struct agx_allocate_resource_resp *resp = outputStruct;
if (resp->cpu && req->cpu_fixed)
assert(resp->cpu == req->cpu_fixed);
printf("Response:\n");
printf(" GPU VA: 0x%" PRIx64 "\n", resp->gpu_va);
printf(" CPU VA: 0x%" PRIx64 "\n", resp->cpu);
printf(" Handle: %u\n", resp->handle);
printf(" Root size: 0x%" PRIx64 "\n", resp->root_size);
printf(" Suballocation size: 0x%" PRIx64 "\n", resp->sub_size);
printf(" GUID: 0x%X\n", resp->guid);
for (unsigned i = 0; i < ARRAY_SIZE(resp->unk4); ++i) {
if (resp->unk4[i])
printf(" UNK%u: 0x%X\n", 4 + i, resp->unk4[i]);
}
for (unsigned i = 0; i < ARRAY_SIZE(resp->unk11); ++i) {
if (resp->unk11[i])
printf(" UNK%u: 0x%X\n", 11 + i, resp->unk11[i]);
}
case AGX_SELECTOR_ALLOCATE_MEM: {
assert((*outputStructCntP) == 0x50);
const struct agx_allocate_resource_req *req = inputStruct;
struct agx_allocate_resource_resp *resp = outputStruct;
if (resp->cpu && req->cpu_fixed)
assert(resp->cpu == req->cpu_fixed);
printf("Response:\n");
printf(" GPU VA: 0x%" PRIx64 "\n", resp->gpu_va);
printf(" CPU VA: 0x%" PRIx64 "\n", resp->cpu);
printf(" Handle: %u\n", resp->handle);
printf(" Root size: 0x%" PRIx64 "\n", resp->root_size);
printf(" Suballocation size: 0x%" PRIx64 "\n", resp->sub_size);
printf(" GUID: 0x%X\n", resp->guid);
for (unsigned i = 0; i < ARRAY_SIZE(resp->unk4); ++i) {
if (resp->unk4[i])
printf(" UNK%u: 0x%X\n", 4 + i, resp->unk4[i]);
}
for (unsigned i = 0; i < ARRAY_SIZE(resp->unk11); ++i) {
if (resp->unk11[i])
printf(" UNK%u: 0x%X\n", 11 + i, resp->unk11[i]);
}
if (req->parent)
assert(resp->sub_size <= resp->root_size);
else
assert(resp->sub_size == resp->root_size);
if (req->parent)
assert(resp->sub_size <= resp->root_size);
else
assert(resp->sub_size == resp->root_size);
agxdecode_track_alloc(&(struct agx_bo){
.type = AGX_ALLOC_REGULAR,
.size = resp->sub_size,
.handle = resp->handle,
.ptr.gpu = resp->gpu_va,
.ptr.cpu = (void *)resp->cpu,
});
agxdecode_track_alloc(&(struct agx_bo) {
.type = AGX_ALLOC_REGULAR,
.size = resp->sub_size,
.handle = resp->handle,
.ptr.gpu = resp->gpu_va,
.ptr.cpu = (void *) resp->cpu,
});
break;
}
break;
}
case AGX_SELECTOR_FREE_MEM: {
assert(inputCnt == 1);
assert(inputStruct == NULL);
assert(output == NULL);
assert(outputStruct == NULL);
case AGX_SELECTOR_FREE_MEM: {
assert(inputCnt == 1);
assert(inputStruct == NULL);
assert(output == NULL);
assert(outputStruct == NULL);
agxdecode_track_free(
&(struct agx_bo){.type = AGX_ALLOC_REGULAR, .handle = input[0]});
agxdecode_track_free(&(struct agx_bo) {
.type = AGX_ALLOC_REGULAR,
.handle = input[0]
});
break;
}
break;
}
default:
/* Dump the outputs */
if (outputCnt) {
printf("%u scalars: ", *outputCnt);
default:
/* Dump the outputs */
if(outputCnt) {
printf("%u scalars: ", *outputCnt);
for (uint64_t u = 0; u < *outputCnt; ++u)
printf("%llx ", output[u]);
printf("\n");
}
if(outputStructCntP) {
printf(" struct\n");
hexdump(stdout, outputStruct, *outputStructCntP, true);
if (selector == 2) {
/* Dump linked buffer as well */
void **o = outputStruct;
hexdump(stdout, *o, 64, true);
}
}
for (uint64_t u = 0; u < *outputCnt; ++u)
printf("%llx ", output[u]);
printf("\n");
break;
}
if (outputStructCntP) {
printf(" struct\n");
hexdump(stdout, outputStruct, *outputStructCntP, true);
if (selector == 2) {
/* Dump linked buffer as well */
void **o = outputStruct;
hexdump(stdout, *o, 64, true);
}
}
printf("\n");
break;
}
return ret;
@ -287,58 +283,59 @@ wrap_AsyncMethod(mach_port_t connection, uint32_t selector,
size_t inputStructCnt, uint64_t *output, uint32_t *outputCnt,
void *outputStruct, size_t *outputStructCntP)
{
/* Check the arguments make sense */
assert((input != NULL) == (inputCnt != 0));
assert((inputStruct != NULL) == (inputStructCnt != 0));
assert((output != NULL) == (outputCnt != 0));
assert((outputStruct != NULL) == (outputStructCntP != 0));
/* Check the arguments make sense */
assert((input != NULL) == (inputCnt != 0));
assert((inputStruct != NULL) == (inputStructCnt != 0));
assert((output != NULL) == (outputCnt != 0));
assert((outputStruct != NULL) == (outputStructCntP != 0));
printf("%X: call %X, wake port %X (out %p, %zu)", connection, selector,
printf("%X: call %X, wake port %X (out %p, %zu)", connection, selector,
wakePort, outputStructCntP, outputStructCntP ? *outputStructCntP : 0);
for (uint64_t u = 0; u < inputCnt; ++u)
printf(" %llx", input[u]);
for (uint64_t u = 0; u < inputCnt; ++u)
printf(" %llx", input[u]);
if(inputStructCnt) {
printf(", struct:\n");
hexdump(stdout, inputStruct, inputStructCnt, true);
} else {
printf("\n");
}
if (inputStructCnt) {
printf(", struct:\n");
hexdump(stdout, inputStruct, inputStructCnt, true);
} else {
printf("\n");
}
printf(", references: ");
for (unsigned i = 0; i < referenceCnt; ++i)
printf(" %llx", reference[i]);
printf("\n");
printf(", references: ");
for (unsigned i = 0; i < referenceCnt; ++i)
printf(" %llx", reference[i]);
printf("\n");
kern_return_t ret = IOConnectCallAsyncMethod(connection, selector, wakePort,
reference, referenceCnt, input, inputCnt, inputStruct, inputStructCnt,
output, outputCnt, outputStruct, outputStructCntP);
kern_return_t ret = IOConnectCallAsyncMethod(
connection, selector, wakePort, reference, referenceCnt, input, inputCnt,
inputStruct, inputStructCnt, output, outputCnt, outputStruct,
outputStructCntP);
printf("return %u", ret);
printf("return %u", ret);
if(outputCnt) {
printf("%u scalars: ", *outputCnt);
if (outputCnt) {
printf("%u scalars: ", *outputCnt);
for (uint64_t u = 0; u < *outputCnt; ++u)
printf("%llx ", output[u]);
for (uint64_t u = 0; u < *outputCnt; ++u)
printf("%llx ", output[u]);
printf("\n");
}
printf("\n");
}
if(outputStructCntP) {
printf(" struct\n");
hexdump(stdout, outputStruct, *outputStructCntP, true);
if (outputStructCntP) {
printf(" struct\n");
hexdump(stdout, outputStruct, *outputStructCntP, true);
if (selector == 2) {
/* Dump linked buffer as well */
void **o = outputStruct;
hexdump(stdout, *o, 64, true);
}
}
if (selector == 2) {
/* Dump linked buffer as well */
void **o = outputStruct;
hexdump(stdout, *o, 64, true);
}
}
printf("\n");
return ret;
printf("\n");
return ret;
}
kern_return_t
@ -358,9 +355,9 @@ wrap_AsyncStructMethod(mach_port_t connection, uint32_t selector,
size_t inputStructCnt, void *outputStruct,
size_t *outputStructCnt)
{
return wrap_AsyncMethod(connection, selector, wakePort, reference,
referenceCnt, NULL, 0, inputStruct, inputStructCnt,
NULL, NULL, outputStruct, outputStructCnt);
return wrap_AsyncMethod(connection, selector, wakePort, reference,
referenceCnt, NULL, 0, inputStruct, inputStructCnt,
NULL, NULL, outputStruct, outputStructCnt);
}
kern_return_t
@ -368,8 +365,8 @@ wrap_ScalarMethod(mach_port_t connection, uint32_t selector,
const uint64_t *input, uint32_t inputCnt, uint64_t *output,
uint32_t *outputCnt)
{
return wrap_Method(connection, selector, input, inputCnt, NULL, 0, output,
outputCnt, NULL, NULL);
return wrap_Method(connection, selector, input, inputCnt, NULL, 0, output,
outputCnt, NULL, NULL);
}
kern_return_t
@ -378,9 +375,9 @@ wrap_AsyncScalarMethod(mach_port_t connection, uint32_t selector,
uint32_t referenceCnt, const uint64_t *input,
uint32_t inputCnt, uint64_t *output, uint32_t *outputCnt)
{
return wrap_AsyncMethod(connection, selector, wakePort, reference,
referenceCnt, input, inputCnt, NULL, 0, output,
outputCnt, NULL, NULL);
return wrap_AsyncMethod(connection, selector, wakePort, reference,
referenceCnt, input, inputCnt, NULL, 0, output,
outputCnt, NULL, NULL);
}
mach_port_t
@ -392,22 +389,24 @@ wrap_DataQueueAllocateNotificationPort()
}
kern_return_t
wrap_SetNotificationPort(io_connect_t connect, uint32_t type,
mach_port_t port, uintptr_t reference)
wrap_SetNotificationPort(io_connect_t connect, uint32_t type, mach_port_t port,
uintptr_t reference)
{
printf("Set noficiation port connect=%X, type=%X, port=%X, reference=%"
PRIx64"\n", connect, type, port, (uint64_t) reference);
printf(
"Set noficiation port connect=%X, type=%X, port=%X, reference=%" PRIx64
"\n",
connect, type, port, (uint64_t)reference);
return IOConnectSetNotificationPort(connect, type, port, reference);
}
IOReturn
wrap_DataQueueWaitForAvailableData(IODataQueueMemory *dataQueue,
mach_port_t notificationPort)
mach_port_t notificationPort)
{
printf("Waiting for data queue at notif port %X\n", notificationPort);
printf("Waiting for data queue at notif port %X\n", notificationPort);
IOReturn ret = IODataQueueWaitForAvailableData(dataQueue, notificationPort);
printf("ret=%X\n", ret);
printf("ret=%X\n", ret);
return ret;
}
@ -419,9 +418,11 @@ wrap_DataQueuePeek(IODataQueueMemory *dataQueue)
}
IOReturn
wrap_DataQueueDequeue(IODataQueueMemory *dataQueue, void *data, uint32_t *dataSize)
wrap_DataQueueDequeue(IODataQueueMemory *dataQueue, void *data,
uint32_t *dataSize)
{
printf("Dequeueing (dataQueue=%p, data=%p, buffer %u)\n", dataQueue, data, *dataSize);
printf("Dequeueing (dataQueue=%p, data=%p, buffer %u)\n", dataQueue, data,
*dataSize);
IOReturn ret = IODataQueueDequeue(dataQueue, data, dataSize);
printf("Return \"%s\", got %u bytes\n", mach_error_string(ret), *dataSize);
@ -441,7 +442,9 @@ DYLD_INTERPOSE(wrap_AsyncStructMethod, IOConnectCallAsyncStructMethod);
DYLD_INTERPOSE(wrap_ScalarMethod, IOConnectCallScalarMethod);
DYLD_INTERPOSE(wrap_AsyncScalarMethod, IOConnectCallAsyncScalarMethod);
DYLD_INTERPOSE(wrap_SetNotificationPort, IOConnectSetNotificationPort);
DYLD_INTERPOSE(wrap_DataQueueAllocateNotificationPort, IODataQueueAllocateNotificationPort);
DYLD_INTERPOSE(wrap_DataQueueWaitForAvailableData, IODataQueueWaitForAvailableData);
DYLD_INTERPOSE(wrap_DataQueueAllocateNotificationPort,
IODataQueueAllocateNotificationPort);
DYLD_INTERPOSE(wrap_DataQueueWaitForAvailableData,
IODataQueueWaitForAvailableData);
DYLD_INTERPOSE(wrap_DataQueuePeek, IODataQueuePeek);
DYLD_INTERPOSE(wrap_DataQueueDequeue, IODataQueueDequeue);

View file

@ -6,8 +6,8 @@
#include "agx_state.h"
#define foreach_batch(ctx, idx) \
BITSET_FOREACH_SET(idx, ctx->batches.active, AGX_MAX_BATCHES)
#define foreach_batch(ctx, idx) \
BITSET_FOREACH_SET(idx, ctx->batches.active, AGX_MAX_BATCHES)
static unsigned
agx_batch_idx(struct agx_batch *batch)
@ -42,10 +42,12 @@ agx_batch_init(struct agx_context *ctx,
batch->bo_list.set = rzalloc_array(ctx, BITSET_WORD, 128);
batch->bo_list.word_count = 128;
} else {
memset(batch->bo_list.set, 0, batch->bo_list.word_count * sizeof(BITSET_WORD));
memset(batch->bo_list.set, 0,
batch->bo_list.word_count * sizeof(BITSET_WORD));
}
batch->encoder = agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER, "Encoder");
batch->encoder =
agx_bo_create(dev, 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER, "Encoder");
batch->encoder_current = batch->encoder->ptr.cpu;
batch->encoder_end = batch->encoder_current + batch->encoder->size;
@ -183,7 +185,8 @@ agx_flush_all(struct agx_context *ctx, const char *reason)
}
void
agx_flush_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch, const char *reason)
agx_flush_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch,
const char *reason)
{
if (reason)
perf_debug_ctx(ctx, "Flushing due to: %s\n", reason);
@ -192,10 +195,8 @@ agx_flush_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch, con
}
static void
agx_flush_readers_except(struct agx_context *ctx,
struct agx_resource *rsrc,
struct agx_batch *except,
const char *reason)
agx_flush_readers_except(struct agx_context *ctx, struct agx_resource *rsrc,
struct agx_batch *except, const char *reason)
{
unsigned idx;
@ -213,10 +214,8 @@ agx_flush_readers_except(struct agx_context *ctx,
}
static void
agx_flush_writer_except(struct agx_context *ctx,
struct agx_resource *rsrc,
struct agx_batch *except,
const char *reason)
agx_flush_writer_except(struct agx_context *ctx, struct agx_resource *rsrc,
struct agx_batch *except, const char *reason)
{
struct hash_entry *ent = _mesa_hash_table_search(ctx->writer, rsrc);
@ -241,13 +240,15 @@ agx_any_batch_uses_resource(struct agx_context *ctx, struct agx_resource *rsrc)
}
void
agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc,
const char *reason)
{
agx_flush_readers_except(ctx, rsrc, NULL, reason);
}
void
agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason)
agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc,
const char *reason)
{
agx_flush_writer_except(ctx, rsrc, NULL, reason);
}

View file

@ -23,11 +23,11 @@
* SOFTWARE.
*/
#include "agx_state.h"
#include "compiler/nir/nir_builder.h"
#include "asahi/compiler/agx_compile.h"
#include "compiler/nir/nir_builder.h"
#include "gallium/auxiliary/util/u_blitter.h"
#include "gallium/auxiliary/util/u_dump.h"
#include "agx_state.h"
void
agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
@ -35,11 +35,13 @@ agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
{
util_blitter_save_vertex_buffer_slot(blitter, ctx->vertex_buffers);
util_blitter_save_vertex_elements(blitter, ctx->attributes);
util_blitter_save_vertex_shader(blitter, ctx->stage[PIPE_SHADER_VERTEX].shader);
util_blitter_save_vertex_shader(blitter,
ctx->stage[PIPE_SHADER_VERTEX].shader);
util_blitter_save_rasterizer(blitter, ctx->rast);
util_blitter_save_viewport(blitter, &ctx->viewport);
util_blitter_save_scissor(blitter, &ctx->scissor);
util_blitter_save_fragment_shader(blitter, ctx->stage[PIPE_SHADER_FRAGMENT].shader);
util_blitter_save_fragment_shader(blitter,
ctx->stage[PIPE_SHADER_FRAGMENT].shader);
util_blitter_save_blend(blitter, ctx->blend);
util_blitter_save_depth_stencil_alpha(blitter, ctx->zs);
util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref);
@ -47,29 +49,28 @@ agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
util_blitter_save_sample_mask(blitter, ctx->sample_mask, 0);
util_blitter_save_framebuffer(blitter, &ctx->framebuffer);
util_blitter_save_fragment_sampler_states(blitter,
ctx->stage[PIPE_SHADER_FRAGMENT].sampler_count,
(void **)(ctx->stage[PIPE_SHADER_FRAGMENT].samplers));
util_blitter_save_fragment_sampler_views(blitter,
ctx->stage[PIPE_SHADER_FRAGMENT].texture_count,
(struct pipe_sampler_view **)ctx->stage[PIPE_SHADER_FRAGMENT].textures);
util_blitter_save_fragment_constant_buffer_slot(blitter,
ctx->stage[PIPE_SHADER_FRAGMENT].cb);
util_blitter_save_fragment_sampler_states(
blitter, ctx->stage[PIPE_SHADER_FRAGMENT].sampler_count,
(void **)(ctx->stage[PIPE_SHADER_FRAGMENT].samplers));
util_blitter_save_fragment_sampler_views(
blitter, ctx->stage[PIPE_SHADER_FRAGMENT].texture_count,
(struct pipe_sampler_view **)ctx->stage[PIPE_SHADER_FRAGMENT].textures);
util_blitter_save_fragment_constant_buffer_slot(
blitter, ctx->stage[PIPE_SHADER_FRAGMENT].cb);
if (!render_cond) {
util_blitter_save_render_condition(blitter,
(struct pipe_query *) ctx->cond_query,
ctx->cond_cond, ctx->cond_mode);
(struct pipe_query *)ctx->cond_query,
ctx->cond_cond, ctx->cond_mode);
}
}
void
agx_blit(struct pipe_context *pipe,
const struct pipe_blit_info *info)
agx_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
{
//if (info->render_condition_enable &&
// !agx_render_condition_check(pan_context(pipe)))
// return;
// if (info->render_condition_enable &&
// !agx_render_condition_check(pan_context(pipe)))
// return;
struct agx_context *ctx = agx_context(pipe);

View file

@ -23,35 +23,35 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
#include <errno.h>
#include <stdio.h>
#include "asahi/compiler/agx_compile.h"
#include "asahi/layout/layout.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_memory.h"
#include "util/u_screen.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"
#include "util/u_upload_mgr.h"
#include "util/half_float.h"
#include "frontend/winsys_handle.h"
#include "asahi/lib/agx_formats.h"
#include "asahi/lib/decode.h"
#include "frontend/sw_winsys.h"
#include "frontend/winsys_handle.h"
#include "gallium/auxiliary/renderonly/renderonly.h"
#include "gallium/auxiliary/util/u_debug_cb.h"
#include "gallium/auxiliary/util/u_framebuffer.h"
#include "gallium/auxiliary/util/u_surface.h"
#include "gallium/auxiliary/util/u_transfer.h"
#include "gallium/auxiliary/util/u_transfer_helper.h"
#include "gallium/auxiliary/util/u_surface.h"
#include "gallium/auxiliary/util/u_framebuffer.h"
#include "gallium/auxiliary/util/u_debug_cb.h"
#include "gallium/auxiliary/renderonly/renderonly.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/half_float.h"
#include "util/u_drm.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_screen.h"
#include "util/u_upload_mgr.h"
#include "agx_device.h"
#include "agx_public.h"
#include "agx_state.h"
#include "magic.h"
#include "asahi/compiler/agx_compile.h"
#include "asahi/lib/decode.h"
#include "asahi/lib/agx_formats.h"
#include "util/u_drm.h"
/* drm_fourcc cannot be built on macOS */
#ifndef __APPLE__
@ -117,12 +117,11 @@ ail_modifier_to_tiling(uint64_t modifier)
}
static void
agx_resource_setup(struct agx_device *dev,
struct agx_resource *nresource)
agx_resource_setup(struct agx_device *dev, struct agx_resource *nresource)
{
struct pipe_resource *templ = &nresource->base;
nresource->layout = (struct ail_layout) {
nresource->layout = (struct ail_layout){
.tiling = ail_modifier_to_tiling(nresource->modifier),
.format = templ->format,
.width_px = templ->width0,
@ -136,8 +135,7 @@ agx_resource_setup(struct agx_device *dev,
static struct pipe_resource *
agx_resource_from_handle(struct pipe_screen *pscreen,
const struct pipe_resource *templat,
struct winsys_handle *whandle,
unsigned usage)
struct winsys_handle *whandle, unsigned usage)
{
struct agx_device *dev = agx_device(pscreen);
struct agx_resource *rsc;
@ -149,8 +147,9 @@ agx_resource_from_handle(struct pipe_screen *pscreen,
if (!rsc)
return NULL;
rsc->modifier = whandle->modifier == DRM_FORMAT_MOD_INVALID ?
DRM_FORMAT_MOD_LINEAR : whandle->modifier;
rsc->modifier = whandle->modifier == DRM_FORMAT_MOD_INVALID
? DRM_FORMAT_MOD_LINEAR
: whandle->modifier;
/* We need strides to be aligned. ail asserts this, but we want to fail
* gracefully so the app can handle the error.
@ -169,11 +168,11 @@ agx_resource_from_handle(struct pipe_screen *pscreen,
rsc->bo = agx_bo_import(dev, whandle->handle);
/* Sometimes an import can fail e.g. on an invalid buffer fd, out of
* memory space to mmap it etc.
*/
* memory space to mmap it etc.
*/
if (!rsc->bo) {
FREE(rsc);
return NULL;
FREE(rsc);
return NULL;
}
agx_resource_setup(dev, rsc);
@ -189,9 +188,9 @@ agx_resource_from_handle(struct pipe_screen *pscreen,
#ifndef __APPLE__
if (dev->ro) {
rsc->scanout =
renderonly_create_gpu_import_for_resource(prsc, dev->ro, NULL);
/* failure is expected in some cases.. */
rsc->scanout =
renderonly_create_gpu_import_for_resource(prsc, dev->ro, NULL);
/* failure is expected in some cases.. */
}
#endif
@ -199,10 +198,8 @@ agx_resource_from_handle(struct pipe_screen *pscreen,
}
static bool
agx_resource_get_handle(struct pipe_screen *pscreen,
struct pipe_context *ctx,
struct pipe_resource *pt,
struct winsys_handle *handle,
agx_resource_get_handle(struct pipe_screen *pscreen, struct pipe_context *ctx,
struct pipe_resource *pt, struct winsys_handle *handle,
unsigned usage)
{
struct agx_device *dev = agx_device(pscreen);
@ -247,13 +244,12 @@ agx_resource_get_handle(struct pipe_screen *pscreen,
return true;
}
static bool
agx_resource_get_param(struct pipe_screen *pscreen,
struct pipe_context *pctx, struct pipe_resource *prsc,
unsigned plane, unsigned layer, unsigned level,
enum pipe_resource_param param,
unsigned usage, uint64_t *value)
agx_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx,
struct pipe_resource *prsc, unsigned plane,
unsigned layer, unsigned level,
enum pipe_resource_param param, unsigned usage,
uint64_t *value)
{
struct agx_resource *rsrc = (struct agx_resource *)prsc;
struct pipe_resource *cur;
@ -319,7 +315,8 @@ static bool
agx_twiddled_allowed(const struct agx_resource *pres)
{
/* Certain binds force linear */
if (pres->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_LINEAR))
if (pres->base.bind &
(PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_LINEAR))
return false;
/* Buffers must be linear, and it does not make sense to twiddle 1D */
@ -338,11 +335,9 @@ agx_compression_allowed(const struct agx_resource *pres)
return false;
/* Limited to renderable */
if (pres->base.bind & ~(PIPE_BIND_SAMPLER_VIEW |
PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DEPTH_STENCIL |
PIPE_BIND_SHARED |
PIPE_BIND_SCANOUT))
if (pres->base.bind &
~(PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
return false;
/* We use the PBE for compression via staging blits, so we can only compress
@ -360,7 +355,8 @@ agx_compression_allowed(const struct agx_resource *pres)
* arrayed linear staging resources, which the hardware doesn't support. This
* could be worked around with more sophisticated blit code.
*/
if (pres->base.target != PIPE_TEXTURE_2D && pres->base.target != PIPE_TEXTURE_RECT)
if (pres->base.target != PIPE_TEXTURE_2D &&
pres->base.target != PIPE_TEXTURE_RECT)
return false;
/* Small textures cannot (should not?) be compressed */
@ -375,12 +371,13 @@ agx_select_modifier_from_list(const struct agx_resource *pres,
const uint64_t *modifiers, int count)
{
if (agx_twiddled_allowed(pres) && agx_compression_allowed(pres) &&
drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED, modifiers, count))
drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED, modifiers,
count))
return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED;
if (agx_twiddled_allowed(pres) &&
drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED, modifiers, count))
return DRM_FORMAT_MOD_APPLE_TWIDDLED;
return DRM_FORMAT_MOD_APPLE_TWIDDLED;
if (agx_linear_allowed(pres) &&
drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count))
@ -420,7 +417,8 @@ agx_resource_create_with_modifiers(struct pipe_screen *screen,
nresource->base.screen = screen;
if (modifiers) {
nresource->modifier = agx_select_modifier_from_list(nresource, modifiers, count);
nresource->modifier =
agx_select_modifier_from_list(nresource, modifiers, count);
/* There may not be a matching modifier, bail if so */
if (nresource->modifier == DRM_FORMAT_MOD_INVALID) {
@ -443,7 +441,7 @@ agx_resource_create_with_modifiers(struct pipe_screen *screen,
pipe_reference_init(&nresource->base.reference, 1);
struct sw_winsys *winsys = ((struct agx_screen *) screen)->winsys;
struct sw_winsys *winsys = ((struct agx_screen *)screen)->winsys;
ail_make_miptree(&nresource->layout);
@ -453,35 +451,35 @@ agx_resource_create_with_modifiers(struct pipe_screen *screen,
assert(util_format_get_blockheight(templ->format) == 1);
unsigned width = templ->width0;
unsigned stride = templ->width0 * util_format_get_blocksize(templ->format);
unsigned stride =
templ->width0 * util_format_get_blocksize(templ->format);
unsigned size = nresource->layout.size_B;
unsigned effective_rows = DIV_ROUND_UP(size, stride);
struct pipe_resource scanout_tmpl = {
.target = nresource->base.target,
.format = templ->format,
.width0 = width,
.height0 = effective_rows,
.depth0 = 1,
.array_size = 1,
.target = nresource->base.target,
.format = templ->format,
.width0 = width,
.height0 = effective_rows,
.depth0 = 1,
.array_size = 1,
};
nresource->scanout = renderonly_scanout_for_resource(&scanout_tmpl,
dev->ro,
&handle);
nresource->scanout =
renderonly_scanout_for_resource(&scanout_tmpl, dev->ro, &handle);
if (!nresource->scanout) {
fprintf(stderr, "Failed to create scanout resource\n");
free(nresource);
return NULL;
fprintf(stderr, "Failed to create scanout resource\n");
free(nresource);
return NULL;
}
assert(handle.type == WINSYS_HANDLE_TYPE_FD);
nresource->bo = agx_bo_import(dev, handle.handle);
close(handle.handle);
if (!nresource->bo) {
free(nresource);
return NULL;
free(nresource);
return NULL;
}
return &nresource->base;
@ -496,14 +494,9 @@ agx_resource_create_with_modifiers(struct pipe_screen *screen,
height = ALIGN_POT(height, 64);
}
nresource->dt = winsys->displaytarget_create(winsys,
templ->bind,
templ->format,
width,
height,
64,
NULL /*map_front_private*/,
&nresource->dt_stride);
nresource->dt = winsys->displaytarget_create(
winsys, templ->bind, templ->format, width, height, 64,
NULL /*map_front_private*/, &nresource->dt_stride);
if (nresource->layout.tiling == AIL_TILING_LINEAR)
nresource->layout.linear_stride_B = nresource->dt_stride;
@ -517,20 +510,20 @@ agx_resource_create_with_modifiers(struct pipe_screen *screen,
/* Guess a label based on the bind */
unsigned bind = templ->bind;
const char *label =
(bind & PIPE_BIND_INDEX_BUFFER) ? "Index buffer" :
(bind & PIPE_BIND_SCANOUT) ? "Scanout" :
(bind & PIPE_BIND_DISPLAY_TARGET) ? "Display target" :
(bind & PIPE_BIND_SHARED) ? "Shared resource" :
(bind & PIPE_BIND_RENDER_TARGET) ? "Render target" :
(bind & PIPE_BIND_DEPTH_STENCIL) ? "Depth/stencil buffer" :
(bind & PIPE_BIND_SAMPLER_VIEW) ? "Texture" :
(bind & PIPE_BIND_VERTEX_BUFFER) ? "Vertex buffer" :
(bind & PIPE_BIND_CONSTANT_BUFFER) ? "Constant buffer" :
(bind & PIPE_BIND_GLOBAL) ? "Global memory" :
(bind & PIPE_BIND_SHADER_BUFFER) ? "Shader buffer" :
(bind & PIPE_BIND_SHADER_IMAGE) ? "Shader image" :
"Other resource";
const char *label = (bind & PIPE_BIND_INDEX_BUFFER) ? "Index buffer"
: (bind & PIPE_BIND_SCANOUT) ? "Scanout"
: (bind & PIPE_BIND_DISPLAY_TARGET) ? "Display target"
: (bind & PIPE_BIND_SHARED) ? "Shared resource"
: (bind & PIPE_BIND_RENDER_TARGET) ? "Render target"
: (bind & PIPE_BIND_DEPTH_STENCIL)
? "Depth/stencil buffer"
: (bind & PIPE_BIND_SAMPLER_VIEW) ? "Texture"
: (bind & PIPE_BIND_VERTEX_BUFFER) ? "Vertex buffer"
: (bind & PIPE_BIND_CONSTANT_BUFFER) ? "Constant buffer"
: (bind & PIPE_BIND_GLOBAL) ? "Global memory"
: (bind & PIPE_BIND_SHADER_BUFFER) ? "Shader buffer"
: (bind & PIPE_BIND_SHADER_IMAGE) ? "Shader image"
: "Other resource";
nresource->bo = agx_bo_create(dev, nresource->layout.size_B,
AGX_MEMORY_TYPE_FRAMEBUFFER, label);
@ -551,11 +544,10 @@ agx_resource_create(struct pipe_screen *screen,
}
static void
agx_resource_destroy(struct pipe_screen *screen,
struct pipe_resource *prsrc)
agx_resource_destroy(struct pipe_screen *screen, struct pipe_resource *prsrc)
{
struct agx_resource *rsrc = (struct agx_resource *)prsrc;
struct agx_screen *agx_screen = (struct agx_screen*)screen;
struct agx_screen *agx_screen = (struct agx_screen *)screen;
if (rsrc->dt) {
/* display target */
@ -572,7 +564,6 @@ agx_resource_destroy(struct pipe_screen *screen,
FREE(rsrc);
}
/*
* transfer
*/
@ -610,10 +601,9 @@ agx_shadow(struct agx_context *ctx, struct agx_resource *rsrc)
* complete. This may require flushing batches.
*/
static void
agx_prepare_for_map(struct agx_context *ctx,
struct agx_resource *rsrc,
agx_prepare_for_map(struct agx_context *ctx, struct agx_resource *rsrc,
unsigned level,
unsigned usage, /* a combination of PIPE_MAP_x */
unsigned usage, /* a combination of PIPE_MAP_x */
const struct pipe_box *box)
{
/* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
@ -622,9 +612,8 @@ agx_prepare_for_map(struct agx_context *ctx,
if ((usage & PIPE_MAP_DISCARD_RANGE) &&
!(rsrc->base.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
rsrc->base.last_level == 0 &&
util_texrange_covers_whole_level(&rsrc->base, 0, box->x, box->y,
box->z, box->width, box->height,
box->depth)) {
util_texrange_covers_whole_level(&rsrc->base, 0, box->x, box->y, box->z,
box->width, box->height, box->depth)) {
usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
}
@ -658,7 +647,6 @@ agx_prepare_for_map(struct agx_context *ctx,
agx_flush_readers(ctx, rsrc, "Unsynchronized write");
}
/* Most of the time we can do CPU-side transfers, but sometimes we need to use
* the 3D pipe for this. Let's wrap u_blitter to blit to/from staging textures.
* Code adapted from panfrost */
@ -670,7 +658,7 @@ agx_alloc_staging(struct agx_context *ctx, struct agx_resource *rsc,
struct pipe_context *pctx = &ctx->base;
struct pipe_resource tmpl = rsc->base;
tmpl.width0 = box->width;
tmpl.width0 = box->width;
tmpl.height0 = box->height;
/* for array textures, box->depth is the array_size, otherwise for 3d
@ -691,7 +679,7 @@ agx_alloc_staging(struct agx_context *ctx, struct agx_resource *rsc,
struct pipe_resource *pstaging =
pctx->screen->resource_create(pctx->screen, &tmpl);
if (!pstaging)
return NULL;
return NULL;
return agx_resource(pstaging);
}
@ -703,13 +691,13 @@ agx_blit_from_staging(struct pipe_context *pctx, struct agx_transfer *trans)
struct pipe_blit_info blit = {0};
blit.dst.resource = dst;
blit.dst.format = dst->format;
blit.dst.level = trans->base.level;
blit.dst.box = trans->base.box;
blit.dst.format = dst->format;
blit.dst.level = trans->base.level;
blit.dst.box = trans->base.box;
blit.src.resource = trans->staging.rsrc;
blit.src.format = trans->staging.rsrc->format;
blit.src.level = 0;
blit.src.box = trans->staging.box;
blit.src.format = trans->staging.rsrc->format;
blit.src.level = 0;
blit.src.box = trans->staging.box;
blit.mask = util_format_get_mask(blit.src.format);
blit.filter = PIPE_TEX_FILTER_NEAREST;
@ -723,13 +711,13 @@ agx_blit_to_staging(struct pipe_context *pctx, struct agx_transfer *trans)
struct pipe_blit_info blit = {0};
blit.src.resource = src;
blit.src.format = src->format;
blit.src.level = trans->base.level;
blit.src.box = trans->base.box;
blit.src.format = src->format;
blit.src.level = trans->base.level;
blit.src.box = trans->base.box;
blit.dst.resource = trans->staging.rsrc;
blit.dst.format = trans->staging.rsrc->format;
blit.dst.level = 0;
blit.dst.box = trans->staging.box;
blit.dst.format = trans->staging.rsrc->format;
blit.dst.level = 0;
blit.dst.box = trans->staging.box;
blit.mask = util_format_get_mask(blit.dst.format);
blit.filter = PIPE_TEX_FILTER_NEAREST;
@ -737,10 +725,9 @@ agx_blit_to_staging(struct pipe_context *pctx, struct agx_transfer *trans)
}
static void *
agx_transfer_map(struct pipe_context *pctx,
struct pipe_resource *resource,
agx_transfer_map(struct pipe_context *pctx, struct pipe_resource *resource,
unsigned level,
unsigned usage, /* a combination of PIPE_MAP_x */
unsigned usage, /* a combination of PIPE_MAP_x */
const struct pipe_box *box,
struct pipe_transfer **out_transfer)
{
@ -784,8 +771,8 @@ agx_transfer_map(struct pipe_context *pctx,
assert(transfer->staging.rsrc != NULL);
if ((usage & PIPE_MAP_READ) && agx_resource_valid(rsrc, level)) {
agx_blit_to_staging(pctx, transfer);
agx_flush_writer(ctx, staging, "GPU read staging blit");
agx_blit_to_staging(pctx, transfer);
agx_flush_writer(ctx, staging, "GPU read staging blit");
}
return staging->bo->ptr.cpu;
@ -795,17 +782,16 @@ agx_transfer_map(struct pipe_context *pctx,
transfer->base.stride =
util_format_get_stride(rsrc->layout.format, box->width);
transfer->base.layer_stride =
util_format_get_2d_size(rsrc->layout.format, transfer->base.stride,
box->height);
transfer->base.layer_stride = util_format_get_2d_size(
rsrc->layout.format, transfer->base.stride, box->height);
transfer->map = calloc(transfer->base.layer_stride, box->depth);
if ((usage & PIPE_MAP_READ) && agx_resource_valid(rsrc, level)) {
for (unsigned z = 0; z < box->depth; ++z) {
uint8_t *map = agx_map_texture_cpu(rsrc, level, box->z + z);
uint8_t *dst = (uint8_t *) transfer->map +
transfer->base.layer_stride * z;
uint8_t *dst =
(uint8_t *)transfer->map + transfer->base.layer_stride * z;
ail_detile(map, dst, &rsrc->layout, level, transfer->base.stride,
box->x, box->y, box->width, box->height);
@ -814,51 +800,49 @@ agx_transfer_map(struct pipe_context *pctx,
return transfer->map;
} else {
assert (rsrc->modifier == DRM_FORMAT_MOD_LINEAR);
assert(rsrc->modifier == DRM_FORMAT_MOD_LINEAR);
transfer->base.stride = ail_get_linear_stride_B(&rsrc->layout, level);
transfer->base.layer_stride = rsrc->layout.layer_stride_B;
/* Be conservative for direct writes */
if ((usage & PIPE_MAP_WRITE) &&
(usage & (PIPE_MAP_DIRECTLY | PIPE_MAP_PERSISTENT | PIPE_MAP_COHERENT)))
{
(usage &
(PIPE_MAP_DIRECTLY | PIPE_MAP_PERSISTENT | PIPE_MAP_COHERENT))) {
BITSET_SET(rsrc->data_valid, level);
}
uint32_t offset = ail_get_linear_pixel_B(&rsrc->layout, level, box->x,
box->y, box->z);
uint32_t offset =
ail_get_linear_pixel_B(&rsrc->layout, level, box->x, box->y, box->z);
return ((uint8_t *) rsrc->bo->ptr.cpu) + offset;
return ((uint8_t *)rsrc->bo->ptr.cpu) + offset;
}
}
static void
agx_transfer_unmap(struct pipe_context *pctx,
struct pipe_transfer *transfer)
agx_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
{
/* Gallium expects writeback here, so we tile */
struct agx_transfer *trans = agx_transfer(transfer);
struct pipe_resource *prsrc = transfer->resource;
struct agx_resource *rsrc = (struct agx_resource *) prsrc;
struct agx_resource *rsrc = (struct agx_resource *)prsrc;
if (trans->staging.rsrc && (transfer->usage & PIPE_MAP_WRITE)) {
agx_blit_from_staging(pctx, trans);
agx_flush_readers(agx_context(pctx), agx_resource(trans->staging.rsrc),
"GPU write staging blit");
agx_blit_from_staging(pctx, trans);
agx_flush_readers(agx_context(pctx), agx_resource(trans->staging.rsrc),
"GPU write staging blit");
} else if (trans->map && (transfer->usage & PIPE_MAP_WRITE)) {
assert(rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED);
for (unsigned z = 0; z < transfer->box.depth; ++z) {
uint8_t *map = agx_map_texture_cpu(rsrc, transfer->level,
transfer->box.z + z);
uint8_t *src = (uint8_t *) trans->map +
transfer->layer_stride * z;
uint8_t *map =
agx_map_texture_cpu(rsrc, transfer->level, transfer->box.z + z);
uint8_t *src = (uint8_t *)trans->map + transfer->layer_stride * z;
ail_tile(map, src, &rsrc->layout, transfer->level,
transfer->stride, transfer->box.x, transfer->box.y,
transfer->box.width, transfer->box.height);
ail_tile(map, src, &rsrc->layout, transfer->level, transfer->stride,
transfer->box.x, transfer->box.y, transfer->box.width,
transfer->box.height);
}
}
@ -879,7 +863,8 @@ agx_transfer_unmap(struct pipe_context *pctx,
* clear/copy
*/
static void
agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor_state *scissor_state,
agx_clear(struct pipe_context *pctx, unsigned buffers,
const struct pipe_scissor_state *scissor_state,
const union pipe_color_union *color, double depth, unsigned stencil)
{
struct agx_context *ctx = agx_context(pctx);
@ -910,11 +895,11 @@ agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor
/* Slow clears draw a fullscreen rectangle */
if (slowclear) {
agx_blitter_save(ctx, ctx->blitter, false /* render cond */);
util_blitter_clear(ctx->blitter, ctx->framebuffer.width,
ctx->framebuffer.height,
util_framebuffer_get_num_layers(&ctx->framebuffer),
slowclear, color, depth, stencil,
util_framebuffer_get_num_samples(&ctx->framebuffer) > 1);
util_blitter_clear(
ctx->blitter, ctx->framebuffer.width, ctx->framebuffer.height,
util_framebuffer_get_num_layers(&ctx->framebuffer), slowclear, color,
depth, stencil,
util_framebuffer_get_num_samples(&ctx->framebuffer) > 1);
}
batch->clear |= fastclear;
@ -923,8 +908,7 @@ agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor
}
static void
agx_flush_resource(struct pipe_context *ctx,
struct pipe_resource *resource)
agx_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
{
agx_flush_writer(agx_context(ctx), agx_resource(resource), "flush_resource");
}
@ -933,8 +917,7 @@ agx_flush_resource(struct pipe_context *ctx,
* context
*/
static void
agx_flush(struct pipe_context *pctx,
struct pipe_fence_handle **fence,
agx_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
unsigned flags)
{
struct agx_context *ctx = agx_context(pctx);
@ -959,7 +942,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
}
/* Finalize the encoder */
uint8_t stop[5 + 64] = { 0x00, 0x00, 0x00, 0xc0, 0x00 };
uint8_t stop[5 + 64] = {0x00, 0x00, 0x00, 0xc0, 0x00};
memcpy(batch->encoder_current, stop, sizeof(stop));
uint64_t pipeline_background = agx_build_meta(batch, false, false);
@ -980,8 +963,8 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
}
}
struct agx_resource *zbuf = batch->key.zsbuf ?
agx_resource(batch->key.zsbuf->texture) : NULL;
struct agx_resource *zbuf =
batch->key.zsbuf ? agx_resource(batch->key.zsbuf->texture) : NULL;
if (zbuf) {
unsigned level = batch->key.zsbuf->u.tex.level;
@ -997,8 +980,8 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
*/
uint64_t scissor = agx_pool_upload_aligned(&batch->pool, batch->scissor.data,
batch->scissor.size, 64);
uint64_t zbias = agx_pool_upload_aligned(&batch->pool, batch->depth_bias.data,
batch->depth_bias.size, 64);
uint64_t zbias = agx_pool_upload_aligned(
&batch->pool, batch->depth_bias.data, batch->depth_bias.size, 64);
/* BO list for a given batch consists of:
* - BOs for the batch's pools
@ -1009,21 +992,21 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
agx_batch_add_bo(batch, batch->encoder);
/* Occlusion queries are allocated as a contiguous pool */
unsigned oq_count = util_dynarray_num_elements(&batch->occlusion_queries,
struct agx_query *);
unsigned oq_count =
util_dynarray_num_elements(&batch->occlusion_queries, struct agx_query *);
size_t oq_size = oq_count * sizeof(uint64_t);
if (oq_size) {
batch->occlusion_buffer = agx_pool_alloc_aligned(&batch->pool, oq_size, 64);
batch->occlusion_buffer =
agx_pool_alloc_aligned(&batch->pool, oq_size, 64);
memset(batch->occlusion_buffer.cpu, 0, oq_size);
} else {
batch->occlusion_buffer.gpu = 0;
}
unsigned handle_count =
agx_batch_num_bo(batch) +
agx_pool_num_bos(&batch->pool) +
agx_pool_num_bos(&batch->pipeline_pool);
unsigned handle_count = agx_batch_num_bo(batch) +
agx_pool_num_bos(&batch->pool) +
agx_pool_num_bos(&batch->pipeline_pool);
uint32_t *handles = calloc(sizeof(uint32_t), handle_count);
unsigned handle = 0, handle_i = 0;
@ -1044,22 +1027,12 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
unsigned cmdbuf_id = agx_get_global_id(dev);
unsigned encoder_id = agx_get_global_id(dev);
unsigned cmdbuf_size = demo_cmdbuf(dev->cmdbuf.ptr.cpu,
dev->cmdbuf.size,
&batch->pool,
&batch->key,
batch->encoder->ptr.gpu,
encoder_id,
scissor,
zbias,
batch->occlusion_buffer.gpu,
pipeline_background,
pipeline_background_partial,
pipeline_store,
clear_pipeline_textures,
batch->clear,
batch->clear_depth,
batch->clear_stencil);
unsigned cmdbuf_size = demo_cmdbuf(
dev->cmdbuf.ptr.cpu, dev->cmdbuf.size, &batch->pool, &batch->key,
batch->encoder->ptr.gpu, encoder_id, scissor, zbias,
batch->occlusion_buffer.gpu, pipeline_background,
pipeline_background_partial, pipeline_store, clear_pipeline_textures,
batch->clear, batch->clear_depth, batch->clear_stencil);
/* Generate the mapping table from the BO list */
demo_mem_map(dev->memmap.ptr.cpu, dev->memmap.size, handles, handle_count,
@ -1113,8 +1086,7 @@ agx_invalidate_resource(struct pipe_context *pctx,
}
static struct pipe_context *
agx_create_context(struct pipe_screen *screen,
void *priv, unsigned flags)
agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
{
struct agx_context *ctx = rzalloc(NULL, struct agx_context);
struct pipe_context *pctx = &ctx->base;
@ -1165,24 +1137,23 @@ agx_create_context(struct pipe_screen *screen,
}
static void
agx_flush_frontbuffer(struct pipe_screen *_screen,
struct pipe_context *pctx,
struct pipe_resource *prsrc,
unsigned level, unsigned layer,
void *context_private, struct pipe_box *box)
agx_flush_frontbuffer(struct pipe_screen *_screen, struct pipe_context *pctx,
struct pipe_resource *prsrc, unsigned level,
unsigned layer, void *context_private,
struct pipe_box *box)
{
struct agx_resource *rsrc = (struct agx_resource *) prsrc;
struct agx_screen *agx_screen = (struct agx_screen*)_screen;
struct agx_resource *rsrc = (struct agx_resource *)prsrc;
struct agx_screen *agx_screen = (struct agx_screen *)_screen;
struct sw_winsys *winsys = agx_screen->winsys;
/* Dump the framebuffer */
assert (rsrc->dt);
assert(rsrc->dt);
void *map = winsys->displaytarget_map(winsys, rsrc->dt, PIPE_USAGE_DEFAULT);
assert(map != NULL);
if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED) {
ail_detile(rsrc->bo->ptr.cpu, map, &rsrc->layout, 0, rsrc->dt_stride,
0, 0, rsrc->base.width0, rsrc->base.height0);
ail_detile(rsrc->bo->ptr.cpu, map, &rsrc->layout, 0, rsrc->dt_stride, 0,
0, rsrc->base.width0, rsrc->base.height0);
} else {
assert(rsrc->modifier == DRM_FORMAT_MOD_LINEAR);
memcpy(map, rsrc->bo->ptr.cpu, rsrc->dt_stride * rsrc->base.height0);
@ -1192,25 +1163,25 @@ agx_flush_frontbuffer(struct pipe_screen *_screen,
}
static const char *
agx_get_vendor(struct pipe_screen* pscreen)
agx_get_vendor(struct pipe_screen *pscreen)
{
return "Mesa";
}
static const char *
agx_get_device_vendor(struct pipe_screen* pscreen)
agx_get_device_vendor(struct pipe_screen *pscreen)
{
return "Apple";
}
static const char *
agx_get_name(struct pipe_screen* pscreen)
agx_get_name(struct pipe_screen *pscreen)
{
return "Apple M1 (G13G B0)";
}
static int
agx_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
bool is_deqp = agx_device(pscreen)->debug & AGX_DBG_DEQP;
@ -1290,7 +1261,7 @@ agx_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
return is_deqp ? 1 : 0;
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
return 256;
@ -1364,15 +1335,13 @@ agx_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_SUPPORTED_PRIM_MODES:
case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART:
return BITFIELD_BIT(PIPE_PRIM_POINTS) |
BITFIELD_BIT(PIPE_PRIM_LINES) |
return BITFIELD_BIT(PIPE_PRIM_POINTS) | BITFIELD_BIT(PIPE_PRIM_LINES) |
BITFIELD_BIT(PIPE_PRIM_LINE_STRIP) |
BITFIELD_BIT(PIPE_PRIM_LINE_LOOP) |
BITFIELD_BIT(PIPE_PRIM_TRIANGLES) |
BITFIELD_BIT(PIPE_PRIM_TRIANGLE_STRIP) |
BITFIELD_BIT(PIPE_PRIM_TRIANGLE_FAN) |
BITFIELD_BIT(PIPE_PRIM_QUADS) |
BITFIELD_BIT(PIPE_PRIM_QUAD_STRIP);
BITFIELD_BIT(PIPE_PRIM_QUADS) | BITFIELD_BIT(PIPE_PRIM_QUAD_STRIP);
default:
return u_pipe_screen_get_param_defaults(pscreen, param);
@ -1380,8 +1349,7 @@ agx_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
}
static float
agx_get_paramf(struct pipe_screen* pscreen,
enum pipe_capf param)
agx_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
{
switch (param) {
case PIPE_CAPF_MIN_LINE_WIDTH:
@ -1420,14 +1388,12 @@ agx_get_paramf(struct pipe_screen* pscreen,
}
static int
agx_get_shader_param(struct pipe_screen* pscreen,
enum pipe_shader_type shader,
agx_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
enum pipe_shader_cap param)
{
bool is_no16 = agx_device(pscreen)->debug & AGX_DBG_NO16;
if (shader != PIPE_SHADER_VERTEX &&
shader != PIPE_SHADER_FRAGMENT)
if (shader != PIPE_SHADER_VERTEX && shader != PIPE_SHADER_FRAGMENT)
return 0;
/* this is probably not totally correct.. but it's a start: */
@ -1511,30 +1477,21 @@ agx_get_shader_param(struct pipe_screen* pscreen,
}
static int
agx_get_compute_param(struct pipe_screen *pscreen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param,
void *ret)
agx_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
enum pipe_compute_cap param, void *ret)
{
return 0;
}
static bool
agx_is_format_supported(struct pipe_screen* pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned storage_sample_count,
unsigned usage)
agx_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format,
enum pipe_texture_target target, unsigned sample_count,
unsigned storage_sample_count, unsigned usage)
{
assert(target == PIPE_BUFFER ||
target == PIPE_TEXTURE_1D ||
target == PIPE_TEXTURE_1D_ARRAY ||
target == PIPE_TEXTURE_2D ||
target == PIPE_TEXTURE_2D_ARRAY ||
target == PIPE_TEXTURE_RECT ||
target == PIPE_TEXTURE_3D ||
target == PIPE_TEXTURE_CUBE ||
assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D ||
target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D ||
target == PIPE_TEXTURE_2D_ARRAY || target == PIPE_TEXTURE_RECT ||
target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE ||
target == PIPE_TEXTURE_CUBE_ARRAY);
if (sample_count > 1)
@ -1587,9 +1544,8 @@ agx_is_format_supported(struct pipe_screen* pscreen,
}
static void
agx_query_dmabuf_modifiers(struct pipe_screen *screen,
enum pipe_format format, int max,
uint64_t *modifiers,
agx_query_dmabuf_modifiers(struct pipe_screen *screen, enum pipe_format format,
int max, uint64_t *modifiers,
unsigned int *external_only, int *out_count)
{
int i;
@ -1611,9 +1567,8 @@ agx_query_dmabuf_modifiers(struct pipe_screen *screen,
}
static bool
agx_is_dmabuf_modifier_supported(struct pipe_screen *screen,
uint64_t modifier, enum pipe_format format,
bool *external_only)
agx_is_dmabuf_modifier_supported(struct pipe_screen *screen, uint64_t modifier,
enum pipe_format format, bool *external_only)
{
if (external_only)
*external_only = false;
@ -1635,24 +1590,20 @@ agx_destroy_screen(struct pipe_screen *screen)
}
static void
agx_fence_reference(struct pipe_screen *screen,
struct pipe_fence_handle **ptr,
agx_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence)
{
}
static bool
agx_fence_finish(struct pipe_screen *screen,
struct pipe_context *ctx,
struct pipe_fence_handle *fence,
uint64_t timeout)
agx_fence_finish(struct pipe_screen *screen, struct pipe_context *ctx,
struct pipe_fence_handle *fence, uint64_t timeout)
{
return true;
}
static const void *
agx_get_compiler_options(struct pipe_screen *pscreen,
enum pipe_shader_ir ir,
agx_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir,
enum pipe_shader_type shader)
{
return &agx_nir_options;
@ -1668,7 +1619,7 @@ agx_resource_set_stencil(struct pipe_resource *prsrc,
static struct pipe_resource *
agx_resource_get_stencil(struct pipe_resource *prsrc)
{
return (struct pipe_resource *) agx_resource(prsrc)->separate_stencil;
return (struct pipe_resource *)agx_resource(prsrc)->separate_stencil;
}
static enum pipe_format
@ -1678,14 +1629,14 @@ agx_resource_get_internal_format(struct pipe_resource *prsrc)
}
static const struct u_transfer_vtbl transfer_vtbl = {
.resource_create = agx_resource_create,
.resource_destroy = agx_resource_destroy,
.transfer_map = agx_transfer_map,
.transfer_unmap = agx_transfer_unmap,
.transfer_flush_region = agx_transfer_flush_region,
.get_internal_format = agx_resource_get_internal_format,
.set_stencil = agx_resource_set_stencil,
.get_stencil = agx_resource_get_stencil,
.resource_create = agx_resource_create,
.resource_destroy = agx_resource_destroy,
.transfer_map = agx_transfer_map,
.transfer_unmap = agx_transfer_unmap,
.transfer_flush_region = agx_transfer_flush_region,
.get_internal_format = agx_resource_get_internal_format,
.set_stencil = agx_resource_set_stencil,
.get_stencil = agx_resource_get_stencil,
};
struct pipe_screen *
@ -1751,11 +1702,10 @@ agx_screen_create(int fd, struct renderonly *ro, struct sw_winsys *winsys)
screen->resource_create = u_transfer_helper_resource_create;
screen->resource_destroy = u_transfer_helper_resource_destroy;
screen->transfer_helper = u_transfer_helper_create(&transfer_vtbl,
U_TRANSFER_HELPER_SEPARATE_Z32S8 |
U_TRANSFER_HELPER_SEPARATE_STENCIL |
U_TRANSFER_HELPER_MSAA_MAP |
U_TRANSFER_HELPER_Z24_IN_Z32F);
screen->transfer_helper = u_transfer_helper_create(
&transfer_vtbl,
U_TRANSFER_HELPER_SEPARATE_Z32S8 | U_TRANSFER_HELPER_SEPARATE_STENCIL |
U_TRANSFER_HELPER_MSAA_MAP | U_TRANSFER_HELPER_Z24_IN_Z32F);
return screen;
}

View file

@ -31,8 +31,8 @@ struct pipe_screen;
struct sw_winsys;
struct renderonly;
struct pipe_screen *
agx_screen_create(int fd, struct renderonly *ro, struct sw_winsys *winsys);
struct pipe_screen *agx_screen_create(int fd, struct renderonly *ro,
struct sw_winsys *winsys);
#ifdef __cplusplus
}

View file

@ -26,7 +26,7 @@ static bool
agx_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
struct agx_context *ctx = agx_context(pctx);
struct agx_query *query = (struct agx_query *) pquery;
struct agx_query *query = (struct agx_query *)pquery;
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
@ -40,7 +40,8 @@ agx_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
* avoid the flush.
*/
if (query->writer)
agx_flush_batch_for_reason(ctx, query->writer, "Occlusion overwritten");
agx_flush_batch_for_reason(ctx, query->writer,
"Occlusion overwritten");
assert(query->writer == NULL);
@ -56,7 +57,7 @@ static bool
agx_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
struct agx_context *ctx = agx_context(pctx);
struct agx_query *query = (struct agx_query *) pquery;
struct agx_query *query = (struct agx_query *)pquery;
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
@ -72,12 +73,10 @@ agx_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
}
static bool
agx_get_query_result(struct pipe_context *pctx,
struct pipe_query *pquery,
bool wait,
union pipe_query_result *vresult)
agx_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
bool wait, union pipe_query_result *vresult)
{
struct agx_query *query = (struct agx_query *) pquery;
struct agx_query *query = (struct agx_query *)pquery;
struct agx_context *ctx = agx_context(pctx);
switch (query->type) {
@ -134,10 +133,11 @@ agx_get_oq_index(struct agx_batch *batch, struct agx_query *query)
/* Allocate if needed */
if (query->writer == NULL) {
query->writer = batch;
query->writer_index = util_dynarray_num_elements(&batch->occlusion_queries,
struct agx_query *);
query->writer_index = util_dynarray_num_elements(
&batch->occlusion_queries, struct agx_query *);
util_dynarray_append(&batch->occlusion_queries, struct agx_query *, query);
util_dynarray_append(&batch->occlusion_queries, struct agx_query *,
query);
}
assert(query->writer == batch);
@ -150,7 +150,7 @@ agx_get_oq_index(struct agx_batch *batch, struct agx_query *query)
void
agx_finish_batch_occlusion_queries(struct agx_batch *batch)
{
uint64_t *results = (uint64_t *) batch->occlusion_buffer.cpu;
uint64_t *results = (uint64_t *)batch->occlusion_buffer.cpu;
util_dynarray_foreach(&batch->occlusion_queries, struct agx_query *, it) {
struct agx_query *query = *it;

File diff suppressed because it is too large Load diff

View file

@ -25,21 +25,21 @@
#ifndef AGX_STATE_H
#define AGX_STATE_H
#include "gallium/include/pipe/p_context.h"
#include "gallium/include/pipe/p_state.h"
#include "gallium/include/pipe/p_screen.h"
#include "gallium/auxiliary/util/u_blitter.h"
#include "asahi/lib/agx_pack.h"
#include "asahi/lib/agx_bo.h"
#include "asahi/lib/agx_device.h"
#include "asahi/lib/pool.h"
#include "asahi/lib/agx_tilebuffer.h"
#include "asahi/lib/agx_nir_lower_vbo.h"
#include "asahi/compiler/agx_compile.h"
#include "asahi/layout/layout.h"
#include "asahi/lib/agx_bo.h"
#include "asahi/lib/agx_device.h"
#include "asahi/lib/agx_nir_lower_vbo.h"
#include "asahi/lib/agx_pack.h"
#include "asahi/lib/agx_tilebuffer.h"
#include "asahi/lib/pool.h"
#include "compiler/nir/nir_lower_blend.h"
#include "util/hash_table.h"
#include "gallium/auxiliary/util/u_blitter.h"
#include "gallium/include/pipe/p_context.h"
#include "gallium/include/pipe/p_screen.h"
#include "gallium/include/pipe/p_state.h"
#include "util/bitset.h"
#include "util/hash_table.h"
#include "agx_meta.h"
struct agx_streamout_target {
@ -175,25 +175,25 @@ union asahi_shader_key {
};
enum agx_dirty {
AGX_DIRTY_VERTEX = BITFIELD_BIT(0),
AGX_DIRTY_VERTEX = BITFIELD_BIT(0),
AGX_DIRTY_VIEWPORT = BITFIELD_BIT(1),
AGX_DIRTY_SCISSOR_ZBIAS = BITFIELD_BIT(2),
AGX_DIRTY_ZS = BITFIELD_BIT(3),
AGX_DIRTY_SCISSOR_ZBIAS = BITFIELD_BIT(2),
AGX_DIRTY_ZS = BITFIELD_BIT(3),
AGX_DIRTY_STENCIL_REF = BITFIELD_BIT(4),
AGX_DIRTY_RS = BITFIELD_BIT(5),
AGX_DIRTY_RS = BITFIELD_BIT(5),
AGX_DIRTY_SPRITE_COORD_MODE = BITFIELD_BIT(6),
AGX_DIRTY_PRIM = BITFIELD_BIT(7),
AGX_DIRTY_PRIM = BITFIELD_BIT(7),
/* Vertex/fragment pipelines, including uniforms and textures */
AGX_DIRTY_VS = BITFIELD_BIT(8),
AGX_DIRTY_FS = BITFIELD_BIT(9),
AGX_DIRTY_VS = BITFIELD_BIT(8),
AGX_DIRTY_FS = BITFIELD_BIT(9),
/* Just the progs themselves */
AGX_DIRTY_VS_PROG = BITFIELD_BIT(10),
AGX_DIRTY_FS_PROG = BITFIELD_BIT(11),
AGX_DIRTY_VS_PROG = BITFIELD_BIT(10),
AGX_DIRTY_FS_PROG = BITFIELD_BIT(11),
AGX_DIRTY_BLEND = BITFIELD_BIT(12),
AGX_DIRTY_QUERY = BITFIELD_BIT(13),
AGX_DIRTY_BLEND = BITFIELD_BIT(12),
AGX_DIRTY_QUERY = BITFIELD_BIT(13),
};
#define AGX_MAX_BATCHES (2)
@ -253,7 +253,7 @@ struct agx_context {
static inline struct agx_context *
agx_context(struct pipe_context *pctx)
{
return (struct agx_context *) pctx;
return (struct agx_context *)pctx;
}
void agx_init_query_functions(struct pipe_context *ctx);
@ -325,17 +325,17 @@ agx_device(struct pipe_screen *p)
return &(agx_screen(p)->dev);
}
#define perf_debug(dev, ...) \
do { \
if (unlikely((dev)->debug & AGX_DBG_PERF)) \
mesa_logw(__VA_ARGS__); \
} while(0)
#define perf_debug(dev, ...) \
do { \
if (unlikely((dev)->debug & AGX_DBG_PERF)) \
mesa_logw(__VA_ARGS__); \
} while (0)
#define perf_debug_ctx(ctx, ...) \
perf_debug(agx_device((ctx)->base.screen), __VA_ARGS__);
#define perf_debug_ctx(ctx, ...) \
perf_debug(agx_device((ctx)->base.screen), __VA_ARGS__);
struct agx_resource {
struct pipe_resource base;
struct pipe_resource base;
uint64_t modifier;
/* Should probably be part of the modifier. Affects the tiling algorithm, or
@ -347,7 +347,7 @@ struct agx_resource {
struct agx_bo *bo;
/* Software backing (XXX) */
struct sw_displaytarget *dt;
struct sw_displaytarget *dt;
unsigned dt_stride;
struct renderonly_scanout *scanout;
@ -367,7 +367,7 @@ struct agx_resource {
static inline struct agx_resource *
agx_resource(struct pipe_resource *pctx)
{
return (struct agx_resource *) pctx;
return (struct agx_resource *)pctx;
}
static inline bool
@ -385,7 +385,7 @@ agx_resource_valid(struct agx_resource *rsrc, int level)
static inline void *
agx_map_texture_cpu(struct agx_resource *rsrc, unsigned level, unsigned z)
{
return ((uint8_t *) rsrc->bo->ptr.cpu) +
return ((uint8_t *)rsrc->bo->ptr.cpu) +
ail_get_layer_level_B(&rsrc->layout, z, level);
}
@ -393,7 +393,7 @@ static inline uint64_t
agx_map_texture_gpu(struct agx_resource *rsrc, unsigned z)
{
return rsrc->bo->ptr.gpu +
(uint64_t) ail_get_layer_offset_B(&rsrc->layout, z);
(uint64_t)ail_get_layer_offset_B(&rsrc->layout, z);
}
struct agx_transfer {
@ -411,15 +411,12 @@ agx_transfer(struct pipe_transfer *p)
return (struct agx_transfer *)p;
}
uint64_t
agx_push_location(struct agx_batch *batch, struct agx_push push,
enum pipe_shader_type stage);
uint64_t agx_push_location(struct agx_batch *batch, struct agx_push push,
enum pipe_shader_type stage);
bool
agx_batch_is_active(struct agx_batch *batch);
bool agx_batch_is_active(struct agx_batch *batch);
uint64_t
agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt);
uint64_t agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt);
/* Add a BO to a batch. This needs to be amortized O(1) since it's called in
* hot paths. To achieve this we model BO lists by bit sets */
@ -444,9 +441,9 @@ agx_batch_add_bo(struct agx_batch *batch, struct agx_bo *bo)
{
/* Double the size of the BO list if we run out, this is amortized O(1) */
if (unlikely(bo->handle > agx_batch_bo_list_bits(batch))) {
batch->bo_list.set = rerzalloc(batch->ctx, batch->bo_list.set, BITSET_WORD,
batch->bo_list.word_count,
batch->bo_list.word_count * 2);
batch->bo_list.set =
rerzalloc(batch->ctx, batch->bo_list.set, BITSET_WORD,
batch->bo_list.word_count, batch->bo_list.word_count * 2);
batch->bo_list.word_count *= 2;
}
@ -465,14 +462,18 @@ agx_batch_num_bo(struct agx_batch *batch)
return __bitset_count(batch->bo_list.set, batch->bo_list.word_count);
}
#define AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) \
BITSET_FOREACH_SET(handle, (batch)->bo_list.set, agx_batch_bo_list_bits(batch))
#define AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) \
BITSET_FOREACH_SET(handle, (batch)->bo_list.set, \
agx_batch_bo_list_bits(batch))
void agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch);
void agx_flush_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch, const char *reason);
void agx_flush_batch_for_reason(struct agx_context *ctx,
struct agx_batch *batch, const char *reason);
void agx_flush_all(struct agx_context *ctx, const char *reason);
void agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);
void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);
void agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc,
const char *reason);
void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc,
const char *reason);
void agx_flush_batches_writing_occlusion_queries(struct agx_context *ctx);
void agx_flush_occlusion_queries(struct agx_context *ctx);
@ -480,32 +481,28 @@ void agx_flush_occlusion_queries(struct agx_context *ctx);
void agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc);
void agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc);
bool agx_any_batch_uses_resource(struct agx_context *ctx, struct agx_resource *rsrc);
bool agx_any_batch_uses_resource(struct agx_context *ctx,
struct agx_resource *rsrc);
struct agx_batch *agx_get_batch(struct agx_context *ctx);
void agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch);
/* Blit shaders */
void
agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
bool render_cond);
void agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
bool render_cond);
void agx_blit(struct pipe_context *pipe,
const struct pipe_blit_info *info);
void agx_blit(struct pipe_context *pipe, const struct pipe_blit_info *info);
/* Batch logic */
void
agx_batch_init_state(struct agx_batch *batch);
void agx_batch_init_state(struct agx_batch *batch);
uint64_t
agx_build_meta(struct agx_batch *batch, bool store, bool partial_render);
uint64_t agx_build_meta(struct agx_batch *batch, bool store,
bool partial_render);
/* Query management */
uint16_t
agx_get_oq_index(struct agx_batch *batch, struct agx_query *query);
uint16_t agx_get_oq_index(struct agx_batch *batch, struct agx_query *query);
void
agx_finish_batch_occlusion_queries(struct agx_batch *batch);
void agx_finish_batch_occlusion_queries(struct agx_batch *batch);
#endif

View file

@ -21,8 +21,8 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
#include "agx_state.h"
#include "asahi/lib/agx_pack.h"
#include "agx_state.h"
/* Computes the address for a push uniform, adding referenced BOs to the
* current batch as necessary. Note anything uploaded via the batch's pool does
@ -30,8 +30,7 @@
* once at submit time. */
static uint64_t
agx_const_buffer_ptr(struct agx_batch *batch,
struct pipe_constant_buffer *cb)
agx_const_buffer_ptr(struct agx_batch *batch, struct pipe_constant_buffer *cb)
{
if (cb->buffer) {
struct agx_resource *rsrc = agx_resource(cb->buffer);
@ -39,9 +38,9 @@ agx_const_buffer_ptr(struct agx_batch *batch,
return rsrc->bo->ptr.gpu + cb->buffer_offset;
} else {
return agx_pool_upload_aligned(&batch->pool,
((uint8_t *) cb->user_buffer) + cb->buffer_offset,
cb->buffer_size - cb->buffer_offset, 64);
return agx_pool_upload_aligned(
&batch->pool, ((uint8_t *)cb->user_buffer) + cb->buffer_offset,
cb->buffer_size - cb->buffer_offset, 64);
}
}
@ -55,7 +54,8 @@ agx_push_location_direct(struct agx_batch *batch, struct agx_push push,
switch (push.type) {
case AGX_PUSH_UBO_BASES: {
unsigned count = util_last_bit(st->cb_mask);
struct agx_ptr ptr = agx_pool_alloc_aligned(&batch->pool, count * sizeof(uint64_t), 8);
struct agx_ptr ptr =
agx_pool_alloc_aligned(&batch->pool, count * sizeof(uint64_t), 8);
uint64_t *addresses = ptr.cpu;
for (unsigned i = 0; i < count; ++i) {
@ -67,7 +67,8 @@ agx_push_location_direct(struct agx_batch *batch, struct agx_push push,
}
case AGX_PUSH_VBO_BASE: {
struct agx_ptr ptr = agx_pool_alloc_aligned(&batch->pool, sizeof(uint64_t), 8);
struct agx_ptr ptr =
agx_pool_alloc_aligned(&batch->pool, sizeof(uint64_t), 8);
uint64_t *address = ptr.cpu;
assert(ctx->vb_mask & BITFIELD_BIT(push.vbo) && "oob");
@ -87,14 +88,14 @@ agx_push_location_direct(struct agx_batch *batch, struct agx_push push,
return ptr.gpu;
}
case AGX_PUSH_BLEND_CONST:
{
case AGX_PUSH_BLEND_CONST: {
return agx_pool_upload_aligned(&batch->pool, &ctx->blend_color,
sizeof(ctx->blend_color), 8);
sizeof(ctx->blend_color), 8);
}
case AGX_PUSH_TEXTURE_BASE: {
struct agx_ptr ptr = agx_pool_alloc_aligned(&batch->pool, sizeof(uint64_t), 8);
struct agx_ptr ptr =
agx_pool_alloc_aligned(&batch->pool, sizeof(uint64_t), 8);
uint64_t *address = ptr.cpu;
*address = batch->textures;
return ptr.gpu;

View file

@ -21,9 +21,9 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#
#include "magic.h"
#include <stdint.h>
#include "agx_state.h"
#include "magic.h"
/* The structures managed in this file appear to be software defined (either in
* the macOS kernel driver or in the AGX firmware) */
@ -32,7 +32,8 @@
static uint64_t
demo_unk6(struct agx_pool *pool)
{
struct agx_ptr ptr = agx_pool_alloc_aligned(pool, 0x4000 * sizeof(uint64_t), 64);
struct agx_ptr ptr =
agx_pool_alloc_aligned(pool, 0x4000 * sizeof(uint64_t), 64);
uint64_t *buf = ptr.cpu;
memset(buf, 0, sizeof(*buf));
@ -120,7 +121,8 @@ asahi_pack_iogpu_attachment(void *out, struct agx_resource *rsrc,
}
static unsigned
asahi_pack_iogpu_attachments(void *out, struct pipe_framebuffer_state *framebuffer)
asahi_pack_iogpu_attachments(void *out,
struct pipe_framebuffer_state *framebuffer)
{
unsigned total_attachment_size = asahi_size_attachments(framebuffer);
struct agx_iogpu_attachment_packed *attachments = out;
@ -133,42 +135,34 @@ asahi_pack_iogpu_attachments(void *out, struct pipe_framebuffer_state *framebuff
}
if (framebuffer->zsbuf) {
struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture);
struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture);
asahi_pack_iogpu_attachment(attachments + (nr++), rsrc,
total_attachment_size);
if (rsrc->separate_stencil) {
asahi_pack_iogpu_attachment(attachments + (nr++),
rsrc, total_attachment_size);
if (rsrc->separate_stencil) {
asahi_pack_iogpu_attachment(attachments + (nr++),
rsrc->separate_stencil,
total_attachment_size);
}
rsrc->separate_stencil,
total_attachment_size);
}
}
return nr;
}
unsigned
demo_cmdbuf(uint64_t *buf, size_t size,
struct agx_pool *pool,
struct pipe_framebuffer_state *framebuffer,
uint64_t encoder_ptr,
uint64_t encoder_id,
uint64_t scissor_ptr,
uint64_t depth_bias_ptr,
uint64_t occlusion_ptr,
uint32_t pipeline_clear,
uint32_t pipeline_load,
uint32_t pipeline_store,
bool clear_pipeline_textures,
unsigned clear_buffers,
double clear_depth,
unsigned clear_stencil)
demo_cmdbuf(uint64_t *buf, size_t size, struct agx_pool *pool,
struct pipe_framebuffer_state *framebuffer, uint64_t encoder_ptr,
uint64_t encoder_id, uint64_t scissor_ptr, uint64_t depth_bias_ptr,
uint64_t occlusion_ptr, uint32_t pipeline_clear,
uint32_t pipeline_load, uint32_t pipeline_store,
bool clear_pipeline_textures, unsigned clear_buffers,
double clear_depth, unsigned clear_stencil)
{
bool should_clear_depth = clear_buffers & PIPE_CLEAR_DEPTH;
bool should_clear_stencil = clear_buffers & PIPE_CLEAR_STENCIL;
uint32_t *map = (uint32_t *) buf;
uint32_t *map = (uint32_t *)buf;
memset(map, 0, 518 * 4);
uint64_t deflake_buffer = demo_zero(pool, 0x7e0);
@ -187,7 +181,8 @@ demo_cmdbuf(uint64_t *buf, size_t size,
cfg.deflake_2 = deflake_2;
cfg.deflake_3 = deflake_buffer;
cfg.clear_pipeline_bind = 0xffff8002 | (clear_pipeline_textures ? 0x210 : 0);
cfg.clear_pipeline_bind =
0xffff8002 | (clear_pipeline_textures ? 0x210 : 0);
cfg.clear_pipeline = pipeline_clear;
/* store pipeline used when entire frame completes */
@ -223,8 +218,7 @@ demo_cmdbuf(uint64_t *buf, size_t size,
if (zsres->separate_stencil) {
sres = zsres->separate_stencil;
stencil_buffer = agx_map_surface_resource(zsbuf,
sres);
stencil_buffer = agx_map_surface_resource(zsbuf, sres);
}
if (zres) {
@ -235,7 +229,8 @@ demo_cmdbuf(uint64_t *buf, size_t size,
cfg.depth_buffer_3 = depth_buffer;
if (ail_is_compressed(&zres->layout)) {
uint64_t accel_buffer = depth_buffer + zres->layout.metadata_offset_B;
uint64_t accel_buffer =
depth_buffer + zres->layout.metadata_offset_B;
cfg.depth_acceleration_buffer_1 = accel_buffer;
cfg.depth_acceleration_buffer_2 = accel_buffer;
cfg.depth_acceleration_buffer_3 = accel_buffer;
@ -253,7 +248,8 @@ demo_cmdbuf(uint64_t *buf, size_t size,
cfg.stencil_buffer_3 = stencil_buffer;
if (ail_is_compressed(&sres->layout)) {
uint64_t accel_buffer = stencil_buffer + sres->layout.metadata_offset_B;
uint64_t accel_buffer =
stencil_buffer + sres->layout.metadata_offset_B;
cfg.stencil_acceleration_buffer_1 = accel_buffer;
cfg.stencil_acceleration_buffer_2 = accel_buffer;
cfg.stencil_acceleration_buffer_3 = accel_buffer;
@ -309,13 +305,13 @@ demo_cmdbuf(uint64_t *buf, size_t size,
unsigned offset_unk = (484 * 4);
unsigned offset_attachments = (496 * 4);
unsigned nr_attachments =
asahi_pack_iogpu_attachments(map + (offset_attachments / 4) + 4,
framebuffer);
unsigned nr_attachments = asahi_pack_iogpu_attachments(
map + (offset_attachments / 4) + 4, framebuffer);
map[(offset_attachments / 4) + 3] = nr_attachments;
unsigned total_size = offset_attachments + (AGX_IOGPU_ATTACHMENT_LENGTH * nr_attachments) + 16;
unsigned total_size =
offset_attachments + (AGX_IOGPU_ATTACHMENT_LENGTH * nr_attachments) + 16;
agx_pack(map, IOGPU_HEADER, cfg) {
cfg.total_size = total_size;
@ -329,7 +325,8 @@ demo_cmdbuf(uint64_t *buf, size_t size,
}
static struct agx_map_header
demo_map_header(uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size, unsigned count)
demo_map_header(uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size,
unsigned count)
{
/* Structure: header followed by resource groups. For now, we use a single
* resource group for every resource. This could be optimized.
@ -338,7 +335,7 @@ demo_map_header(uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size, u
length += count * sizeof(struct agx_map_entry);
assert(length < 0x10000);
return (struct agx_map_header) {
return (struct agx_map_header){
.cmdbuf_id = cmdbuf_id,
.segment_count = 1,
.length = length,
@ -356,19 +353,21 @@ demo_mem_map(void *map, size_t size, unsigned *handles, unsigned count,
uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size)
{
struct agx_map_header *header = map;
struct agx_map_entry *entries = (struct agx_map_entry *) (((uint8_t *) map) + sizeof(*header));
struct agx_map_entry *end = (struct agx_map_entry *) (((uint8_t *) map) + size);
struct agx_map_entry *entries =
(struct agx_map_entry *)(((uint8_t *)map) + sizeof(*header));
struct agx_map_entry *end =
(struct agx_map_entry *)(((uint8_t *)map) + size);
/* Header precedes the entry */
*header = demo_map_header(cmdbuf_id, encoder_id, cmdbuf_size, count);
/* Add an entry for each BO mapped */
for (unsigned i = 0; i < count; ++i) {
assert((entries + i) < end);
entries[i] = (struct agx_map_entry) {
.resource_id = { handles[i] },
.resource_unk = { 0x20 },
.resource_flags = { 0x1 },
assert((entries + i) < end);
entries[i] = (struct agx_map_entry){
.resource_id = {handles[i]},
.resource_unk = {0x20},
.resource_flags = {0x1},
.resource_count = 1,
};
}

View file

@ -24,29 +24,20 @@
#ifndef __ASAHI_MAGIC_H
#define __ASAHI_MAGIC_H
#include "agx_state.h"
#include <stdint.h>
#include "agx_state.h"
unsigned
demo_cmdbuf(uint64_t *buf, size_t size,
struct agx_pool *pool,
struct pipe_framebuffer_state *framebuffer,
uint64_t encoder_ptr,
uint64_t encoder_id,
uint64_t scissor_ptr,
uint64_t depth_bias_ptr,
uint64_t occlusion_ptr,
uint32_t pipeline_clear,
uint32_t pipeline_load,
uint32_t pipeline_store,
bool clear_pipeline_textures,
unsigned clear_buffers,
double clear_depth,
unsigned clear_stencil);
unsigned demo_cmdbuf(uint64_t *buf, size_t size, struct agx_pool *pool,
struct pipe_framebuffer_state *framebuffer,
uint64_t encoder_ptr, uint64_t encoder_id,
uint64_t scissor_ptr, uint64_t depth_bias_ptr,
uint64_t occlusion_ptr, uint32_t pipeline_clear,
uint32_t pipeline_load, uint32_t pipeline_store,
bool clear_pipeline_textures, unsigned clear_buffers,
double clear_depth, unsigned clear_stencil);
void
demo_mem_map(void *map, size_t size, unsigned *handles,
unsigned count, uint64_t cmdbuf_id, uint64_t
encoder_id, unsigned cmdbuf_size);
void demo_mem_map(void *map, size_t size, unsigned *handles, unsigned count,
uint64_t cmdbuf_id, uint64_t encoder_id,
unsigned cmdbuf_size);
#endif