mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 19:30:11 +01:00
intel: Add executor tool
Add a tool that programs the hardware the minimum amount to be
able to execute compute shaders and then executes a script that
can perform data manipulation and dispatch execution of the shaders
(written in Xe assembly).
The goal is to have a tool to experiment directly with certain
assembly instructions and the shared units without having to
instrument the drivers.
To make it more convenient to write assembly, a few macros (indicated by
the @-symbol) are expanded into full instructions.
For example, the script
```
local r = execute {
data={ [42] = 0x100 },
src=[[
@mov g1 42
@read g2 g1
@id g3
add(8) g4<1>UD g2<8,8,1>UD g3<8,8,1>UD { align1 @1 1Q };
@write g3 g4
@eot
]]
}
dump(r, 4)
```
produces
```
[0x00000000] 0x00000100 0x00000101 0x00000102 0x00000103
```
There's a help message inside the code that describes the script
environment and the macros for assembly sources.
Acked-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30062>
This commit is contained in:
parent
6267585778
commit
e72bf2d02f
12 changed files with 1729 additions and 0 deletions
41
src/intel/executor/examples/bfi.lua
Normal file
41
src/intel/executor/examples/bfi.lua
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
-- BFI seems available on Gfx9, need to fix the emission code for that.
|
||||
check_verx10(110, 120, 125, 200)
|
||||
|
||||
--- CPU reference model of the BFI1 + BFI2 (bit-field insert) sequence.
-- Inserts the low `width` bits of `c` into `d` starting at bit `offset`.
-- @param a  field width; only the low 5 bits are used
-- @param b  field offset; only the low 5 bits are used
-- @param c  value whose low bits are inserted
-- @param d  value that supplies every bit outside the field
-- @return   the combined value, truncated to 32 bits
function BFI_simulation(a, b, c, d)
  local width = a & 0x1F
  local offset = b & 0x1F
  -- Lua integers are 64-bit while the shader works on 32-bit UD registers,
  -- so bits shifted past bit 31 must be dropped for the comparison with the
  -- hardware result to be meaningful (e.g. width = 12, offset = 28).
  local mask = (((1 << width) - 1) << offset) & 0xFFFFFFFF
  return (((c << offset) & mask) | (d & ~mask)) & 0xFFFFFFFF
end
|
||||
|
||||
function BFI(a, b, c, d)
|
||||
local r = execute {
|
||||
data = { [0] = a, b, c, d },
|
||||
src = [[
|
||||
@id g9
|
||||
@mov g11 0
|
||||
@mov g12 1
|
||||
@mov g13 2
|
||||
@mov g14 3
|
||||
|
||||
@read g1 g11
|
||||
@read g2 g12
|
||||
@read g3 g13
|
||||
@read g4 g14
|
||||
|
||||
bfi1(8) g5<1>UD g1<8,8,1>UD g2<8,8,1>UD { align1 @1 1Q };
|
||||
bfi2(8) g6<1>UD g5<8,8,1>UD g3<8,8,1>UD g4<8,8,1>UD { align1 @1 1Q };
|
||||
|
||||
@write g9 g6
|
||||
@eot
|
||||
]],
|
||||
}
|
||||
return r[0]
|
||||
end
|
||||
|
||||
--- Render an integer as "0x" followed by at least eight lowercase hex digits.
function Hex(v)
  local text = string.format("0x%08x", v)
  return text
end
|
||||
|
||||
local a, b, c, d = 12, 12, 0xAAAAAAAA, 0xBBBBBBBB
|
||||
|
||||
print("calculated", Hex(BFI(a, b, c, d)))
|
||||
print("expected", Hex(BFI_simulation(a, b, c, d)))
|
||||
41
src/intel/executor/examples/dp4a.lua
Normal file
41
src/intel/executor/examples/dp4a.lua
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
--[[
|
||||
|
||||
Execute the example from the Dot Product 4 Accumulate
|
||||
instruction as seen in the PRM.
|
||||
|
||||
mov (1) r1.0:d 0x0102037F:d
|
||||
// (char4)(0x1,0x2,0x3,0x7F)
|
||||
mov (1) r2.0:d 50:d
|
||||
dp4a (1) r3.0:d r2:d r1:d r1:d
|
||||
// r3.0 = 50 + (0x1*0x1 + 0x2*0x2 + 0x3*0x3 + 0x7F*0x7F)
|
||||
// = 50 + (1 + 4 + 9 + 16129)
|
||||
// = 16193
|
||||
|
||||
--]]
|
||||
|
||||
check_ver(12)
|
||||
|
||||
--- CPU reference for dot-product-4-accumulate.
-- @param a  array whose elements 1..4 are the first factors
-- @param b  array whose elements 1..4 are the second factors
-- @param c  accumulator start value
-- @return   c + a[1]*b[1] + a[2]*b[2] + a[3]*b[3] + a[4]*b[4]
function DP4A(a, b, c)
  local acc = c
  local lane = 1
  while lane <= 4 do
    acc = acc + a[lane] * b[lane]
    lane = lane + 1
  end
  return acc
end
|
||||
|
||||
local r = execute {
|
||||
src = [[
|
||||
@id g9
|
||||
|
||||
@mov g1 0x0102037F
|
||||
@mov g2 50
|
||||
|
||||
dp4a(8) g3<1>UD g2<8,8,1>UD g1<8,8,1>UD g1<8,8,1>UD { align1 @1 1Q };
|
||||
|
||||
@write g9 g3
|
||||
@eot
|
||||
]],
|
||||
}
|
||||
|
||||
print("expected", DP4A({1,2,3,0x7F}, {1,2,3,0x7F}, 50))
|
||||
print("calculated", r[0])
|
||||
18
src/intel/executor/examples/help_example.lua
Normal file
18
src/intel/executor/examples/help_example.lua
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
-- Example from the help message.
|
||||
|
||||
local r = execute {
|
||||
data={ [42] = 0x100 },
|
||||
src=[[
|
||||
@mov g1 42
|
||||
@read g2 g1
|
||||
|
||||
@id g3
|
||||
|
||||
add(8) g4<1>UD g2<8,8,1>UD g3<8,8,1>UD { align1 @1 1Q };
|
||||
|
||||
@write g3 g4
|
||||
@eot
|
||||
]]
|
||||
}
|
||||
|
||||
dump(r, 4)
|
||||
6
src/intel/executor/examples/nop.lua
Normal file
6
src/intel/executor/examples/nop.lua
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
execute {
|
||||
src = [[
|
||||
nop;
|
||||
@eot
|
||||
]],
|
||||
}
|
||||
20
src/intel/executor/examples/test.lua
Normal file
20
src/intel/executor/examples/test.lua
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
local data = {}
|
||||
for i = 0, 8-1 do
|
||||
data[i] = i * 4
|
||||
end
|
||||
|
||||
local r = execute {
|
||||
data = data,
|
||||
src = [[
|
||||
@id g1
|
||||
@read g3 g1
|
||||
|
||||
add(8) g3<1>UD g3<8,8,1>UD 0x100UD { align1 1Q };
|
||||
|
||||
@write g1 g3
|
||||
|
||||
@eot
|
||||
]],
|
||||
}
|
||||
|
||||
dump(r, 8)
|
||||
94
src/intel/executor/executor.h
Normal file
94
src/intel/executor/executor.h
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
* Copyright © 2024 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef EXECUTOR_H
|
||||
#define EXECUTOR_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "intel/dev/intel_device_info.h"
|
||||
#include "intel/isl/isl.h"
|
||||
|
||||
typedef struct {
|
||||
uint32_t size;
|
||||
uint32_t handle;
|
||||
void *map;
|
||||
void *cursor;
|
||||
uint64_t addr;
|
||||
} executor_bo;
|
||||
|
||||
typedef struct {
|
||||
void *mem_ctx;
|
||||
|
||||
struct intel_device_info *devinfo;
|
||||
struct isl_device *isl_dev;
|
||||
int fd;
|
||||
|
||||
struct {
|
||||
uint32_t ctx_id;
|
||||
} i915;
|
||||
|
||||
struct {
|
||||
uint32_t vm_id;
|
||||
uint32_t queue_id;
|
||||
} xe;
|
||||
|
||||
struct {
|
||||
executor_bo batch;
|
||||
executor_bo extra;
|
||||
executor_bo data;
|
||||
} bo;
|
||||
|
||||
uint64_t batch_start;
|
||||
} executor_context;
|
||||
|
||||
typedef struct {
|
||||
const char *original_src;
|
||||
|
||||
void *kernel_bin;
|
||||
uint32_t kernel_size;
|
||||
} executor_params;
|
||||
|
||||
typedef struct {
|
||||
uint64_t offset;
|
||||
} executor_address;
|
||||
|
||||
__attribute__((unused)) static uint64_t
|
||||
executor_combine_address(void *data, void *location,
|
||||
executor_address address, uint32_t delta)
|
||||
{
|
||||
return address.offset + delta;
|
||||
}
|
||||
|
||||
executor_address executor_address_of_ptr(executor_bo *bo, void *ptr);
|
||||
|
||||
void *executor_alloc_bytes(executor_bo *bo, uint32_t size);
|
||||
void *executor_alloc_bytes_aligned(executor_bo *bo, uint32_t size, uint32_t alignment);
|
||||
|
||||
void failf(const char *fmt, ...) PRINTFLIKE(1, 2);
|
||||
|
||||
const char *executor_apply_macros(executor_context *ec, const char *original_src);
|
||||
|
||||
#ifdef genX
|
||||
# include "executor_genx.h"
|
||||
#else
|
||||
# define genX(x) gfx9_##x
|
||||
# include "executor_genx.h"
|
||||
# undef genX
|
||||
# define genX(x) gfx11_##x
|
||||
# include "executor_genx.h"
|
||||
# undef genX
|
||||
# define genX(x) gfx12_##x
|
||||
# include "executor_genx.h"
|
||||
# undef genX
|
||||
# define genX(x) gfx125_##x
|
||||
# include "executor_genx.h"
|
||||
# undef genX
|
||||
# define genX(x) gfx20_##x
|
||||
# include "executor_genx.h"
|
||||
# undef genX
|
||||
#endif
|
||||
|
||||
#endif /* EXECUTOR_H */
|
||||
183
src/intel/executor/executor_genx.c
Normal file
183
src/intel/executor/executor_genx.c
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
/*
|
||||
* Copyright © 2024 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "executor.h"
|
||||
|
||||
#ifdef HAVE_VALGRIND
|
||||
#include <valgrind.h>
|
||||
#include <memcheck.h>
|
||||
#define VG(x) x
|
||||
#else
|
||||
#define VG(x) ((void)0)
|
||||
#endif
|
||||
|
||||
#define __gen_address_type executor_address
|
||||
#define __gen_combine_address executor_combine_address
|
||||
#define __gen_user_data void
|
||||
|
||||
#include "intel/genxml/gen_macros.h"
|
||||
#include "intel/genxml/genX_pack.h"
|
||||
|
||||
#define __executor_cmd_length(cmd) cmd ## _length
|
||||
#define __executor_cmd_header(cmd) cmd ## _header
|
||||
#define __executor_cmd_pack(cmd) cmd ## _pack
|
||||
|
||||
#define executor_batch_emit(cmd, name) \
|
||||
for (struct cmd name = { __executor_cmd_header(cmd) }, \
|
||||
*_dst = executor_alloc_bytes(&ec->bo.batch, __executor_cmd_length(cmd) * 4); \
|
||||
__builtin_expect(_dst != NULL, 1); \
|
||||
({ __executor_cmd_pack(cmd)(0, _dst, &name); \
|
||||
VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __executor_cmd_length(cmd) * 4)); \
|
||||
_dst = NULL; \
|
||||
}))
|
||||
|
||||
static void
|
||||
emit_pipe_control(executor_context *ec)
|
||||
{
|
||||
executor_batch_emit(GENX(PIPE_CONTROL), pc) {
|
||||
#if GFX_VER >= 12
|
||||
pc.HDCPipelineFlushEnable = true;
|
||||
#endif
|
||||
pc.PipeControlFlushEnable = true;
|
||||
pc.CommandStreamerStallEnable = true;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_state_base_address(executor_context *ec, uint32_t mocs)
|
||||
{
|
||||
/* Use the full address for everything. */
|
||||
const executor_address base_address = {0};
|
||||
const uint32_t size = (1 << 20) - 1;
|
||||
|
||||
executor_batch_emit(GENX(STATE_BASE_ADDRESS), sba) {
|
||||
sba.GeneralStateBaseAddress = base_address;
|
||||
sba.GeneralStateBaseAddressModifyEnable = true;
|
||||
sba.GeneralStateBufferSize = size;
|
||||
sba.GeneralStateBufferSizeModifyEnable = true;
|
||||
sba.GeneralStateMOCS = mocs;
|
||||
|
||||
sba.DynamicStateBaseAddress = base_address;
|
||||
sba.DynamicStateBaseAddressModifyEnable = true;
|
||||
sba.DynamicStateBufferSize = size;
|
||||
sba.DynamicStateBufferSizeModifyEnable = true;
|
||||
sba.DynamicStateMOCS = mocs;
|
||||
|
||||
sba.InstructionBaseAddress = base_address;
|
||||
sba.InstructionBaseAddressModifyEnable = true;
|
||||
sba.InstructionBufferSize = size;
|
||||
sba.InstructionBuffersizeModifyEnable = true;
|
||||
sba.InstructionMOCS = mocs;
|
||||
|
||||
sba.IndirectObjectBaseAddress = base_address;
|
||||
sba.IndirectObjectBaseAddressModifyEnable = true;
|
||||
sba.IndirectObjectBufferSize = size;
|
||||
sba.IndirectObjectBufferSizeModifyEnable = true;
|
||||
sba.IndirectObjectMOCS = mocs;
|
||||
|
||||
sba.SurfaceStateMOCS = mocs;
|
||||
sba.StatelessDataPortAccessMOCS = mocs;
|
||||
|
||||
#if GFX_VER >= 11
|
||||
sba.BindlessSamplerStateMOCS = mocs;
|
||||
#endif
|
||||
sba.BindlessSurfaceStateMOCS = mocs;
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
sba.L1CacheControl = L1CC_WB;
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
void
|
||||
genX(emit_execute)(executor_context *ec, const executor_params *params)
|
||||
{
|
||||
uint32_t *kernel = executor_alloc_bytes(&ec->bo.extra, params->kernel_size);
|
||||
memcpy(kernel, params->kernel_bin, params->kernel_size);
|
||||
executor_address kernel_addr = executor_address_of_ptr(&ec->bo.extra, kernel);
|
||||
|
||||
/* TODO: Let SIMD be a parameter. */
|
||||
|
||||
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
|
||||
.KernelStartPointer = kernel_addr.offset,
|
||||
.NumberofThreadsinGPGPUThreadGroup = 1,
|
||||
};
|
||||
|
||||
void *b = executor_alloc_bytes_aligned(&ec->bo.batch, 0, 256);
|
||||
ec->batch_start = executor_address_of_ptr(&ec->bo.batch, b).offset;
|
||||
|
||||
emit_pipe_control(ec);
|
||||
|
||||
#if GFX_VERx10 < 200
|
||||
executor_batch_emit(GENX(PIPELINE_SELECT), ps) {
|
||||
ps.PipelineSelection = GPGPU;
|
||||
ps.MaskBits = 0x3;
|
||||
}
|
||||
emit_pipe_control(ec);
|
||||
#endif
|
||||
|
||||
const uint32_t mocs = isl_mocs(ec->isl_dev, 0, false);
|
||||
|
||||
emit_state_base_address(ec, mocs);
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
executor_batch_emit(GENX(STATE_COMPUTE_MODE), cm) {
|
||||
cm.Mask1 = 0xffff;
|
||||
#if GFX_VERx10 >= 200
|
||||
cm.Mask2 = 0xffff;
|
||||
#endif
|
||||
}
|
||||
|
||||
executor_batch_emit(GENX(CFE_STATE), cfe) {
|
||||
cfe.MaximumNumberofThreads = 64;
|
||||
}
|
||||
#else
|
||||
executor_batch_emit(GENX(MEDIA_VFE_STATE), vfe) {
|
||||
vfe.NumberofURBEntries = 2;
|
||||
vfe.MaximumNumberofThreads = 64;
|
||||
}
|
||||
#endif
|
||||
|
||||
emit_pipe_control(ec);
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
executor_batch_emit(GENX(COMPUTE_WALKER), cw) {
|
||||
#if GFX_VERx10 >= 200
|
||||
cw.SIMDSize = 1;
|
||||
cw.MessageSIMD = 1;
|
||||
#endif
|
||||
cw.ThreadGroupIDXDimension = 1;
|
||||
cw.ThreadGroupIDYDimension = 1;
|
||||
cw.ThreadGroupIDZDimension = 1;
|
||||
cw.ExecutionMask = 0xFFFFFFFF;
|
||||
cw.PostSync.MOCS = mocs;
|
||||
cw.InterfaceDescriptor = desc;
|
||||
};
|
||||
#else
|
||||
uint32_t *idd = executor_alloc_bytes_aligned(&ec->bo.extra, 8 * 4, 256);
|
||||
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, idd, &desc);
|
||||
|
||||
executor_address idd_addr = executor_address_of_ptr(&ec->bo.extra, idd);
|
||||
|
||||
executor_batch_emit(GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
|
||||
load.InterfaceDescriptorDataStartAddress = idd_addr.offset,
|
||||
load.InterfaceDescriptorTotalLength = 8 * 4;
|
||||
}
|
||||
|
||||
executor_batch_emit(GENX(GPGPU_WALKER), gw) {
|
||||
gw.ThreadGroupIDXDimension = 1;
|
||||
gw.ThreadGroupIDYDimension = 1;
|
||||
gw.ThreadGroupIDZDimension = 1;
|
||||
gw.RightExecutionMask = 0xFFFFFFFF;
|
||||
gw.BottomExecutionMask = 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
executor_batch_emit(GENX(MEDIA_STATE_FLUSH), msf);
|
||||
#endif
|
||||
|
||||
emit_pipe_control(ec);
|
||||
|
||||
executor_batch_emit(GENX(MI_BATCH_BUFFER_END), end);
|
||||
}
|
||||
10
src/intel/executor/executor_genx.h
Normal file
10
src/intel/executor/executor_genx.h
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
/*
|
||||
* Copyright © 2024 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef EXECUTOR_H
|
||||
#error This file must be included via executor.h
|
||||
#endif
|
||||
|
||||
void genX(emit_execute)(executor_context *ec, const executor_params *params);
|
||||
407
src/intel/executor/executor_macros.c
Normal file
407
src/intel/executor/executor_macros.c
Normal file
|
|
@ -0,0 +1,407 @@
|
|||
/*
|
||||
* Copyright © 2024 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "intel/compiler/brw_asm.h"
|
||||
|
||||
#include "executor.h"
|
||||
|
||||
/* Returns true when the string `s` begins with `prefix`.  An empty prefix
 * matches every string.
 */
static bool
startswith(const char *prefix, const char *s)
{
   const size_t prefix_len = strlen(prefix);
   return strncmp(prefix, s, prefix_len) == 0;
}
|
||||
|
||||
/* Returns a pointer to the first character of `start` after `prefix`.
 * The caller must guarantee that `start` begins with `prefix`; this is only
 * checked by an assert.
 */
static char *
skip_prefix(const char *prefix, char *start)
{
   assert(startswith(prefix, start));
   return start + strlen(prefix);
}
|
||||
|
||||
typedef struct {
|
||||
char **args;
|
||||
int count;
|
||||
} parse_args_result;
|
||||
|
||||
static parse_args_result
|
||||
parse_args(void *mem_ctx, char *c)
|
||||
{
|
||||
parse_args_result r = {0};
|
||||
|
||||
while (*c) {
|
||||
/* Skip spaces. */
|
||||
while (*c && isspace(*c))
|
||||
c++;
|
||||
if (!*c)
|
||||
break;
|
||||
|
||||
/* Copy non-spaces. */
|
||||
char *start = c;
|
||||
while (*c && !isspace(*c))
|
||||
c++;
|
||||
r.args = reralloc_array_size(mem_ctx, r.args, sizeof(char *), r.count + 1);
|
||||
r.args[r.count++] = ralloc_strndup(mem_ctx, start, c - start);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void
|
||||
executor_macro_mov(executor_context *ec, char **src, char *line)
|
||||
{
|
||||
char *c = skip_prefix("@mov", line);
|
||||
parse_args_result r = parse_args(ec->mem_ctx, c);
|
||||
|
||||
if (r.count != 2)
|
||||
failf("@mov needs 2 arguments, found %d\n", r.count);
|
||||
|
||||
const char *reg = r.args[0];
|
||||
char *value = r.args[1];
|
||||
|
||||
if (strchr(value, '.')) {
|
||||
union {
|
||||
float f;
|
||||
uint32_t u;
|
||||
} val;
|
||||
|
||||
val.f = strtof(value, NULL);
|
||||
|
||||
switch (ec->devinfo->verx10) {
|
||||
case 90:
|
||||
case 110:
|
||||
case 120:
|
||||
case 125: {
|
||||
ralloc_asprintf_append(src, "mov(8) %s<1>F 0x%08xF /* %f */ { align1 1Q };\n", reg, val.u, val.f);
|
||||
break;
|
||||
}
|
||||
case 200: {
|
||||
ralloc_asprintf_append(src, "mov(16) %s<1>F 0x%08xF /* %f */ { align1 1H };\n", reg, val.u, val.f);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
unreachable("invalid gfx version");
|
||||
}
|
||||
|
||||
} else {
|
||||
for (char *c = value; *c; c++)
|
||||
*c = tolower(*c);
|
||||
switch (ec->devinfo->verx10) {
|
||||
case 90:
|
||||
case 110:
|
||||
case 120:
|
||||
case 125: {
|
||||
ralloc_asprintf_append(src, "mov(8) %s<1>UD %sUD { align1 1Q };\n", reg, value);
|
||||
break;
|
||||
}
|
||||
|
||||
case 200: {
|
||||
ralloc_asprintf_append(src, "mov(16) %s<1>UD %sUD { align1 1H };\n", reg, value);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("invalid gfx version");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
executor_macro_syncnop(executor_context *ec, char **src, char *line)
|
||||
{
|
||||
switch (ec->devinfo->verx10) {
|
||||
case 90:
|
||||
case 110: {
|
||||
/* Not needed. */
|
||||
break;
|
||||
}
|
||||
|
||||
case 120: {
|
||||
ralloc_strcat(src, "sync nop(8) null<0,1,0>UD { align1 WE_all 1H @1 $1 };\n");
|
||||
break;
|
||||
}
|
||||
|
||||
case 125:
|
||||
case 200: {
|
||||
ralloc_strcat(src, "sync nop(8) null<0,1,0>UD { align1 WE_all 1H A@1 $1 };\n");
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("invalid gfx version");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
executor_macro_eot(executor_context *ec, char **src, char *line)
|
||||
{
|
||||
switch (ec->devinfo->verx10) {
|
||||
case 90:
|
||||
case 110: {
|
||||
ralloc_strcat(src,
|
||||
"mov(8) g127<1>UD g0<8;8,1>UD { align1 WE_all 1Q };\n"
|
||||
"send(8) null<1>UW g127<0,1,0>UD 0x82000010\n"
|
||||
" thread_spawner MsgDesc: mlen 1 rlen 0 { align1 WE_all 1Q EOT };\n");
|
||||
break;
|
||||
}
|
||||
case 120: {
|
||||
ralloc_strcat(src,
|
||||
"mov(8) g127<1>UD g0<8;8,1>UD { align1 WE_all 1Q };\n"
|
||||
"send(8) nullUD g127UD nullUD 0x02000000 0x00000000\n"
|
||||
" thread_spawner MsgDesc: mlen 1 ex_mlen 0 rlen 0 { align1 WE_all 1Q @1 EOT };\n");
|
||||
break;
|
||||
}
|
||||
|
||||
case 125: {
|
||||
ralloc_strcat(src,
|
||||
"mov(8) g127<1>UD g0<8;8,1>UD { align1 WE_all 1Q };\n"
|
||||
"send(8) nullUD g127UD nullUD 0x02000000 0x00000000\n"
|
||||
" gateway MsgDesc: (open) mlen 1 ex_mlen 0 rlen 0 { align1 WE_all 1Q A@1 EOT };\n");
|
||||
break;
|
||||
}
|
||||
|
||||
case 200: {
|
||||
ralloc_strcat(src,
|
||||
"mov(16) g127<1>UD g0<1,1,0>UD { align1 WE_all 1H };\n"
|
||||
"send(16) nullUD g127UD nullUD 0x02000000 0x00000000\n"
|
||||
" gateway MsgDesc: (open) mlen 1 ex_mlen 0 rlen 0 { align1 WE_all 1H I@1 EOT };\n");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
unreachable("invalid gfx version");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
executor_macro_id(executor_context *ec, char **src, char *line)
|
||||
{
|
||||
char *c = skip_prefix("@id", line);
|
||||
parse_args_result r = parse_args(ec->mem_ctx, c);
|
||||
|
||||
if (r.count != 1)
|
||||
failf("@id needs 1 argument, found %d\n", r.count);
|
||||
|
||||
const char *reg = r.args[0];
|
||||
|
||||
switch (ec->devinfo->verx10) {
|
||||
case 90:
|
||||
case 110:
|
||||
case 120:
|
||||
case 125: {
|
||||
ralloc_asprintf_append(src,
|
||||
"mov(8) g127<1>UW 0x76543210V { align1 WE_all 1Q };\n"
|
||||
"mov(8) %s<1>UD g127<8,8,1>UW { align1 WE_all 1Q @1 };\n", reg);
|
||||
break;
|
||||
}
|
||||
|
||||
case 200: {
|
||||
ralloc_asprintf_append(src,
|
||||
"mov(8) g127<1>UW 0x76543210V { align1 WE_all 1Q };\n"
|
||||
"add(8) g127.8<1>UW g127<1,1,0>UW 8UW { align1 WE_all 1Q @1 };\n"
|
||||
"mov(16) %s<1>UD g127<8,8,1>UW { align1 WE_all 1Q @1 };\n", reg);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("invalid gfx version");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
executor_macro_write(executor_context *ec, char **src, char *line)
|
||||
{
|
||||
char *c = skip_prefix("@write", line);
|
||||
parse_args_result r = parse_args(ec->mem_ctx, c);
|
||||
|
||||
if (r.count != 2)
|
||||
failf("@write needs 2 arguments, found %d\n", r.count);
|
||||
|
||||
const char *offset_reg = r.args[0];
|
||||
const char *data_reg = r.args[1];
|
||||
|
||||
assert(ec->bo.data.addr <= 0xFFFFFFFF);
|
||||
uint32_t base_addr = ec->bo.data.addr;
|
||||
|
||||
switch (ec->devinfo->verx10) {
|
||||
case 90:
|
||||
case 110:
|
||||
case 120: {
|
||||
const char *send_suffix = ec->devinfo->verx10 < 120 ? "s" : "";
|
||||
ralloc_asprintf_append(src,
|
||||
"mul(8) g127<1>UD %s<8;8,1>UD 0x4UW { align1 @1 1Q };\n"
|
||||
"add(8) g127<1>UD g127<8;8,1>UD 0x%08xUD { align1 @1 1Q };\n"
|
||||
"send%s(8) nullUD g127UD %sUD 0x2026efd 0x00000040\n"
|
||||
" dp data 1 MsgDesc: (DC untyped surface write, Surface = 253, "
|
||||
" SIMD8, Mask = 0xe) mlen 1 ex_mlen 1 rlen 0 "
|
||||
" { align1 1Q @1 $1 };\n",
|
||||
offset_reg, base_addr, send_suffix, data_reg);
|
||||
executor_macro_syncnop(ec, src, "@syncnop");
|
||||
break;
|
||||
}
|
||||
|
||||
case 125: {
|
||||
ralloc_asprintf_append(src,
|
||||
"mul(8) g127<1>UD %s<1;1,0>UD 0x4UW { align1 @1 1Q };\n"
|
||||
"add(8) g127<1>UD g127<1;1,0>UD 0x%08xUD { align1 @1 1Q };\n"
|
||||
"send(8) nullUD g127UD %sUD 0x02000504 0x00000040\n"
|
||||
" ugm MsgDesc: ( store, a32, d32, x, L1STATE_L3MOCS dst_len = 0, "
|
||||
" src0_len = 1, src1_len = 1, flat ) base_offset 0 "
|
||||
" { align1 1Q A@1 $1 };\n",
|
||||
offset_reg, base_addr, data_reg);
|
||||
executor_macro_syncnop(ec, src, "@syncnop");
|
||||
break;
|
||||
}
|
||||
|
||||
case 200: {
|
||||
ralloc_asprintf_append(src,
|
||||
"mul(16) g127<1>UD %s<1;1,0>UD 0x4UW { align1 @1 1Q };\n"
|
||||
"add(16) g127<1>UD g127<1;1,0>UD 0x%08xUD { align1 @1 1Q };\n"
|
||||
"send(16) nullUD g127UD %sUD 0x02000504 0x00000040\n"
|
||||
" ugm MsgDesc: ( store, a32, d32, x, L1STATE_L3MOCS dst_len = 0, "
|
||||
" src0_len = 1, src1_len = 1, flat ) base_offset 0 "
|
||||
" { align1 1Q A@1 $1 };\n",
|
||||
offset_reg, base_addr, data_reg);
|
||||
executor_macro_syncnop(ec, src, "@syncnop");
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("invalid gfx version");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
executor_macro_read(executor_context *ec, char **src, char *line)
|
||||
{
|
||||
char *c = skip_prefix("@read", line);
|
||||
parse_args_result r = parse_args(ec->mem_ctx, c);
|
||||
|
||||
if (r.count != 2)
|
||||
failf("@read needs 2 arguments, found %d\n", r.count);
|
||||
|
||||
/* Order follows underlying SEND, destination first. */
|
||||
const char *data_reg = r.args[0];
|
||||
const char *offset_reg = r.args[1];
|
||||
|
||||
assert(ec->bo.data.addr <= 0xFFFFFFFF);
|
||||
uint32_t base_addr = ec->bo.data.addr;
|
||||
|
||||
switch (ec->devinfo->verx10) {
|
||||
case 90:
|
||||
case 110:
|
||||
case 120: {
|
||||
const char *send_suffix = ec->devinfo->verx10 < 120 ? "s" : "";
|
||||
ralloc_asprintf_append(src,
|
||||
"mul(8) g127<1>UD %s<8;8,1>UD 0x4UW { align1 @1 1Q };\n"
|
||||
"add(8) g127<1>UD g127<8;8,1>UD 0x%08xUD { align1 @1 1Q };\n"
|
||||
"send%s(8) %sUD g127UD nullUD 0x2106efd 0x00000000\n"
|
||||
" dp data 1 MsgDesc: (DC untyped surface read, Surface = 253, "
|
||||
" SIMD8, Mask = 0xe) mlen 1 ex_mlen 0 rlen 1 "
|
||||
" { align1 1Q @1 $1 };\n",
|
||||
offset_reg, base_addr, send_suffix, data_reg);
|
||||
executor_macro_syncnop(ec, src, "@syncnop");
|
||||
break;
|
||||
}
|
||||
|
||||
case 125: {
|
||||
ralloc_asprintf_append(src,
|
||||
"mul(8) g127<1>UD %s<1;1,0>UD 0x4UW { align1 @1 1Q };\n"
|
||||
"add(8) g127<1>UD g127<1;1,0>UD 0x%08xUD { align1 @1 1Q };\n"
|
||||
"send(8) %sUD g127UD nullUD 0x02100500 0x00000000\n"
|
||||
" ugm MsgDesc: ( load, a32, d32, x, L1STATE_L3MOCS dst_len = 1, "
|
||||
" src0_len = 1, flat ) src1_len = 0 base_offset 0 "
|
||||
" { align1 1Q A@1 $1 };\n",
|
||||
offset_reg, base_addr, data_reg);
|
||||
executor_macro_syncnop(ec, src, "@syncnop");
|
||||
break;
|
||||
}
|
||||
|
||||
case 200: {
|
||||
ralloc_asprintf_append(src,
|
||||
"mul(8) g127<1>UD %s<1;1,0>UD 0x4UW { align1 @1 1Q };\n"
|
||||
"add(8) g127<1>UD g127<1;1,0>UD 0x%08xUD { align1 @1 1Q };\n"
|
||||
"send(8) %sUD g127UD nullUD 0x02100500 0x00000000\n"
|
||||
" ugm MsgDesc: ( load, a32, d32, x, L1STATE_L3MOCS dst_len = 1, "
|
||||
" src0_len = 1, flat ) src1_len = 0 base_offset 0 "
|
||||
" { align1 1Q A@1 $1 };\n",
|
||||
offset_reg, base_addr, data_reg);
|
||||
executor_macro_syncnop(ec, src, "@syncnop");
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("invalid gfx version");
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns a pointer to the '@' that starts a macro on this line, or NULL
 * when the first non-space character of `line` is not '@' (this includes
 * empty and all-space lines).
 */
static char *
find_macro_symbol(char *line)
{
   char *c = line;
   /* isspace() is only defined for values representable as unsigned char,
    * so cast to keep bytes >= 0x80 from being passed as negative ints.
    */
   while (isspace((unsigned char)*c))
      c++;
   return *c == '@' ? c : NULL;
}
|
||||
|
||||
/* Returns true when `line` starts with the macro `name` as a whole word,
 * i.e. the name is followed by the end of the string or by whitespace.
 * This keeps a name from matching a longer macro that merely starts with it.
 */
static bool
match_macro_name(const char *name, const char *line)
{
   if (!startswith(name, line))
      return false;
   line += strlen(name);
   /* Cast: isspace() requires a value representable as unsigned char. */
   return !*line || isspace((unsigned char)*line);
}
|
||||
|
||||
const char *
|
||||
executor_apply_macros(executor_context *ec, const char *original_src)
|
||||
{
|
||||
char *scratch = ralloc_strdup(ec->mem_ctx, original_src);
|
||||
|
||||
/* Create a ralloc'ed empty string so can call append to it later. */
|
||||
char *src = ralloc_strdup(ec->mem_ctx, "");
|
||||
|
||||
/* TODO: Create a @send macro for common combinations of MsgDesc. */
|
||||
static const struct {
|
||||
const char *name;
|
||||
void (*func)(executor_context *ec, char **output, char *line);
|
||||
} macros[] = {
|
||||
{ "@eot", executor_macro_eot },
|
||||
{ "@mov", executor_macro_mov },
|
||||
{ "@write", executor_macro_write },
|
||||
{ "@read", executor_macro_read },
|
||||
{ "@id", executor_macro_id },
|
||||
{ "@syncnop", executor_macro_syncnop },
|
||||
};
|
||||
|
||||
char *next = scratch;
|
||||
while (next) {
|
||||
char *line = next;
|
||||
char *end = line;
|
||||
|
||||
while (*end && *end != '\n') end++;
|
||||
next = *end ? end + 1 : NULL;
|
||||
*end = '\0';
|
||||
|
||||
char *macro = find_macro_symbol(line);
|
||||
if (!macro) {
|
||||
ralloc_asprintf_append(&src, "%s\n", line);
|
||||
} else {
|
||||
bool found = false;
|
||||
for (int i = 0; i < ARRAY_SIZE(macros); i++) {
|
||||
if (match_macro_name(macros[i].name, macro)) {
|
||||
macros[i].func(ec, &src, macro);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
failf("unsupported macro line: %s", macro);
|
||||
}
|
||||
}
|
||||
|
||||
return src;
|
||||
}
|
||||
850
src/intel/executor/executor_main.c
Normal file
850
src/intel/executor/executor_main.c
Normal file
|
|
@ -0,0 +1,850 @@
|
|||
/*
|
||||
* Copyright © 2024 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <lua.h>
|
||||
#include <lualib.h>
|
||||
#include <lauxlib.h>
|
||||
|
||||
#include "util/ralloc.h"
|
||||
|
||||
#include <xf86drm.h>
|
||||
#include "drm-uapi/i915_drm.h"
|
||||
#include "drm-uapi/xe_drm.h"
|
||||
|
||||
#include "intel/compiler/brw_asm.h"
|
||||
#include "intel/compiler/brw_isa_info.h"
|
||||
#include "intel/common/intel_gem.h"
|
||||
#include "intel/common/xe/intel_engine.h"
|
||||
#include "intel/decoder/intel_decoder.h"
|
||||
#include "intel/dev/intel_debug.h"
|
||||
|
||||
#include "executor.h"
|
||||
|
||||
enum {
|
||||
/* Predictable base addresses here make it easier to spot errors. */
|
||||
EXECUTOR_BO_BATCH_ADDR = 0x10000000,
|
||||
EXECUTOR_BO_EXTRA_ADDR = 0x20000000,
|
||||
EXECUTOR_BO_DATA_ADDR = 0x30000000,
|
||||
|
||||
/* Apply to all BOs. */
|
||||
EXECUTOR_BO_SIZE = 10 * 1024 * 1024,
|
||||
};
|
||||
|
||||
static void
|
||||
print_help()
|
||||
{
|
||||
printf(
|
||||
"Executes shaders written for Intel GPUs\n"
|
||||
"usage: executor FILENAME\n"
|
||||
"\n"
|
||||
"The input is a Lua script that can perform data manipulation\n"
|
||||
"and dispatch execution of compute shaders, written in Xe assembly,\n"
|
||||
"the same format used by the brw_asm assembler or when dumping\n"
|
||||
"shaders in debug mode.\n"
|
||||
"\n"
|
||||
"The goal is to have a tool to experiment directly with certain\n"
|
||||
"assembly instructions and the shared units without having to\n"
|
||||
"instrument the drivers.\n"
|
||||
"\n"
|
||||
"EXECUTION CONTEXT\n"
|
||||
"\n"
|
||||
"By default compute shaders are used with SIMD8 for Gfx9-125 and SIMD16\n"
|
||||
"for Xe2. Only a single thread is dispatched. A data buffer is used to\n"
|
||||
"pipe data into the shader and out of it, it is bound to the graphics\n"
|
||||
"address 0x%08x.\n"
|
||||
"\n"
|
||||
"The Gfx versions have differences in their assembly and shared units, so\n"
|
||||
"other than very simple examples, scripts for this program will be either\n"
|
||||
"specific to a version or provide shader variants for multiple versions.\n"
|
||||
"\n"
|
||||
"ASSEMBLY MACROS\n"
|
||||
"\n"
|
||||
"In addition to regular instructions, the follow macros will generate\n"
|
||||
"assembly code based on the Gfx version being executed. Unlike in regular\n"
|
||||
"instructions, REGs don't use regions and can't be immediates.\n"
|
||||
"\n"
|
||||
"- @eot\n"
|
||||
" Send an EOT message.\n"
|
||||
"\n"
|
||||
"- @mov REG IMM\n"
|
||||
" Like a regular MOV but accepts numbers in both decimal and\n"
|
||||
" floating-point.\n"
|
||||
"\n"
|
||||
"- @id REG\n"
|
||||
" Write a local invocation index into REG.\n"
|
||||
"\n"
|
||||
"- @read DST_REG OFFSET_REG\n"
|
||||
" Read 32-bit values from the memory buffer at OFFSET_REG into DST_REG.\n"
|
||||
"\n"
|
||||
"- @write OFFSET_REG SRC_REG\n"
|
||||
" Write 32-bit values from SRC_REG to the memory buffer at OFFSET_REG.\n"
|
||||
"\n"
|
||||
"- @syncnop\n"
|
||||
" Produce a coarse grained sync.nop (when applicable) to ensure data from\n"
|
||||
" macros above are read/written.\n"
|
||||
"\n"
|
||||
"LUA ENVIRONMENT\n"
|
||||
"\n"
|
||||
"In addition to the regular Lua standard library the following variables and.\n"
|
||||
"functions are available.\n"
|
||||
"\n"
|
||||
"- execute({src=STR, data=ARRAY}) -> ARRAY\n"
|
||||
" Takes a table as argument. The 'src' in the table contains the shader to be\n"
|
||||
" executed. The 'data' argument will be used to fill the data buffer with 32-bit\n"
|
||||
" values. The function returns an ARRAY with the contents of the data buffer\n"
|
||||
" after the shader completes.\n"
|
||||
"\n"
|
||||
"- dump(ARRAY, COUNT)\n"
|
||||
" Pretty print the COUNT first elements of an array of 32-bit values.\n"
|
||||
"\n"
|
||||
"- check_ver(V, ...), check_verx10(V, ...)\n"
|
||||
" Exit if the Gfx version being executed isn't in the arguments list.\n"
|
||||
"\n"
|
||||
"- ver, verx10\n"
|
||||
" Variables containing the Gfx version being executed.\n"
|
||||
"\n"
|
||||
"This program was compiled with %s.\n"
|
||||
"\n"
|
||||
"ENVIRONMENT VARIABLES\n"
|
||||
"\n"
|
||||
"The following INTEL_DEBUG values (comma separated) are used:\n"
|
||||
"\n"
|
||||
" - bat Dumps the batch buffer.\n"
|
||||
" - color Uses colore for the above.\n"
|
||||
" - cs Dumps the assembly after macro processing.\n"
|
||||
"\n"
|
||||
"EXAMPLE\n"
|
||||
"\n"
|
||||
"The following script\n"
|
||||
"\n"
|
||||
" local r = execute {\n"
|
||||
" data={ [42] = 0x100 },\n"
|
||||
" src=[[\n"
|
||||
" @mov g1 42\n"
|
||||
" @read g2 g1\n"
|
||||
"\n"
|
||||
" @id g3\n"
|
||||
"\n"
|
||||
" add(8) g4<1>UD g2<8,8,1>UD g3<8,8,1>UD { align1 @1 1Q };\n"
|
||||
"\n"
|
||||
" @write g3 g4\n"
|
||||
" ]]\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" dump(r, 4)\n"
|
||||
"\n"
|
||||
"Will produce the following output\n"
|
||||
"\n"
|
||||
" [0x00000000] 0x00000100 0x00000101 0x00000102 0x00000103\n"
|
||||
"\n"
|
||||
"More examples can be found in the examples/ directory in the source code.\n"
|
||||
"\n", EXECUTOR_BO_DATA_ADDR, LUA_RELEASE);
|
||||
}
|
||||
|
||||
static struct {
|
||||
struct intel_device_info devinfo;
|
||||
struct isl_device isl_dev;
|
||||
struct brw_isa_info isa;
|
||||
int fd;
|
||||
} E;
|
||||
|
||||
/* Call the per-generation build of `func` (gfx9_func, gfx11_func, ...) that
 * matches the verx10 of the opened device.
 *
 * The switch is wrapped in do/while(0) so that the macro expands to exactly
 * one statement and stays correct when used in an unbraced if/else.
 */
#define genX_call(func, ...)                                         \
   do {                                                              \
      switch (E.devinfo.verx10) {                                    \
      case 90:  gfx9_  ##func(__VA_ARGS__); break;                   \
      case 110: gfx11_ ##func(__VA_ARGS__); break;                   \
      case 120: gfx12_ ##func(__VA_ARGS__); break;                   \
      case 125: gfx125_##func(__VA_ARGS__); break;                   \
      case 200: gfx20_ ##func(__VA_ARGS__); break;                   \
      default:  unreachable("Unsupported hardware generation");      \
      }                                                              \
   } while (0)
|
||||
|
||||
static void
|
||||
executor_create_bo(executor_context *ec, executor_bo *bo, uint64_t addr, uint32_t size_in_bytes)
|
||||
{
|
||||
if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
|
||||
struct drm_i915_gem_create gem_create = {
|
||||
.size = size_in_bytes,
|
||||
};
|
||||
|
||||
int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
|
||||
if (err)
|
||||
failf("i915_gem_create");
|
||||
|
||||
struct drm_i915_gem_mmap_offset mm = {
|
||||
.handle = gem_create.handle,
|
||||
.flags = ec->devinfo->has_local_mem ? I915_MMAP_OFFSET_FIXED
|
||||
: I915_MMAP_OFFSET_WC,
|
||||
};
|
||||
|
||||
err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mm);
|
||||
if (err)
|
||||
failf("i915_gem_mmap_offset");
|
||||
|
||||
bo->handle = gem_create.handle;
|
||||
bo->map = mmap(NULL, size_in_bytes, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED, ec->fd, mm.offset);
|
||||
if (!bo->map)
|
||||
failf("mmap");
|
||||
} else {
|
||||
assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
|
||||
|
||||
struct drm_xe_gem_create gem_create = {
|
||||
.size = size_in_bytes,
|
||||
.cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
|
||||
.placement = 1u << ec->devinfo->mem.sram.mem.instance,
|
||||
};
|
||||
|
||||
int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create);
|
||||
if (err)
|
||||
failf("xe_gem_create");
|
||||
|
||||
struct drm_xe_gem_mmap_offset mm = {
|
||||
.handle = gem_create.handle,
|
||||
};
|
||||
|
||||
err = intel_ioctl(ec->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mm);
|
||||
if (err)
|
||||
failf("xe_gem_mmap_offset");
|
||||
|
||||
bo->handle = gem_create.handle;
|
||||
bo->map = mmap(NULL, size_in_bytes, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED, ec->fd, mm.offset);
|
||||
if (!bo->map)
|
||||
failf("mmap");
|
||||
}
|
||||
|
||||
bo->size = size_in_bytes;
|
||||
bo->addr = addr;
|
||||
bo->cursor = bo->map;
|
||||
}
|
||||
|
||||
static void
|
||||
executor_destroy_bo(executor_context *ec, executor_bo *bo)
|
||||
{
|
||||
struct drm_gem_close gem_close = {
|
||||
.handle = bo->handle,
|
||||
};
|
||||
|
||||
int err = munmap(bo->map, bo->size);
|
||||
if (err)
|
||||
failf("munmap");
|
||||
|
||||
err = intel_ioctl(ec->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
|
||||
if (err)
|
||||
failf("gem_close");
|
||||
|
||||
memset(bo, 0, sizeof(*bo));
|
||||
}
|
||||
|
||||
static void
|
||||
executor_print_bo(executor_bo *bo, const char *name)
|
||||
{
|
||||
assert((bo->cursor - bo->map) % 4 == 0);
|
||||
uint32_t *dw = bo->map;
|
||||
uint32_t len = (uint32_t *)bo->cursor - dw;
|
||||
|
||||
printf("=== %s (0x%08lx, %lu bytes) ===\n", name, bo->addr, bo->cursor - bo->map);
|
||||
|
||||
for (int i = 0; i < len; i++) {
|
||||
if ((i % 8) == 0) printf("[0x%08x] ", (i*4) + (uint32_t)bo->addr);
|
||||
printf("0x%08x ", dw[i]);
|
||||
if ((i % 8) == 7) printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void *
|
||||
executor_alloc_bytes(executor_bo *bo, uint32_t size)
|
||||
{
|
||||
return executor_alloc_bytes_aligned(bo, size, 0);
|
||||
}
|
||||
|
||||
void *
|
||||
executor_alloc_bytes_aligned(executor_bo *bo, uint32_t size, uint32_t alignment)
|
||||
{
|
||||
void *r = bo->cursor;
|
||||
if (alignment) {
|
||||
r = (void *)(((uintptr_t)r + alignment-1) & ~((uintptr_t)alignment-1));
|
||||
}
|
||||
bo->cursor = r + size;
|
||||
return r;
|
||||
}
|
||||
|
||||
executor_address
|
||||
executor_address_of_ptr(executor_bo *bo, void *ptr)
|
||||
{
|
||||
return (executor_address){ptr - bo->map + bo->addr};
|
||||
}
|
||||
|
||||
static int
|
||||
get_drm_device(struct intel_device_info *devinfo)
|
||||
{
|
||||
drmDevicePtr devices[8];
|
||||
int max_devices = drmGetDevices2(0, devices, 8);
|
||||
|
||||
int i, fd = -1;
|
||||
for (i = 0; i < max_devices; i++) {
|
||||
if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
|
||||
devices[i]->bustype == DRM_BUS_PCI &&
|
||||
devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
|
||||
fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
|
||||
if (fd < 0)
|
||||
continue;
|
||||
|
||||
if (!intel_get_device_info_from_fd(fd, devinfo, -1, -1) ||
|
||||
devinfo->ver < 8) {
|
||||
close(fd);
|
||||
fd = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Found a device! */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
static struct intel_batch_decode_bo
|
||||
decode_get_bo(void *_ec, bool ppgtt, uint64_t address)
|
||||
{
|
||||
executor_context *ec = _ec;
|
||||
struct intel_batch_decode_bo bo = {0};
|
||||
|
||||
if (address >= ec->bo.batch.addr && address < ec->bo.batch.addr + ec->bo.batch.size) {
|
||||
bo.addr = ec->bo.batch.addr;
|
||||
bo.size = ec->bo.batch.size;
|
||||
bo.map = ec->bo.batch.map;
|
||||
} else if (address >= ec->bo.extra.addr && address < ec->bo.extra.addr + ec->bo.extra.size) {
|
||||
bo.addr = ec->bo.extra.addr;
|
||||
bo.size = ec->bo.extra.size;
|
||||
bo.map = ec->bo.extra.map;
|
||||
} else if (address >= ec->bo.data.addr && address < ec->bo.data.addr + ec->bo.data.size) {
|
||||
bo.addr = ec->bo.data.addr;
|
||||
bo.size = ec->bo.data.size;
|
||||
bo.map = ec->bo.data.map;
|
||||
}
|
||||
|
||||
return bo;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
decode_get_state_size(void *_ec, uint64_t address, uint64_t base_address)
|
||||
{
|
||||
return EXECUTOR_BO_SIZE;
|
||||
}
|
||||
|
||||
static void
|
||||
parse_execute_data(executor_context *ec, lua_State *L, int table_idx)
|
||||
{
|
||||
uint32_t *data = ec->bo.data.map;
|
||||
|
||||
lua_pushvalue(L, table_idx);
|
||||
|
||||
lua_pushnil(L);
|
||||
while (lua_next(L, -2) != 0) {
|
||||
int val_idx = lua_gettop(L);
|
||||
int key_idx = val_idx - 1;
|
||||
|
||||
if (lua_type(L, key_idx) != LUA_TNUMBER || !lua_isinteger(L, key_idx))
|
||||
failf("invalid key for data in execute call");
|
||||
|
||||
lua_Integer key = lua_tointeger(L, key_idx);
|
||||
assert(key <= 10 * 1024 * 1024 / 4);
|
||||
lua_Integer val = lua_tointeger(L, val_idx);
|
||||
data[key] = val;
|
||||
|
||||
lua_pop(L, 1);
|
||||
}
|
||||
|
||||
lua_pop(L, 1);
|
||||
}
|
||||
|
||||
static void
|
||||
parse_execute_args(executor_context *ec, lua_State *L, executor_params *params)
|
||||
{
|
||||
int opts = lua_gettop(L);
|
||||
|
||||
lua_pushnil(L);
|
||||
|
||||
while (lua_next(L, opts) != 0) {
|
||||
int val_idx = lua_gettop(L);
|
||||
int key_idx = val_idx - 1;
|
||||
|
||||
if (lua_type(L, key_idx) != LUA_TSTRING) {
|
||||
lua_pop(L, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
const char *key = lua_tostring(L, key_idx);
|
||||
|
||||
if (!strcmp(key, "src")) {
|
||||
params->original_src = ralloc_strdup(ec->mem_ctx, luaL_checkstring(L, val_idx));
|
||||
} else if (!strcmp(key, "data")) {
|
||||
parse_execute_data(ec, L, val_idx);
|
||||
} else {
|
||||
failf("unknown parameter '%s' for execute()", key);
|
||||
}
|
||||
|
||||
lua_pop(L, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
executor_context_setup(executor_context *ec)
|
||||
{
|
||||
if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
|
||||
struct drm_i915_gem_context_create create = {0};
|
||||
int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
|
||||
if (err)
|
||||
failf("i915_gem_context_create");
|
||||
ec->i915.ctx_id = create.ctx_id;
|
||||
} else {
|
||||
assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
|
||||
|
||||
struct drm_xe_vm_create create = {
|
||||
.flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
|
||||
};
|
||||
int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_CREATE, &create);
|
||||
if (err)
|
||||
failf("xe_vm_create");
|
||||
ec->xe.vm_id = create.vm_id;
|
||||
|
||||
struct drm_xe_engine_class_instance instance = {0};
|
||||
|
||||
struct intel_query_engine_info *engines_info = xe_engine_get_info(ec->fd);
|
||||
assert(engines_info);
|
||||
|
||||
bool found_engine = false;
|
||||
for (int i = 0; i < engines_info->num_engines; i++) {
|
||||
struct intel_engine_class_instance *e = &engines_info->engines[i];
|
||||
if (e->engine_class == INTEL_ENGINE_CLASS_RENDER) {
|
||||
instance.engine_class = DRM_XE_ENGINE_CLASS_RENDER;
|
||||
instance.engine_instance = e->engine_instance;
|
||||
instance.gt_id = e->gt_id;
|
||||
found_engine = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(found_engine);
|
||||
|
||||
struct drm_xe_exec_queue_create queue_create = {
|
||||
.vm_id = ec->xe.vm_id,
|
||||
.width = 1,
|
||||
.num_placements = 1,
|
||||
.instances = (uintptr_t)&instance,
|
||||
};
|
||||
err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &queue_create);
|
||||
if (err)
|
||||
failf("xe_exec_queue_create");
|
||||
ec->xe.queue_id = queue_create.exec_queue_id;
|
||||
}
|
||||
|
||||
executor_create_bo(ec, &ec->bo.batch, EXECUTOR_BO_BATCH_ADDR, EXECUTOR_BO_SIZE);
|
||||
executor_create_bo(ec, &ec->bo.extra, EXECUTOR_BO_EXTRA_ADDR, EXECUTOR_BO_SIZE);
|
||||
executor_create_bo(ec, &ec->bo.data, EXECUTOR_BO_DATA_ADDR, EXECUTOR_BO_SIZE);
|
||||
|
||||
uint32_t *data = ec->bo.data.map;
|
||||
for (int i = 0; i < EXECUTOR_BO_SIZE / 4; i++)
|
||||
data[i] = 0xABABABAB;
|
||||
}
|
||||
|
||||
static void
|
||||
executor_context_dispatch(executor_context *ec)
|
||||
{
|
||||
if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
|
||||
struct drm_i915_gem_exec_object2 objs[] = {
|
||||
{
|
||||
.handle = ec->bo.batch.handle,
|
||||
.offset = ec->bo.batch.addr,
|
||||
.flags = EXEC_OBJECT_PINNED,
|
||||
},
|
||||
{
|
||||
.handle = ec->bo.extra.handle,
|
||||
.offset = ec->bo.extra.addr,
|
||||
.flags = EXEC_OBJECT_PINNED,
|
||||
},
|
||||
{
|
||||
.handle = ec->bo.data.handle,
|
||||
.offset = ec->bo.data.addr,
|
||||
.flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE,
|
||||
},
|
||||
};
|
||||
|
||||
struct drm_i915_gem_execbuffer2 exec = {0};
|
||||
exec.buffers_ptr = (uintptr_t)objs;
|
||||
exec.buffer_count = ARRAY_SIZE(objs);
|
||||
exec.batch_start_offset = ec->batch_start - ec->bo.batch.addr;
|
||||
exec.flags = I915_EXEC_BATCH_FIRST;
|
||||
exec.rsvd1 = ec->i915.ctx_id;
|
||||
|
||||
int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
|
||||
if (err)
|
||||
failf("i915_gem_execbuffer2");
|
||||
|
||||
struct drm_i915_gem_wait wait = {0};
|
||||
wait.bo_handle = ec->bo.batch.handle;
|
||||
wait.timeout_ns = INT64_MAX;
|
||||
|
||||
err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
|
||||
if (err)
|
||||
failf("i915_gem_wait");
|
||||
} else {
|
||||
assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
|
||||
|
||||
/* First syncobj is signalled by the binding operation and waited by the
|
||||
* execution of the batch buffer.
|
||||
*
|
||||
* Second syncobj is singalled by the execution of batch buffer and
|
||||
* waited at the end.
|
||||
*/
|
||||
uint32_t sync_handles[2] = {0};
|
||||
for (int i = 0; i < 2; i++) {
|
||||
struct drm_syncobj_create sync_create = {0};
|
||||
int err = intel_ioctl(ec->fd, DRM_IOCTL_SYNCOBJ_CREATE, &sync_create);
|
||||
if (err)
|
||||
failf("syncobj_create");
|
||||
sync_handles[i] = sync_create.handle;
|
||||
}
|
||||
|
||||
struct drm_xe_vm_bind_op bind_ops[] = {
|
||||
{
|
||||
.op = DRM_XE_VM_BIND_OP_MAP,
|
||||
.obj = ec->bo.batch.handle,
|
||||
.addr = ec->bo.batch.addr,
|
||||
.range = EXECUTOR_BO_SIZE,
|
||||
.pat_index = ec->devinfo->pat.cached_coherent.index,
|
||||
},
|
||||
{
|
||||
.op = DRM_XE_VM_BIND_OP_MAP,
|
||||
.obj = ec->bo.extra.handle,
|
||||
.addr = ec->bo.extra.addr,
|
||||
.range = EXECUTOR_BO_SIZE,
|
||||
.pat_index = ec->devinfo->pat.cached_coherent.index,
|
||||
},
|
||||
{
|
||||
.op = DRM_XE_VM_BIND_OP_MAP,
|
||||
.obj = ec->bo.data.handle,
|
||||
.addr = ec->bo.data.addr,
|
||||
.range = EXECUTOR_BO_SIZE,
|
||||
.pat_index = ec->devinfo->pat.cached_coherent.index,
|
||||
},
|
||||
};
|
||||
|
||||
struct drm_xe_sync bind_syncs[] = {
|
||||
{
|
||||
.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
|
||||
.handle = sync_handles[0],
|
||||
.flags = DRM_XE_SYNC_FLAG_SIGNAL,
|
||||
},
|
||||
};
|
||||
|
||||
struct drm_xe_vm_bind bind = {
|
||||
.vm_id = ec->xe.vm_id,
|
||||
.num_binds = ARRAY_SIZE(bind_ops),
|
||||
.vector_of_binds = (uintptr_t)bind_ops,
|
||||
.num_syncs = 1,
|
||||
.syncs = (uintptr_t)bind_syncs,
|
||||
};
|
||||
|
||||
int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_BIND, &bind);
|
||||
if (err)
|
||||
failf("xe_vm_bind");
|
||||
|
||||
struct drm_xe_sync exec_syncs[] = {
|
||||
{
|
||||
.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
|
||||
.handle = sync_handles[0],
|
||||
},
|
||||
{
|
||||
.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
|
||||
.handle = sync_handles[1],
|
||||
.flags = DRM_XE_SYNC_FLAG_SIGNAL,
|
||||
}
|
||||
};
|
||||
|
||||
struct drm_xe_exec exec = {
|
||||
.exec_queue_id = ec->xe.queue_id,
|
||||
.num_batch_buffer = 1,
|
||||
.address = ec->batch_start,
|
||||
.num_syncs = 2,
|
||||
.syncs = (uintptr_t)exec_syncs,
|
||||
};
|
||||
err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC, &exec);
|
||||
if (err)
|
||||
failf("xe_exec");
|
||||
|
||||
struct drm_syncobj_wait wait = {
|
||||
.count_handles = 1,
|
||||
.handles = (uintptr_t)&sync_handles[1],
|
||||
.timeout_nsec = INT64_MAX,
|
||||
};
|
||||
err = intel_ioctl(ec->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
|
||||
if (err)
|
||||
failf("syncobj_wait");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
executor_context_teardown(executor_context *ec)
|
||||
{
|
||||
executor_destroy_bo(ec, &ec->bo.batch);
|
||||
executor_destroy_bo(ec, &ec->bo.extra);
|
||||
executor_destroy_bo(ec, &ec->bo.data);
|
||||
|
||||
if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
|
||||
struct drm_i915_gem_context_destroy destroy = {
|
||||
.ctx_id = ec->i915.ctx_id,
|
||||
};
|
||||
int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
|
||||
if (err)
|
||||
failf("i915_gem_context_destroy");
|
||||
} else {
|
||||
assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
|
||||
|
||||
struct drm_xe_exec_queue_destroy queue_destroy = {
|
||||
.exec_queue_id = ec->xe.queue_id,
|
||||
};
|
||||
int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &queue_destroy);
|
||||
if (err)
|
||||
failf("xe_exec_queue_destroy");
|
||||
|
||||
struct drm_xe_vm_destroy destroy = {
|
||||
.vm_id = ec->xe.vm_id,
|
||||
};
|
||||
err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_DESTROY, &destroy);
|
||||
if (err)
|
||||
failf("xe_vm_destroy");
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
l_execute(lua_State *L)
|
||||
{
|
||||
executor_context ec = {
|
||||
.mem_ctx = ralloc_context(NULL),
|
||||
.devinfo = &E.devinfo,
|
||||
.isl_dev = &E.isl_dev,
|
||||
.fd = E.fd,
|
||||
};
|
||||
|
||||
executor_context_setup(&ec);
|
||||
|
||||
executor_params params = {0};
|
||||
|
||||
{
|
||||
if (lua_gettop(L) != 1)
|
||||
failf("execute() must have a single table argument");
|
||||
|
||||
parse_execute_args(&ec, L, ¶ms);
|
||||
|
||||
const char *src = executor_apply_macros(&ec, params.original_src);
|
||||
|
||||
FILE *f = fmemopen((void *)src, strlen(src), "r");
|
||||
brw_assemble_result asm = brw_assemble(ec.mem_ctx, ec.devinfo, f, "", 0);
|
||||
fclose(f);
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_CS) || !asm.bin) {
|
||||
printf("=== Processed assembly source ===\n"
|
||||
"%s"
|
||||
"=================================\n\n", src);
|
||||
}
|
||||
|
||||
if (!asm.bin)
|
||||
failf("assembler failure");
|
||||
|
||||
params.kernel_bin = asm.bin;
|
||||
params.kernel_size = asm.bin_size;
|
||||
}
|
||||
|
||||
genX_call(emit_execute, &ec, ¶ms);
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_BATCH)) {
|
||||
struct intel_batch_decode_ctx decoder;
|
||||
enum intel_batch_decode_flags flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
|
||||
if (INTEL_DEBUG(DEBUG_COLOR))
|
||||
flags |= INTEL_BATCH_DECODE_IN_COLOR;
|
||||
|
||||
intel_batch_decode_ctx_init_brw(&decoder, &E.isa, &E.devinfo, stdout,
|
||||
flags, NULL, decode_get_bo, decode_get_state_size, &ec);
|
||||
|
||||
assert(ec.bo.batch.cursor > ec.bo.batch.map);
|
||||
const int batch_offset = ec.batch_start - ec.bo.batch.addr;
|
||||
const int batch_size = (ec.bo.batch.cursor - ec.bo.batch.map) - batch_offset;
|
||||
assert(batch_offset < batch_size);
|
||||
|
||||
intel_print_batch(&decoder, ec.bo.batch.map, batch_size, ec.batch_start, false);
|
||||
|
||||
intel_batch_decode_ctx_finish(&decoder);
|
||||
}
|
||||
|
||||
executor_context_dispatch(&ec);
|
||||
|
||||
{
|
||||
/* TODO: Use userdata to return a wrapped C array instead of building
|
||||
* values. Could make integration with array operations better.
|
||||
*/
|
||||
uint32_t *data = ec.bo.data.map;
|
||||
const int n = ec.bo.data.size / 4;
|
||||
lua_createtable(L, n, 0);
|
||||
for (int i = 0; i < 8; i++) {
|
||||
lua_pushinteger(L, data[i]);
|
||||
lua_seti(L, -2, i);
|
||||
}
|
||||
}
|
||||
|
||||
executor_context_teardown(&ec);
|
||||
ralloc_free(ec.mem_ctx);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
l_dump(lua_State *L)
|
||||
{
|
||||
/* TODO: Use a table to add options for the dump, e.g.
|
||||
* starting offset, format, etc.
|
||||
*/
|
||||
|
||||
assert(lua_type(L, 1) == LUA_TTABLE);
|
||||
assert(lua_type(L, 2) == LUA_TNUMBER);
|
||||
assert(lua_isinteger(L, 2));
|
||||
|
||||
lua_Integer len_ = lua_tointeger(L, 2);
|
||||
assert(len_ >= 0 && len_ <= INT_MAX);
|
||||
int len = len_;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < len; i++) {
|
||||
if (i%8 == 0) printf("[0x%08x]", i * 4);
|
||||
lua_rawgeti(L, 1, i);
|
||||
lua_Integer val = lua_tointeger(L, -1);
|
||||
printf(" 0x%08x", (uint32_t)val);
|
||||
lua_pop(L, 1);
|
||||
if (i%8 == 7) printf("\n");
|
||||
}
|
||||
if (i%8 != 0) printf("\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
l_check_ver(lua_State *L)
|
||||
{
|
||||
int top = lua_gettop(L);
|
||||
for (int i = 1; i <= top; i++) {
|
||||
lua_Integer v = luaL_checknumber(L, i);
|
||||
if (E.devinfo.ver == v) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
failf("script doesn't support version=%d verx10=%d\n",
|
||||
E.devinfo.ver, E.devinfo.verx10);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
l_check_verx10(lua_State *L)
|
||||
{
|
||||
int top = lua_gettop(L);
|
||||
for (int i = 1; i <= top; i++) {
|
||||
lua_Integer v = luaL_checknumber(L, i);
|
||||
if (E.devinfo.verx10 == v) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
failf("script doesn't support version=%d verx10=%d\n",
|
||||
E.devinfo.ver, E.devinfo.verx10);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* TODO: Review numeric limits in the code, especially around Lua integer
 * conversion.
 */
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 2 ||
|
||||
!strcmp(argv[1], "--help") ||
|
||||
!strcmp(argv[1], "-help") ||
|
||||
!strcmp(argv[1], "-h") ||
|
||||
!strcmp(argv[1], "help")) {
|
||||
print_help();
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (argc > 2) {
|
||||
/* TODO: Expose extra arguments to the script as a variable. */
|
||||
failf("invalid extra arguments\nusage: executor FILENAME");
|
||||
return 1;
|
||||
}
|
||||
|
||||
process_intel_debug_variable();
|
||||
|
||||
E.fd = get_drm_device(&E.devinfo);
|
||||
isl_device_init(&E.isl_dev, &E.devinfo);
|
||||
brw_init_isa_info(&E.isa, &E.devinfo);
|
||||
assert(E.devinfo.kmd_type == INTEL_KMD_TYPE_I915 ||
|
||||
E.devinfo.kmd_type == INTEL_KMD_TYPE_XE);
|
||||
|
||||
lua_State *L = luaL_newstate();
|
||||
|
||||
/* TODO: Could be nice to export some kind of builder interface,
|
||||
* maybe even let the script construct a shader at the BRW IR
|
||||
* level and let the later passes kick in.
|
||||
*/
|
||||
|
||||
luaL_openlibs(L);
|
||||
|
||||
lua_pushinteger(L, E.devinfo.ver);
|
||||
lua_setglobal(L, "ver");
|
||||
|
||||
lua_pushinteger(L, E.devinfo.verx10);
|
||||
lua_setglobal(L, "verx10");
|
||||
|
||||
lua_pushcfunction(L, l_execute);
|
||||
lua_setglobal(L, "execute");
|
||||
|
||||
lua_pushcfunction(L, l_dump);
|
||||
lua_setglobal(L, "dump");
|
||||
|
||||
lua_pushcfunction(L, l_check_ver);
|
||||
lua_setglobal(L, "check_ver");
|
||||
|
||||
lua_pushcfunction(L, l_check_verx10);
|
||||
lua_setglobal(L, "check_verx10");
|
||||
|
||||
const char *filename = argv[1];
|
||||
int err = luaL_loadfile(L, filename);
|
||||
if (err)
|
||||
failf("failed to load script: %s", lua_tostring(L, -1));
|
||||
|
||||
err = lua_pcall(L, 0, 0, 0);
|
||||
if (err)
|
||||
failf("failed to run script: %s", lua_tostring(L, -1));
|
||||
|
||||
lua_close(L);
|
||||
close(E.fd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Print "ERROR: ", the printf-style formatted message and a newline to
 * stderr, then exit the process with status 1. Does not return.
 */
void
failf(const char *fmt, ...)
{
   va_list ap;

   fputs("ERROR: ", stderr);

   va_start(ap, fmt);
   vfprintf(stderr, fmt, ap);
   va_end(ap);

   fputc('\n', stderr);

   exit(1);
}
|
||||
58
src/intel/executor/meson.build
Normal file
58
src/intel/executor/meson.build
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
# Copyright © 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
# The executor needs Lua; leave this directory when it is not available.
if not dep_lua.found()
  subdir_done()
endif

executor_flags = [no_override_init_args, sse2_args]

executor_includes = [inc_include, inc_src, inc_intel]

# executor_genx.c is built once per listed hardware generation, each time
# with a different GFX_VERx10 definition.
executor_hw_libs = []
foreach v: ['90', '110', '120', '125', '200']
  executor_hw_libs += static_library(
    'executor_hw_ver@0@'.format(v),
    ['executor_genx.c', gen_xml_pack],
    include_directories: [executor_includes],
    c_args: [executor_flags, '-DGFX_VERx10=@0@'.format(v)],
    gnu_symbol_visibility: 'hidden',
    dependencies: [dep_valgrind, idep_genxml],
  )
endforeach

# The tool itself, linked against every per-generation library above.
executor = executable(
  'executor',
  ['executor_main.c', 'executor_macros.c'],
  dependencies: [
    dep_libdrm,
    dep_lua,
    dep_valgrind,
    idep_brw_asm,
    idep_genxml,
    idep_intel_decoder_brw,
    idep_intel_dev,
    idep_libintel_common,
  ],
  include_directories: [executor_includes],
  link_with: [executor_hw_libs],
  c_args: [executor_flags],
  gnu_symbol_visibility: 'hidden',
  install: true
)
|
||||
|
|
@ -23,6 +23,7 @@ if with_intel_hasvk or with_intel_vk or with_gallium_iris
|
|||
endif
|
||||
if with_intel_tools
|
||||
subdir('tools')
|
||||
subdir('executor')
|
||||
endif
|
||||
if get_option('vulkan-layers').contains('intel-nullhw')
|
||||
subdir('nullhw-layer')
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue