mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
radeonsi: Add compute support v3
v2: - Only dump shaders when env variable is set. v3: - Don't emit VGT registers. Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
This commit is contained in:
parent
4f7fe2cf2c
commit
302f53dc20
11 changed files with 378 additions and 49 deletions
|
|
@ -30,7 +30,7 @@
|
|||
#include <llvm-c/BitReader.h>
|
||||
#include <llvm-c/Core.h>
|
||||
|
||||
static LLVMModuleRef radeon_llvm_parse_bitcode(const unsigned char * bitcode,
|
||||
LLVMModuleRef radeon_llvm_parse_bitcode(const unsigned char * bitcode,
|
||||
unsigned bitcode_len)
|
||||
{
|
||||
LLVMMemoryBufferRef buf;
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@
|
|||
|
||||
#include <llvm-c/Core.h>
|
||||
|
||||
LLVMModuleRef radeon_llvm_parse_bitcode(const unsigned char * bitcode,
|
||||
unsigned bitcode_len);
|
||||
unsigned radeon_llvm_get_num_kernels(const unsigned char *bitcode, unsigned bitcode_len);
|
||||
LLVMModuleRef radeon_llvm_get_kernel_module(unsigned index,
|
||||
const unsigned char *bitcode, unsigned bitcode_len);
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ C_SOURCES := \
|
|||
r600_texture.c \
|
||||
r600_translate.c \
|
||||
radeonsi_pm4.c \
|
||||
radeonsi_compute.c \
|
||||
si_state.c \
|
||||
si_state_streamout.c \
|
||||
si_state_draw.c \
|
||||
|
|
|
|||
234
src/gallium/drivers/radeonsi/radeonsi_compute.c
Normal file
234
src/gallium/drivers/radeonsi/radeonsi_compute.c
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
#include "util/u_memory.h"
|
||||
|
||||
#include "radeonsi_pipe.h"
|
||||
#include "radeonsi_shader.h"
|
||||
|
||||
#include "radeon_llvm_util.h"
|
||||
|
||||
/*
 * Driver-side state for one compute program.  Instances are allocated
 * zero-initialised by radeonsi_create_compute_state() and are what the
 * bind_compute_state hook stores in r600_context::cs_shader_state.
 */
struct si_pipe_compute {
|
||||
/* Context the program was created on. */
struct r600_context *ctx;
|
||||
|
||||
/* Copy of pipe_compute_state::req_local_mem. */
unsigned local_size;
|
||||
/* Copy of pipe_compute_state::req_private_mem. */
unsigned private_size;
|
||||
/* Copy of pipe_compute_state::req_input_mem; radeonsi_launch_grid()
 * divides it by 4 (rounding up) to get the number of user SGPRs, so it
 * is treated as a byte count. */
unsigned input_size;
|
||||
/* Compiled kernel, filled in by si_compile_llvm(). */
struct si_pipe_shader shader;
|
||||
/* NOTE(review): not read or written by the code in this file as shown;
 * presumably reserved for later use - confirm. */
unsigned num_user_sgprs;
|
||||
|
||||
/* Lazily allocated in radeonsi_set_global_binding(); collects the
 * buffer objects of the bound global resources and is emitted ahead of
 * the dispatch packets in radeonsi_launch_grid(). */
struct si_pm4_state *pm4_buffers;
|
||||
|
||||
};
|
||||
|
||||
static void *radeonsi_create_compute_state(
|
||||
struct pipe_context *ctx,
|
||||
const struct pipe_compute_state *cso)
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
struct si_pipe_compute *program =
|
||||
CALLOC_STRUCT(si_pipe_compute);
|
||||
const struct pipe_llvm_program_header *header;
|
||||
const unsigned char *code;
|
||||
LLVMModuleRef mod;
|
||||
|
||||
header = cso->prog;
|
||||
code = cso->prog + sizeof(struct pipe_llvm_program_header);
|
||||
|
||||
program->ctx = rctx;
|
||||
program->local_size = cso->req_local_mem;
|
||||
program->private_size = cso->req_private_mem;
|
||||
program->input_size = cso->req_input_mem;
|
||||
|
||||
mod = radeon_llvm_parse_bitcode(code, header->num_bytes);
|
||||
si_compile_llvm(rctx, &program->shader, mod);
|
||||
|
||||
return program;
|
||||
}
|
||||
|
||||
static void radeonsi_bind_compute_state(struct pipe_context *ctx, void *state)
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context*)ctx;
|
||||
rctx->cs_shader_state.program = (struct si_pipe_compute*)state;
|
||||
}
|
||||
|
||||
static void radeonsi_set_global_binding(
|
||||
struct pipe_context *ctx, unsigned first, unsigned n,
|
||||
struct pipe_resource **resources,
|
||||
uint32_t **handles)
|
||||
{
|
||||
unsigned i;
|
||||
struct r600_context *rctx = (struct r600_context*)ctx;
|
||||
struct si_pipe_compute *program = rctx->cs_shader_state.program;
|
||||
struct si_pm4_state *pm4;
|
||||
|
||||
if (!program->pm4_buffers) {
|
||||
program->pm4_buffers = CALLOC_STRUCT(si_pm4_state);
|
||||
}
|
||||
pm4 = program->pm4_buffers;
|
||||
pm4->compute_pkt = true;
|
||||
|
||||
if (!resources) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = first; i < first + n; i++) {
|
||||
uint64_t va = r600_resource_va(ctx->screen, resources[i]);
|
||||
si_pm4_add_bo(pm4, (struct si_resource*)resources[i],
|
||||
RADEON_USAGE_READWRITE);
|
||||
memcpy(handles[i], &va, sizeof(va));
|
||||
}
|
||||
}
|
||||
|
||||
static void radeonsi_launch_grid(
|
||||
struct pipe_context *ctx,
|
||||
const uint *block_layout, const uint *grid_layout,
|
||||
uint32_t pc, const void *input)
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context*)ctx;
|
||||
struct si_pipe_compute *program = rctx->cs_shader_state.program;
|
||||
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
|
||||
uint64_t shader_va;
|
||||
unsigned arg_user_sgpr_count;
|
||||
unsigned i;
|
||||
|
||||
pm4->compute_pkt = true;
|
||||
si_cmd_context_control(pm4);
|
||||
|
||||
si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE);
|
||||
si_pm4_cmd_add(pm4, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH) |
|
||||
EVENT_INDEX(0x7) |
|
||||
EVENT_WRITE_INV_L2);
|
||||
si_pm4_cmd_end(pm4, false);
|
||||
|
||||
si_pm4_inval_texture_cache(pm4);
|
||||
si_pm4_inval_shader_cache(pm4);
|
||||
si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);
|
||||
|
||||
arg_user_sgpr_count = program->input_size / 4;
|
||||
if (program->input_size % 4 != 0) {
|
||||
arg_user_sgpr_count++;
|
||||
}
|
||||
|
||||
/* XXX: We should store arguments in memory if we run out of user sgprs.
|
||||
*/
|
||||
assert(arg_user_sgpr_count < 16);
|
||||
|
||||
for (i = 0; i < arg_user_sgpr_count; i++) {
|
||||
uint32_t *args = (uint32_t*)input;
|
||||
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 +
|
||||
(i * 4),
|
||||
args[i]);
|
||||
}
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0);
|
||||
si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0);
|
||||
si_pm4_set_reg(pm4, R_00B818_COMPUTE_START_Z, 0);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X,
|
||||
S_00B81C_NUM_THREAD_FULL(block_layout[0]));
|
||||
si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y,
|
||||
S_00B820_NUM_THREAD_FULL(block_layout[1]));
|
||||
si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z,
|
||||
S_00B824_NUM_THREAD_FULL(block_layout[2]));
|
||||
|
||||
/* XXX: This should be:
|
||||
* (number of compute units) * 4 * (waves per simd) - 1 */
|
||||
si_pm4_set_reg(pm4, R_00B82C_COMPUTE_MAX_WAVE_ID, 0x190 /* Default value */);
|
||||
|
||||
shader_va = r600_resource_va(ctx->screen, (void *)program->shader.bo);
|
||||
si_pm4_add_bo(pm4, program->shader.bo, RADEON_USAGE_READ);
|
||||
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
|
||||
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1,
|
||||
/* We always use at least 3 VGPRS, these come from
|
||||
* TIDIG_COMP_CNT.
|
||||
* XXX: The compiler should account for this.
|
||||
*/
|
||||
S_00B848_VGPRS((MAX2(3, program->shader.num_vgprs) - 1) / 4)
|
||||
/* We always use at least 4 + arg_user_sgpr_count. The 4 extra
|
||||
* sgprs are from TGID_X_EN, TGID_Y_EN, TGID_Z_EN, TG_SIZE_EN
|
||||
* XXX: The compiler should account for this.
|
||||
*/
|
||||
| S_00B848_SGPRS(((MAX2(4 + arg_user_sgpr_count,
|
||||
program->shader.num_sgprs)) - 1) / 8))
|
||||
;
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2,
|
||||
S_00B84C_SCRATCH_EN(0)
|
||||
| S_00B84C_USER_SGPR(arg_user_sgpr_count)
|
||||
| S_00B84C_TGID_X_EN(1)
|
||||
| S_00B84C_TGID_Y_EN(1)
|
||||
| S_00B84C_TGID_Z_EN(1)
|
||||
| S_00B84C_TG_SIZE_EN(1)
|
||||
| S_00B84C_TIDIG_COMP_CNT(2)
|
||||
| S_00B84C_LDS_SIZE(0)
|
||||
| S_00B84C_EXCP_EN(0))
|
||||
;
|
||||
si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0,
|
||||
S_00B858_SH0_CU_EN(0xffff /* Default value */)
|
||||
| S_00B858_SH1_CU_EN(0xffff /* Default value */))
|
||||
;
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1,
|
||||
S_00B85C_SH0_CU_EN(0xffff /* Default value */)
|
||||
| S_00B85C_SH1_CU_EN(0xffff /* Default value */))
|
||||
;
|
||||
|
||||
si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT);
|
||||
si_pm4_cmd_add(pm4, grid_layout[0]); /* Thread groups DIM_X */
|
||||
si_pm4_cmd_add(pm4, grid_layout[1]); /* Thread groups DIM_Y */
|
||||
si_pm4_cmd_add(pm4, grid_layout[2]); /* Thread gropus DIM_Z */
|
||||
si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */
|
||||
si_pm4_cmd_end(pm4, false);
|
||||
|
||||
si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE);
|
||||
si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(0x4)));
|
||||
si_pm4_cmd_end(pm4, false);
|
||||
|
||||
si_pm4_inval_texture_cache(pm4);
|
||||
si_pm4_inval_shader_cache(pm4);
|
||||
si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);
|
||||
|
||||
si_pm4_emit(rctx, program->pm4_buffers);
|
||||
si_pm4_emit(rctx, pm4);
|
||||
|
||||
#if 0
|
||||
fprintf(stderr, "cdw: %i\n", rctx->cs->cdw);
|
||||
for (i = 0; i < rctx->cs->cdw; i++) {
|
||||
fprintf(stderr, "%4i : 0x%08X\n", i, rctx->cs->buf[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
rctx->ws->cs_flush(rctx->cs, RADEON_FLUSH_COMPUTE);
|
||||
rctx->ws->buffer_wait(program->shader.bo->buf, 0);
|
||||
|
||||
FREE(pm4);
|
||||
}
|
||||
|
||||
|
||||
static void si_delete_compute_state(struct pipe_context *ctx, void* state){}
|
||||
/* Compute resource binding hook: currently a no-op. */
static void si_set_compute_resources(struct pipe_context * ctx_,
		unsigned start, unsigned count,
		struct pipe_surface ** surfaces)
{
}

/* Compute sampler-view binding hook: currently a no-op. */
static void si_set_cs_sampler_view(struct pipe_context *ctx_,
		unsigned start_slot, unsigned count,
		struct pipe_sampler_view **views)
{
}

/* Compute sampler-state binding hook: currently a no-op. */
static void si_bind_compute_sampler_states(
	struct pipe_context *ctx_,
	unsigned start_slot,
	unsigned num_samplers,
	void **samplers_)
{
}
|
||||
void si_init_compute_functions(struct r600_context *rctx)
|
||||
{
|
||||
rctx->context.create_compute_state = radeonsi_create_compute_state;
|
||||
rctx->context.delete_compute_state = si_delete_compute_state;
|
||||
rctx->context.bind_compute_state = radeonsi_bind_compute_state;
|
||||
/* ctx->context.create_sampler_view = evergreen_compute_create_sampler_view; */
|
||||
rctx->context.set_compute_resources = si_set_compute_resources;
|
||||
rctx->context.set_compute_sampler_views = si_set_cs_sampler_view;
|
||||
rctx->context.bind_compute_sampler_states = si_bind_compute_sampler_states;
|
||||
rctx->context.set_global_binding = radeonsi_set_global_binding;
|
||||
rctx->context.launch_grid = radeonsi_launch_grid;
|
||||
}
|
||||
|
|
@ -217,6 +217,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
|
|||
r600_init_query_functions(rctx);
|
||||
r600_init_context_resource_functions(rctx);
|
||||
si_init_surface_functions(rctx);
|
||||
si_init_compute_functions(rctx);
|
||||
|
||||
rctx->context.create_video_decoder = vl_create_decoder;
|
||||
rctx->context.create_video_buffer = vl_video_buffer_create;
|
||||
|
|
@ -342,6 +343,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_NPOT_TEXTURES:
|
||||
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
|
||||
case PIPE_CAP_TGSI_INSTANCEID:
|
||||
case PIPE_CAP_COMPUTE:
|
||||
return 1;
|
||||
case PIPE_CAP_TGSI_TEXCOORD:
|
||||
return 0;
|
||||
|
|
@ -366,7 +368,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
|
||||
case PIPE_CAP_USER_VERTEX_BUFFERS:
|
||||
case PIPE_CAP_TEXTURE_MULTISAMPLE:
|
||||
case PIPE_CAP_COMPUTE:
|
||||
case PIPE_CAP_QUERY_TIMESTAMP:
|
||||
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
|
||||
case PIPE_CAP_CUBE_MAP_ARRAY:
|
||||
|
|
@ -450,6 +451,13 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
|
|||
case PIPE_SHADER_GEOMETRY:
|
||||
/* TODO: support and enable geometry programs */
|
||||
return 0;
|
||||
case PIPE_SHADER_COMPUTE:
|
||||
switch (param) {
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_LLVM;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
default:
|
||||
/* TODO: support tessellation */
|
||||
return 0;
|
||||
|
|
@ -516,6 +524,56 @@ static int r600_get_video_param(struct pipe_screen *screen,
|
|||
}
|
||||
}
|
||||
|
||||
static int r600_get_compute_param(struct pipe_screen *screen,
|
||||
enum pipe_compute_cap param,
|
||||
void *ret)
|
||||
{
|
||||
struct r600_screen *rscreen = (struct r600_screen *)screen;
|
||||
//TODO: select these params by asic
|
||||
switch (param) {
|
||||
case PIPE_COMPUTE_CAP_IR_TARGET: {
|
||||
const char *gpu = r600_get_llvm_processor_name(rscreen->family);
|
||||
if (ret) {
|
||||
sprintf(ret, "%s-r600--", gpu);
|
||||
}
|
||||
return (8 + strlen(gpu)) * sizeof(char);
|
||||
}
|
||||
case PIPE_COMPUTE_CAP_GRID_DIMENSION:
|
||||
if (ret) {
|
||||
uint64_t * grid_dimension = ret;
|
||||
grid_dimension[0] = 3;
|
||||
}
|
||||
return 1 * sizeof(uint64_t);
|
||||
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
|
||||
if (ret) {
|
||||
uint64_t * grid_size = ret;
|
||||
grid_size[0] = 65535;
|
||||
grid_size[1] = 65535;
|
||||
grid_size[2] = 1;
|
||||
}
|
||||
return 3 * sizeof(uint64_t) ;
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
|
||||
if (ret) {
|
||||
uint64_t * block_size = ret;
|
||||
block_size[0] = 256;
|
||||
block_size[1] = 256;
|
||||
block_size[2] = 256;
|
||||
}
|
||||
return 3 * sizeof(uint64_t);
|
||||
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
|
||||
if (ret) {
|
||||
uint64_t * max_threads_per_block = ret;
|
||||
*max_threads_per_block = 256;
|
||||
}
|
||||
return sizeof(uint64_t);
|
||||
|
||||
default:
|
||||
fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void r600_destroy_screen(struct pipe_screen* pscreen)
|
||||
{
|
||||
struct r600_screen *rscreen = (struct r600_screen *)pscreen;
|
||||
|
|
@ -727,6 +785,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
|
|||
rscreen->screen.get_shader_param = r600_get_shader_param;
|
||||
rscreen->screen.get_paramf = r600_get_paramf;
|
||||
rscreen->screen.get_video_param = r600_get_video_param;
|
||||
rscreen->screen.get_compute_param = r600_get_compute_param;
|
||||
rscreen->screen.is_format_supported = si_is_format_supported;
|
||||
rscreen->screen.is_video_format_supported = vl_video_buffer_is_format_supported;
|
||||
rscreen->screen.context_create = r600_create_context;
|
||||
|
|
|
|||
|
|
@ -50,6 +50,8 @@
|
|||
#define R600_TRACE_CS 0
|
||||
#define R600_TRACE_CS_DWORDS 6
|
||||
|
||||
struct si_pipe_compute;
|
||||
|
||||
struct r600_pipe_fences {
|
||||
struct si_resource *bo;
|
||||
unsigned *data;
|
||||
|
|
@ -88,6 +90,10 @@ struct si_pipe_sampler_state {
|
|||
float border_color[4];
|
||||
};
|
||||
|
||||
struct si_cs_shader_state {
|
||||
struct si_pipe_compute *program;
|
||||
};
|
||||
|
||||
/* needed for blitter save */
|
||||
#define NUM_TEX_UNITS 16
|
||||
|
||||
|
|
@ -139,6 +145,7 @@ struct r600_context {
|
|||
struct pipe_stencil_ref stencil_ref;
|
||||
struct si_pipe_shader_selector *ps_shader;
|
||||
struct si_pipe_shader_selector *vs_shader;
|
||||
struct si_cs_shader_state cs_shader_state;
|
||||
struct pipe_query *current_render_cond;
|
||||
unsigned current_render_cond_mode;
|
||||
struct pipe_query *saved_render_cond;
|
||||
|
|
@ -239,6 +246,9 @@ void r600_translate_index_buffer(struct r600_context *r600,
|
|||
void r600_trace_emit(struct r600_context *rctx);
|
||||
#endif
|
||||
|
||||
/* radeonsi_compute.c */
|
||||
void si_init_compute_functions(struct r600_context *rctx);
|
||||
|
||||
/*
|
||||
* common helpers
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -47,8 +47,9 @@ void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate)
|
|||
{
|
||||
unsigned count;
|
||||
count = state->ndw - state->last_pm4 - 2;
|
||||
state->pm4[state->last_pm4] = PKT3(state->last_opcode,
|
||||
count, predicate);
|
||||
state->pm4[state->last_pm4] =
|
||||
PKT3(state->last_opcode, count, predicate)
|
||||
| PKT3_SHADER_TYPE_S(state->compute_pkt);
|
||||
|
||||
assert(state->ndw <= SI_PM4_MAX_DW);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,6 +58,8 @@ struct si_pm4_state
|
|||
/* relocs for shader data */
|
||||
unsigned nrelocs;
|
||||
unsigned relocs[SI_PM4_MAX_RELOCS];
|
||||
|
||||
bool compute_pkt;
|
||||
};
|
||||
|
||||
void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode);
|
||||
|
|
|
|||
|
|
@ -1093,6 +1093,57 @@ static void preload_samplers(struct si_shader_context *si_shader_ctx)
|
|||
}
|
||||
}
|
||||
|
||||
int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader,
|
||||
LLVMModuleRef mod)
|
||||
{
|
||||
unsigned char *inst_bytes;
|
||||
unsigned inst_byte_count;
|
||||
unsigned i;
|
||||
uint32_t *ptr;
|
||||
bool dump;
|
||||
|
||||
dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE);
|
||||
|
||||
radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count,
|
||||
r600_get_llvm_processor_name(rctx->screen->family),
|
||||
dump);
|
||||
|
||||
if (dump) {
|
||||
fprintf(stderr, "SI CODE:\n");
|
||||
for (i = 0; i < inst_byte_count; i+=4 ) {
|
||||
fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
|
||||
inst_bytes[i + 2], inst_bytes[i + 1],
|
||||
inst_bytes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
|
||||
shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
|
||||
shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));
|
||||
|
||||
/* copy new shader */
|
||||
si_resource_reference(&shader->bo, NULL);
|
||||
shader->bo = si_resource_create_custom(rctx->context.screen, PIPE_USAGE_IMMUTABLE,
|
||||
inst_byte_count - 12);
|
||||
if (shader->bo == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
|
||||
if (0 /*R600_BIG_ENDIAN*/) {
|
||||
for (i = 0; i < (inst_byte_count-12)/4; ++i) {
|
||||
ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes+12 + i*4));
|
||||
}
|
||||
} else {
|
||||
memcpy(ptr, inst_bytes + 12, inst_byte_count - 12);
|
||||
}
|
||||
rctx->ws->buffer_unmap(shader->bo->cs_buf);
|
||||
|
||||
free(inst_bytes);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int si_pipe_shader_create(
|
||||
struct pipe_context *ctx,
|
||||
struct si_pipe_shader *shader)
|
||||
|
|
@ -1103,11 +1154,8 @@ int si_pipe_shader_create(
|
|||
struct tgsi_shader_info shader_info;
|
||||
struct lp_build_tgsi_context * bld_base;
|
||||
LLVMModuleRef mod;
|
||||
unsigned char * inst_bytes;
|
||||
unsigned inst_byte_count;
|
||||
unsigned i;
|
||||
uint32_t *ptr;
|
||||
bool dump;
|
||||
int r = 0;
|
||||
|
||||
dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE);
|
||||
|
||||
|
|
@ -1162,55 +1210,16 @@ int si_pipe_shader_create(
|
|||
radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
|
||||
|
||||
mod = bld_base->base.gallivm->module;
|
||||
if (dump) {
|
||||
LLVMDumpModule(mod);
|
||||
}
|
||||
radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count,
|
||||
r600_get_llvm_processor_name(rctx->screen->family)
|
||||
, dump);
|
||||
if (dump) {
|
||||
fprintf(stderr, "SI CODE:\n");
|
||||
for (i = 0; i < inst_byte_count; i+=4 ) {
|
||||
fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
|
||||
inst_bytes[i + 2], inst_bytes[i + 1],
|
||||
inst_bytes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
|
||||
shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
|
||||
shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));
|
||||
r = si_compile_llvm(rctx, shader, mod);
|
||||
|
||||
radeon_llvm_dispose(&si_shader_ctx.radeon_bld);
|
||||
tgsi_parse_free(&si_shader_ctx.parse);
|
||||
|
||||
/* copy new shader */
|
||||
si_resource_reference(&shader->bo, NULL);
|
||||
shader->bo = si_resource_create_custom(ctx->screen, PIPE_USAGE_IMMUTABLE,
|
||||
inst_byte_count - 12);
|
||||
if (shader->bo == NULL) {
|
||||
FREE(si_shader_ctx.constants);
|
||||
FREE(si_shader_ctx.resources);
|
||||
FREE(si_shader_ctx.samplers);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
|
||||
if (0 /*R600_BIG_ENDIAN*/) {
|
||||
for (i = 0; i < (inst_byte_count-12)/4; ++i) {
|
||||
ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes+12 + i*4));
|
||||
}
|
||||
} else {
|
||||
memcpy(ptr, inst_bytes + 12, inst_byte_count - 12);
|
||||
}
|
||||
rctx->ws->buffer_unmap(shader->bo->cs_buf);
|
||||
|
||||
FREE(si_shader_ctx.constants);
|
||||
FREE(si_shader_ctx.resources);
|
||||
FREE(si_shader_ctx.samplers);
|
||||
free(inst_bytes);
|
||||
|
||||
return 0;
|
||||
return r;
|
||||
}
|
||||
|
||||
void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@
|
|||
#ifndef RADEONSI_SHADER_H
|
||||
#define RADEONSI_SHADER_H
|
||||
|
||||
#include <llvm-c/Core.h> /* LLVMModuleRef */
|
||||
|
||||
#define SI_SGPR_CONST 0
|
||||
#define SI_SGPR_SAMPLER 2
|
||||
#define SI_SGPR_RESOURCE 4
|
||||
|
|
@ -142,6 +144,9 @@ struct si_pipe_shader {
|
|||
|
||||
/* radeonsi_shader.c */
|
||||
int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader);
|
||||
int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader);
|
||||
int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader,
|
||||
LLVMModuleRef mod);
|
||||
void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@
|
|||
#define SI_CONTEXT_REG_OFFSET 0x00028000
|
||||
#define SI_CONTEXT_REG_END 0x00029000
|
||||
|
||||
#define EVENT_TYPE_CACHE_FLUSH 0x6
|
||||
#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
|
||||
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
|
||||
#define EVENT_TYPE_ZPASS_DONE 0x15
|
||||
|
|
@ -47,6 +48,8 @@
|
|||
* 4 - *S_PARTIAL_FLUSH
|
||||
* 5 - TS events
|
||||
*/
|
||||
#define EVENT_WRITE_INV_L2 0x100000
|
||||
|
||||
|
||||
#define PREDICATION_OP_CLEAR 0x0
|
||||
#define PREDICATION_OP_ZPASS 0x1
|
||||
|
|
@ -65,6 +68,8 @@
|
|||
#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
|
||||
|
||||
#define PKT3_NOP 0x10
|
||||
#define PKT3_DISPATCH_DIRECT 0x15
|
||||
#define PKT3_DISPATCH_INDIRECT 0x16
|
||||
#define PKT3_SET_PREDICATION 0x20
|
||||
#define PKT3_COND_EXEC 0x22
|
||||
#define PKT3_PRED_EXEC 0x23
|
||||
|
|
@ -122,6 +127,7 @@
|
|||
#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF)
|
||||
#define PKT3_IT_OPCODE_C 0xFFFF00FF
|
||||
#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
|
||||
#define PKT3_SHADER_TYPE_S(x) (((x) & 0x1) << 1)
|
||||
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
|
||||
#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate))
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue