mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 06:40:08 +01:00
radeonsi/compute: Enable PIPE_SHADER_IR_NATIVE for compute shaders v2
v2: - Drop dependency on LLVM >= 3.5.1 - Rename si_create_shader() to si_shader_binary_read()
This commit is contained in:
parent
fa07f4b68a
commit
1f4e48d5b5
4 changed files with 127 additions and 59 deletions
|
|
@ -23,14 +23,15 @@
|
|||
*/
|
||||
|
||||
#include "util/u_memory.h"
|
||||
#include "radeon/r600_pipe_common.h"
|
||||
#include "radeon/radeon_elf_util.h"
|
||||
#include "radeon/radeon_llvm_util.h"
|
||||
|
||||
#include "radeon/r600_cs.h"
|
||||
#include "si_pipe.h"
|
||||
#include "si_shader.h"
|
||||
#include "sid.h"
|
||||
|
||||
#include "radeon/radeon_llvm_util.h"
|
||||
|
||||
#define MAX_GLOBAL_BUFFERS 20
|
||||
#if HAVE_LLVM < 0x0305
|
||||
#define NUM_USER_SGPRS 2
|
||||
|
|
@ -44,14 +45,18 @@ struct si_compute {
|
|||
unsigned local_size;
|
||||
unsigned private_size;
|
||||
unsigned input_size;
|
||||
unsigned num_kernels;
|
||||
struct si_shader *kernels;
|
||||
struct radeon_shader_binary binary;
|
||||
struct si_shader program;
|
||||
unsigned num_user_sgprs;
|
||||
|
||||
struct r600_resource *input_buffer;
|
||||
struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
|
||||
|
||||
#if HAVE_LLVM < 0x0306
|
||||
unsigned num_kernels;
|
||||
struct si_shader *kernels;
|
||||
LLVMContextRef llvm_ctx;
|
||||
#endif
|
||||
};
|
||||
|
||||
static void *si_create_compute_state(
|
||||
|
|
@ -61,10 +66,7 @@ static void *si_create_compute_state(
|
|||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
struct si_compute *program = CALLOC_STRUCT(si_compute);
|
||||
const struct pipe_llvm_program_header *header;
|
||||
const unsigned char *code;
|
||||
unsigned i;
|
||||
|
||||
program->llvm_ctx = LLVMContextCreate();
|
||||
const char *code;
|
||||
|
||||
header = cso->prog;
|
||||
code = cso->prog + sizeof(struct pipe_llvm_program_header);
|
||||
|
|
@ -74,17 +76,27 @@ static void *si_create_compute_state(
|
|||
program->private_size = cso->req_private_mem;
|
||||
program->input_size = cso->req_input_mem;
|
||||
|
||||
program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, code,
|
||||
header->num_bytes);
|
||||
program->kernels = CALLOC(sizeof(struct si_shader),
|
||||
program->num_kernels);
|
||||
for (i = 0; i < program->num_kernels; i++) {
|
||||
LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
|
||||
code, header->num_bytes);
|
||||
si_compile_llvm(sctx->screen, &program->kernels[i], mod);
|
||||
LLVMDisposeModule(mod);
|
||||
#if HAVE_LLVM < 0x0306
|
||||
{
|
||||
unsigned i;
|
||||
program->llvm_ctx = LLVMContextCreate();
|
||||
program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx,
|
||||
code, header->num_bytes);
|
||||
program->kernels = CALLOC(sizeof(struct si_shader),
|
||||
program->num_kernels);
|
||||
for (i = 0; i < program->num_kernels; i++) {
|
||||
LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
|
||||
code, header->num_bytes);
|
||||
si_compile_llvm(sctx->screen, &program->kernels[i], mod);
|
||||
LLVMDisposeModule(mod);
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
||||
radeon_elf_read(code, header->num_bytes, &program->binary, true);
|
||||
si_shader_binary_read(sctx->screen, &program->program, &program->binary);
|
||||
|
||||
#endif
|
||||
program->input_buffer = si_resource_create_custom(sctx->b.b.screen,
|
||||
PIPE_USAGE_IMMUTABLE, program->input_size);
|
||||
|
||||
|
|
@ -181,10 +193,15 @@ static void si_launch_grid(
|
|||
uint64_t shader_va;
|
||||
unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
|
||||
unsigned i;
|
||||
struct si_shader *shader = &program->kernels[pc];
|
||||
struct si_shader *shader = &program->program;
|
||||
unsigned lds_blocks;
|
||||
unsigned num_waves_for_scratch;
|
||||
|
||||
#if HAVE_LLVM < 0x0306
|
||||
shader = &program->kernels[pc];
|
||||
#endif
|
||||
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0) | PKT3_SHADER_TYPE_S(1));
|
||||
radeon_emit(cs, 0x80000000);
|
||||
radeon_emit(cs, 0x80000000);
|
||||
|
|
@ -198,6 +215,11 @@ static void si_launch_grid(
|
|||
|
||||
pm4->compute_pkt = true;
|
||||
|
||||
#if HAVE_LLVM >= 0x0306
|
||||
/* Read the config information */
|
||||
si_shader_binary_read_config(&program->binary, &program->program, pc);
|
||||
#endif
|
||||
|
||||
/* Upload the kernel arguments */
|
||||
|
||||
/* The extra num_work_size_bytes are for work group / work item size information */
|
||||
|
|
@ -290,6 +312,10 @@ static void si_launch_grid(
|
|||
}
|
||||
|
||||
shader_va = shader->bo->gpu_address;
|
||||
|
||||
#if HAVE_LLVM >= 0x0306
|
||||
shader_va += pc;
|
||||
#endif
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
|
||||
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
|
||||
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
|
||||
|
|
@ -388,6 +414,7 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
|
|||
return;
|
||||
}
|
||||
|
||||
#if HAVE_LLVM < 0x0306
|
||||
if (program->kernels) {
|
||||
for (int i = 0; i < program->num_kernels; i++){
|
||||
if (program->kernels[i].bo){
|
||||
|
|
@ -400,10 +427,16 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
|
|||
if (program->llvm_ctx){
|
||||
LLVMContextDispose(program->llvm_ctx);
|
||||
}
|
||||
#else
|
||||
si_shader_destroy(ctx, &program->program);
|
||||
#endif
|
||||
|
||||
pipe_resource_reference(
|
||||
(struct pipe_resource **)&program->input_buffer, NULL);
|
||||
|
||||
//And then free the program itself.
|
||||
FREE(program->binary.code);
|
||||
FREE(program->binary.config);
|
||||
FREE(program->binary.rodata);
|
||||
FREE(program);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -336,7 +336,11 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
|
|||
case PIPE_SHADER_COMPUTE:
|
||||
switch (param) {
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
#if HAVE_LLVM < 0x0306
|
||||
return PIPE_SHADER_IR_LLVM;
|
||||
#else
|
||||
return PIPE_SHADER_IR_NATIVE;
|
||||
#endif
|
||||
case PIPE_SHADER_CAP_DOUBLES:
|
||||
return 0; /* XXX: Enable doubles once the compiler can
|
||||
handle them. */
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@
|
|||
#include "gallivm/lp_bld_arit.h"
|
||||
#include "gallivm/lp_bld_flow.h"
|
||||
#include "radeon/radeon_llvm.h"
|
||||
#include "radeon/radeon_elf_util.h"
|
||||
#include "radeon/radeon_llvm_emit.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "tgsi/tgsi_parse.h"
|
||||
|
|
@ -2500,52 +2501,34 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
|
|||
}
|
||||
}
|
||||
|
||||
int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
|
||||
LLVMModuleRef mod)
|
||||
void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
|
||||
struct si_shader *shader,
|
||||
unsigned symbol_offset)
|
||||
{
|
||||
unsigned r; /* llvm_compile result */
|
||||
unsigned i;
|
||||
unsigned char *ptr;
|
||||
struct radeon_shader_binary binary;
|
||||
bool dump = r600_can_dump_shader(&sscreen->b,
|
||||
shader->selector ? shader->selector->tokens : NULL);
|
||||
const char * gpu_family = r600_get_llvm_processor_name(sscreen->b.family);
|
||||
unsigned code_size;
|
||||
|
||||
/* Use LLVM to compile shader */
|
||||
memset(&binary, 0, sizeof(binary));
|
||||
r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
|
||||
|
||||
/* Output binary dump if rscreen->debug_flags are set */
|
||||
if (dump && ! binary.disassembled) {
|
||||
fprintf(stderr, "SI CODE:\n");
|
||||
for (i = 0; i < binary.code_size; i+=4 ) {
|
||||
fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3],
|
||||
binary.code[i + 2], binary.code[i + 1],
|
||||
binary.code[i]);
|
||||
}
|
||||
}
|
||||
const unsigned char *config =
|
||||
radeon_shader_binary_config_start(binary, symbol_offset);
|
||||
|
||||
/* XXX: We may be able to emit some of these values directly rather than
|
||||
* extracting fields to be emitted later.
|
||||
*/
|
||||
/* Parse config data in compiled binary */
|
||||
for (i = 0; i < binary.config_size; i+= 8) {
|
||||
unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + i));
|
||||
unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
|
||||
|
||||
for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
|
||||
unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
|
||||
unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
|
||||
switch (reg) {
|
||||
case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
|
||||
case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
|
||||
case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
|
||||
case R_00B848_COMPUTE_PGM_RSRC1:
|
||||
shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8;
|
||||
shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4;
|
||||
shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
|
||||
shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
|
||||
break;
|
||||
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
|
||||
shader->lds_size = G_00B02C_EXTRA_LDS_SIZE(value);
|
||||
shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
|
||||
break;
|
||||
case R_00B84C_COMPUTE_PGM_RSRC2:
|
||||
shader->lds_size = G_00B84C_LDS_SIZE(value);
|
||||
shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
|
||||
break;
|
||||
case R_0286CC_SPI_PS_INPUT_ENA:
|
||||
shader->spi_ps_input_ena = value;
|
||||
|
|
@ -2561,9 +2544,32 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
|
|||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int si_shader_binary_read(struct si_screen *sscreen,
|
||||
struct si_shader *shader,
|
||||
const struct radeon_shader_binary *binary)
|
||||
{
|
||||
|
||||
unsigned i;
|
||||
unsigned code_size;
|
||||
unsigned char *ptr;
|
||||
bool dump = r600_can_dump_shader(&sscreen->b,
|
||||
shader->selector ? shader->selector->tokens : NULL);
|
||||
|
||||
if (dump && !binary->disassembled) {
|
||||
fprintf(stderr, "SI CODE:\n");
|
||||
for (i = 0; i < binary->code_size; i+=4 ) {
|
||||
fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
|
||||
binary->code[i + 2], binary->code[i + 1],
|
||||
binary->code[i]);
|
||||
}
|
||||
}
|
||||
|
||||
si_shader_binary_read_config(binary, shader, 0);
|
||||
|
||||
/* copy new shader */
|
||||
code_size = binary.code_size + binary.rodata_size;
|
||||
code_size = binary->code_size + binary->rodata_size;
|
||||
r600_resource_reference(&shader->bo, NULL);
|
||||
shader->bo = si_resource_create_custom(&sscreen->b.b, PIPE_USAGE_IMMUTABLE,
|
||||
code_size);
|
||||
|
|
@ -2571,19 +2577,37 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_WRITE);
|
||||
util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size);
|
||||
if (binary.rodata_size > 0) {
|
||||
ptr += binary.code_size;
|
||||
util_memcpy_cpu_to_le32(ptr, binary.rodata, binary.rodata_size);
|
||||
|
||||
ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_READ_WRITE);
|
||||
util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
|
||||
if (binary->rodata_size > 0) {
|
||||
ptr += binary->code_size;
|
||||
util_memcpy_cpu_to_le32(ptr, binary->rodata, binary->rodata_size);
|
||||
}
|
||||
|
||||
sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
|
||||
|
||||
free(binary.code);
|
||||
free(binary.config);
|
||||
free(binary.rodata);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
|
||||
LLVMModuleRef mod)
|
||||
{
|
||||
int r = 0;
|
||||
struct radeon_shader_binary binary;
|
||||
bool dump = r600_can_dump_shader(&sscreen->b,
|
||||
shader->selector ? shader->selector->tokens : NULL);
|
||||
memset(&binary, 0, sizeof(binary));
|
||||
r = radeon_llvm_compile(mod, &binary,
|
||||
r600_get_llvm_processor_name(sscreen->b.family), dump);
|
||||
|
||||
if (r) {
|
||||
return r;
|
||||
}
|
||||
r = si_shader_binary_read(sscreen, shader, &binary);
|
||||
FREE(binary.code);
|
||||
FREE(binary.config);
|
||||
FREE(binary.rodata);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -33,6 +33,8 @@
|
|||
#include "tgsi/tgsi_scan.h"
|
||||
#include "si_state.h"
|
||||
|
||||
struct radeon_shader_binary;
|
||||
|
||||
#define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */
|
||||
#define SI_SGPR_CONST 2
|
||||
#define SI_SGPR_SAMPLER 4
|
||||
|
|
@ -180,5 +182,10 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
|
|||
LLVMModuleRef mod);
|
||||
void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
|
||||
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
|
||||
/**
 * Load an already-compiled shader binary into \p shader.
 *
 * When shader dumping is enabled and the binary carries no disassembly,
 * the code words are printed to stderr. The config for symbol offset 0 is
 * then parsed into \p shader, a buffer sized code_size + rodata_size is
 * created, and the code followed by the rodata is copied into it.
 *
 * The binary is taken as const; si_compile_llvm() frees its buffers itself
 * after calling this function.
 *
 * \return 0 on success. A -ENOMEM return is also visible in the
 *         implementation (presumably when the buffer cannot be created --
 *         TODO confirm, the check is not shown in this diff hunk).
 */
int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
|
||||
const struct radeon_shader_binary *binary);
|
||||
/**
 * Parse the register/value config entries of \p binary that belong to the
 * symbol at \p symbol_offset and store the results in \p shader.
 *
 * Entries are read as pairs of little-endian 32-bit words (register, then
 * value), covering config_size_per_symbol bytes from the config start for
 * that symbol. num_sgprs, num_vgprs and lds_size are only ever raised
 * (MAX2 against the current value); spi_ps_input_ena is assigned directly.
 */
void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
|
||||
struct si_shader *shader,
|
||||
unsigned symbol_offset);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue