gallium: add a new interface for pipe_context::launch_grid()

This introduces pipe_grid_info which contains all information to
describe a launch_grid call. This will be used to implement indirect
compute in the same fashion as indirect draw.

Changes from v2:
 - correctly initialize pipe_grid_info for nv50/nvc0

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
Samuel Pitoiset 2016-01-12 18:00:00 +01:00
parent 61ed09c7ea
commit bfd695e1d2
14 changed files with 114 additions and 86 deletions

View file

@ -79,9 +79,7 @@ launch_grid(struct ilo_context *ilo,
} }
static void static void
ilo_launch_grid(struct pipe_context *pipe, ilo_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
const uint *block_layout, const uint *grid_layout,
uint32_t pc, const void *input)
{ {
struct ilo_context *ilo = ilo_context(pipe); struct ilo_context *ilo = ilo_context(pipe);
struct ilo_shader_state *cs = ilo->state_vector.cs; struct ilo_shader_state *cs = ilo->state_vector.cs;
@ -92,13 +90,13 @@ ilo_launch_grid(struct pipe_context *pipe,
input_buf.buffer_size = input_buf.buffer_size =
ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_INPUT_SIZE); ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_INPUT_SIZE);
if (input_buf.buffer_size) { if (input_buf.buffer_size) {
u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, input, u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, info->input,
&input_buf.buffer_offset, &input_buf.buffer); &input_buf.buffer_offset, &input_buf.buffer);
} }
ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder); ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder);
launch_grid(ilo, block_layout, grid_layout, &input_buf, pc); launch_grid(ilo, info->block, info->grid, &input_buf, info->pc);
ilo_render_invalidate_hw(ilo->render); ilo_render_invalidate_hw(ilo->render);

View file

@ -270,13 +270,11 @@ nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label)
} }
void void
nv50_launch_grid(struct pipe_context *pipe, nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
const uint *block_layout, const uint *grid_layout,
uint32_t label, const void *input)
{ {
struct nv50_context *nv50 = nv50_context(pipe); struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nouveau_pushbuf *push = nv50->base.pushbuf;
unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2]; unsigned block_size = info->block[0] * info->block[1] * info->block[2];
struct nv50_program *cp = nv50->compprog; struct nv50_program *cp = nv50->compprog;
bool ret; bool ret;
@ -286,10 +284,10 @@ nv50_launch_grid(struct pipe_context *pipe,
return; return;
} }
nv50_compute_upload_input(nv50, input); nv50_compute_upload_input(nv50, info->input);
BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1); BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
PUSH_DATA (push, nv50_compute_find_symbol(nv50, label)); PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc));
BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1); BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40)); PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
@ -298,14 +296,14 @@ nv50_launch_grid(struct pipe_context *pipe,
/* grid/block setup */ /* grid/block setup */
BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2); BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]); PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
PUSH_DATA (push, block_layout[2]); PUSH_DATA (push, info->block[2]);
BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1); BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
PUSH_DATA (push, 1 << 16 | block_size); PUSH_DATA (push, 1 << 16 | block_size);
BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1); BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
PUSH_DATA (push, 1); PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1); BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]); PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]);
BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1); BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
PUSH_DATA (push, 1); PUSH_DATA (push, 1);

View file

@ -322,7 +322,6 @@ nv98_video_buffer_create(struct pipe_context *pipe,
/* nv50_compute.c */ /* nv50_compute.c */
void void
nv50_launch_grid(struct pipe_context *, const uint *, const uint *, nv50_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
uint32_t, const void *);
#endif #endif

View file

@ -218,11 +218,12 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
struct pipe_context *pipe = &nv50->base.pipe; struct pipe_context *pipe = &nv50->base.pipe;
struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
struct pipe_grid_info info = {};
uint32_t mask; uint32_t mask;
uint32_t input[3]; uint32_t input[3];
const uint block[3] = { 32, 1, 1 }; const uint block[3] = { 32, 1, 1 };
const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 }; const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 };
int c; int c, i;
if (unlikely(!screen->pm.prog)) { if (unlikely(!screen->pm.prog)) {
struct nv50_program *prog = CALLOC_STRUCT(nv50_program); struct nv50_program *prog = CALLOC_STRUCT(nv50_program);
@ -262,7 +263,14 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
pipe->bind_compute_state(pipe, screen->pm.prog); pipe->bind_compute_state(pipe, screen->pm.prog);
input[0] = hq->bo->offset + hq->base_offset; input[0] = hq->bo->offset + hq->base_offset;
input[1] = hq->sequence; input[1] = hq->sequence;
pipe->launch_grid(pipe, block, grid, 0, input);
for (i = 0; i < 3; i++) {
info.block[i] = block[i];
info.grid[i] = grid[i];
}
info.pc = 0;
info.input = input;
pipe->launch_grid(pipe, &info);
nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY); nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY);

View file

@ -183,10 +183,7 @@ nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
} }
void void
nvc0_launch_grid(struct pipe_context *pipe, nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
const uint *block_layout, const uint *grid_layout,
uint32_t label,
const void *input)
{ {
struct nvc0_context *nvc0 = nvc0_context(pipe); struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@ -200,10 +197,10 @@ nvc0_launch_grid(struct pipe_context *pipe,
return; return;
} }
nvc0_compute_upload_input(nvc0, input); nvc0_compute_upload_input(nvc0, info->input);
BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1); BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
PUSH_DATA (push, nvc0_program_symbol_offset(cp, label)); PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));
BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3); BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
PUSH_DATA (push, align(cp->cp.lmem_size, 0x10)); PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
@ -212,18 +209,18 @@ nvc0_launch_grid(struct pipe_context *pipe,
BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3); BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]); PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
PUSH_DATA (push, cp->num_barriers); PUSH_DATA (push, cp->num_barriers);
BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1); BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
PUSH_DATA (push, cp->num_gprs); PUSH_DATA (push, cp->num_gprs);
/* grid/block setup */ /* grid/block setup */
BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2); BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]); PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
PUSH_DATA (push, grid_layout[2]); PUSH_DATA (push, info->grid[2]);
BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2); BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]); PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
PUSH_DATA (push, block_layout[2]); PUSH_DATA (push, info->block[2]);
/* launch preliminary setup */ /* launch preliminary setup */
BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1); BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);

View file

@ -331,11 +331,9 @@ nvc0_video_buffer_create(struct pipe_context *pipe,
void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *); void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
/* nve4_compute.c */ /* nve4_compute.c */
void nve4_launch_grid(struct pipe_context *, void nve4_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
const uint *, const uint *, uint32_t, const void *);
/* nvc0_compute.c */ /* nvc0_compute.c */
void nvc0_launch_grid(struct pipe_context *, void nvc0_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
const uint *, const uint *, uint32_t, const void *);
#endif #endif

View file

@ -937,11 +937,12 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nouveau_pushbuf *push = nvc0->base.pushbuf;
const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS; const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq); struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq);
struct pipe_grid_info info = {};
uint32_t mask; uint32_t mask;
uint32_t input[3]; uint32_t input[3];
const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 }; const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
const uint grid[3] = { screen->mp_count, screen->gpc_count, 1 }; const uint grid[3] = { screen->mp_count, screen->gpc_count, 1 };
unsigned c; unsigned c, i;
if (unlikely(!screen->pm.prog)) { if (unlikely(!screen->pm.prog)) {
struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program); struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
@ -989,7 +990,14 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
input[0] = (hq->bo->offset + hq->base_offset); input[0] = (hq->bo->offset + hq->base_offset);
input[1] = (hq->bo->offset + hq->base_offset) >> 32; input[1] = (hq->bo->offset + hq->base_offset) >> 32;
input[2] = hq->sequence; input[2] = hq->sequence;
pipe->launch_grid(pipe, block, grid, 0, input);
for (i = 0; i < 3; i++) {
info.block[i] = block[i];
info.grid[i] = grid[i];
}
info.pc = 0;
info.input = input;
pipe->launch_grid(pipe, &info);
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY); nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);

View file

@ -429,10 +429,7 @@ nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
} }
void void
nve4_launch_grid(struct pipe_context *pipe, nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
const uint *block_layout, const uint *grid_layout,
uint32_t label,
const void *input)
{ {
struct nvc0_context *nvc0 = nvc0_context(pipe); struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@ -453,13 +450,14 @@ nve4_launch_grid(struct pipe_context *pipe,
if (ret) if (ret)
goto out; goto out;
nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout); nve4_compute_setup_launch_desc(nvc0, desc, info->pc,
info->block, info->grid);
#ifdef DEBUG #ifdef DEBUG
if (debug_get_num_option("NV50_PROG_DEBUG", 0)) if (debug_get_num_option("NV50_PROG_DEBUG", 0))
nve4_compute_dump_launch_desc(desc); nve4_compute_dump_launch_desc(desc);
#endif #endif
nve4_compute_upload_input(nvc0, input, block_layout, grid_layout); nve4_compute_upload_input(nvc0, info->input, info->block, info->grid);
/* upload descriptor and flush */ /* upload descriptor and flush */
#if 0 #if 0

View file

@ -553,25 +553,24 @@ void evergreen_emit_cs_shader(
} }
static void evergreen_launch_grid( static void evergreen_launch_grid(
struct pipe_context *ctx_, struct pipe_context *ctx_, const struct pipe_grid_info *info)
const uint *block_layout, const uint *grid_layout,
uint32_t pc, const void *input)
{ {
struct r600_context *ctx = (struct r600_context *)ctx_; struct r600_context *ctx = (struct r600_context *)ctx_;
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
struct r600_pipe_compute *shader = ctx->cs_shader_state.shader; struct r600_pipe_compute *shader = ctx->cs_shader_state.shader;
boolean use_kill; boolean use_kill;
ctx->cs_shader_state.pc = pc; ctx->cs_shader_state.pc = info->pc;
/* Get the config information for this kernel. */ /* Get the config information for this kernel. */
r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill); r600_shader_binary_read_config(&shader->binary, &shader->bc,
info->pc, &use_kill);
#endif #endif
COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc); COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", info->pc);
evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input); evergreen_compute_upload_input(ctx_, info->block, info->grid, info->input);
compute_emit_cs(ctx, block_layout, grid_layout); compute_emit_cs(ctx, info->block, info->grid);
} }
static void evergreen_set_compute_resources(struct pipe_context * ctx_, static void evergreen_set_compute_resources(struct pipe_context * ctx_,

View file

@ -196,9 +196,7 @@ static unsigned compute_num_waves_for_scratch(
} }
static void si_launch_grid( static void si_launch_grid(
struct pipe_context *ctx, struct pipe_context *ctx, const struct pipe_grid_info *info)
const uint *block_layout, const uint *grid_layout,
uint32_t pc, const void *input)
{ {
struct si_context *sctx = (struct si_context*)ctx; struct si_context *sctx = (struct si_context*)ctx;
struct radeon_winsys_cs *cs = sctx->b.gfx.cs; struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
@ -232,7 +230,7 @@ static void si_launch_grid(
pm4->compute_pkt = true; pm4->compute_pkt = true;
/* Read the config information */ /* Read the config information */
si_shader_binary_read_config(&shader->binary, &shader->config, pc); si_shader_binary_read_config(&shader->binary, &shader->config, info->pc);
/* Upload the kernel arguments */ /* Upload the kernel arguments */
@ -242,15 +240,16 @@ static void si_launch_grid(
kernel_args = sctx->b.ws->buffer_map(input_buffer->buf, kernel_args = sctx->b.ws->buffer_map(input_buffer->buf,
sctx->b.gfx.cs, PIPE_TRANSFER_WRITE); sctx->b.gfx.cs, PIPE_TRANSFER_WRITE);
for (i = 0; i < 3; i++) { for (i = 0; i < 3; i++) {
kernel_args[i] = grid_layout[i]; kernel_args[i] = info->grid[i];
kernel_args[i + 3] = grid_layout[i] * block_layout[i]; kernel_args[i + 3] = info->grid[i] * info->block[i];
kernel_args[i + 6] = block_layout[i]; kernel_args[i + 6] = info->block[i];
} }
num_waves_for_scratch = compute_num_waves_for_scratch( num_waves_for_scratch = compute_num_waves_for_scratch(
&sctx->screen->b.info, block_layout, grid_layout); &sctx->screen->b.info, info->block, info->grid);
memcpy(kernel_args + (num_work_size_bytes / 4), input, program->input_size); memcpy(kernel_args + (num_work_size_bytes / 4), info->input,
program->input_size);
if (shader->config.scratch_bytes_per_wave > 0) { if (shader->config.scratch_bytes_per_wave > 0) {
@ -291,11 +290,11 @@ static void si_launch_grid(
si_pm4_set_reg(pm4, R_00B818_COMPUTE_START_Z, 0); si_pm4_set_reg(pm4, R_00B818_COMPUTE_START_Z, 0);
si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X, si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X,
S_00B81C_NUM_THREAD_FULL(block_layout[0])); S_00B81C_NUM_THREAD_FULL(info->block[0]));
si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y, si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y,
S_00B820_NUM_THREAD_FULL(block_layout[1])); S_00B820_NUM_THREAD_FULL(info->block[1]));
si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z, si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z,
S_00B824_NUM_THREAD_FULL(block_layout[2])); S_00B824_NUM_THREAD_FULL(info->block[2]));
/* Global buffers */ /* Global buffers */
for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) { for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) {
@ -323,7 +322,7 @@ static void si_launch_grid(
} }
shader_va = shader->bo->gpu_address; shader_va = shader->bo->gpu_address;
shader_va += pc; shader_va += info->pc;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, shader->bo, radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, shader->bo,
RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
@ -375,9 +374,9 @@ static void si_launch_grid(
; ;
si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT); si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT);
si_pm4_cmd_add(pm4, grid_layout[0]); /* Thread groups DIM_X */ si_pm4_cmd_add(pm4, info->grid[0]); /* Thread groups DIM_X */
si_pm4_cmd_add(pm4, grid_layout[1]); /* Thread groups DIM_Y */ si_pm4_cmd_add(pm4, info->grid[1]); /* Thread groups DIM_Y */
si_pm4_cmd_add(pm4, grid_layout[2]); /* Thread gropus DIM_Z */ si_pm4_cmd_add(pm4, info->grid[2]); /* Thread groups DIM_Z */
si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */ si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */
si_pm4_cmd_end(pm4, false); si_pm4_cmd_end(pm4, false);

View file

@ -48,6 +48,7 @@ struct pipe_constant_buffer;
struct pipe_debug_callback; struct pipe_debug_callback;
struct pipe_depth_stencil_alpha_state; struct pipe_depth_stencil_alpha_state;
struct pipe_draw_info; struct pipe_draw_info;
struct pipe_grid_info;
struct pipe_fence_handle; struct pipe_fence_handle;
struct pipe_framebuffer_state; struct pipe_framebuffer_state;
struct pipe_image_view; struct pipe_image_view;
@ -618,23 +619,9 @@ struct pipe_context {
/** /**
* Launch the compute kernel starting from instruction \a pc of the * Launch the compute kernel starting from instruction \a pc of the
* currently bound compute program. * currently bound compute program.
*
* \a grid_layout and \a block_layout are arrays of size \a
* PIPE_COMPUTE_CAP_GRID_DIMENSION that determine the layout of the
* grid (in block units) and working block (in thread units) to be
* used, respectively.
*
* \a pc For drivers that use PIPE_SHADER_IR_LLVM as their prefered IR,
* this value will be the index of the kernel in the opencl.kernels
* metadata list.
*
* \a input will be used to initialize the INPUT resource, and it
* should point to a buffer of at least
* pipe_compute_state::req_input_mem bytes.
*/ */
void (*launch_grid)(struct pipe_context *context, void (*launch_grid)(struct pipe_context *context,
const uint *block_layout, const uint *grid_layout, const struct pipe_grid_info *info);
uint32_t pc, const void *input);
/*@}*/ /*@}*/
/** /**

View file

@ -678,6 +678,33 @@ struct pipe_blit_info
boolean alpha_blend; /* dst.rgb = src.rgb * src.a + dst.rgb * (1 - src.a) */ boolean alpha_blend; /* dst.rgb = src.rgb * src.a + dst.rgb * (1 - src.a) */
}; };
/**
 * Information to describe a launch_grid call.
 *
 * Bundles every argument of pipe_context::launch_grid() into one structure
 * that is passed to the driver by const pointer.
 */
struct pipe_grid_info
{
   /**
    * For drivers that use PIPE_SHADER_IR_LLVM as their preferred IR, this value
    * will be the index of the kernel in the opencl.kernels metadata list.
    */
   uint32_t pc;

   /**
    * Will be used to initialize the INPUT resource, and it should point to a
    * buffer of at least pipe_compute_state::req_input_mem bytes.
    *
    * Const-qualified because callers hand in read-only buffers and the launch
    * only needs to read from it; a plain "void *" would force callers holding
    * a "const void *" to discard the qualifier.
    */
   const void *input;

   /**
    * Determine the layout of the working block (in thread units) to be used.
    */
   uint block[3];

   /**
    * Determine the layout of the grid (in block units) to be used.
    */
   uint grid[3];
};
/** /**
* Structure used as a header for serialized LLVM programs. * Structure used as a header for serialized LLVM programs.

View file

@ -55,6 +55,7 @@ kernel::launch(command_queue &q,
const auto reduced_grid_size = const auto reduced_grid_size =
map(divides(), grid_size, block_size); map(divides(), grid_size, block_size);
void *st = exec.bind(&q, grid_offset); void *st = exec.bind(&q, grid_offset);
struct pipe_grid_info info;
// The handles are created during exec_context::bind(), so we need make // The handles are created during exec_context::bind(), so we need make
// sure to call exec_context::bind() before retrieving them. // sure to call exec_context::bind() before retrieving them.
@ -74,11 +75,13 @@ kernel::launch(command_queue &q,
q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
exec.g_buffers.data(), g_handles.data()); exec.g_buffers.data(), g_handles.data());
q.pipe->launch_grid(q.pipe, // Fill information for the launch_grid() call.
pad_vector(q, block_size, 1).data(), info.block = pad_vector(q, block_size, 1).data(),
pad_vector(q, reduced_grid_size, 1).data(), info.grid = pad_vector(q, reduced_grid_size, 1).data(),
find(name_equals(_name), m.syms).offset, info.pc = find(name_equals(_name), m.syms).offset;
exec.input.data()); info.input = exec.input.data();
q.pipe->launch_grid(q.pipe, &info);
q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL); q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL); q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);

View file

@ -424,8 +424,17 @@ static void launch_grid(struct context *ctx, const uint *block_layout,
const void *input) const void *input)
{ {
struct pipe_context *pipe = ctx->pipe; struct pipe_context *pipe = ctx->pipe;
struct pipe_grid_info info;
int i;
pipe->launch_grid(pipe, block_layout, grid_layout, pc, input); for (i = 0; i < 3; i++) {
info.block[i] = block_layout[i];
info.grid[i] = grid_layout[i];
}
info.pc = pc;
info.input = input;
pipe->launch_grid(pipe, &info);
} }
static void test_default_init(void *p, int s, int x, int y) static void test_default_init(void *p, int s, int x, int y)