i965/cs: Implement DispatchComputeIndirect support

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
This commit is contained in:
Jordan Justen 2015-09-17 16:25:24 -07:00
parent d11d018ce3
commit ebbe6cdad7
3 changed files with 60 additions and 4 deletions

View file

@ -31,14 +31,46 @@
#include "brw_draw.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "brw_defines.h"
static void
brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
brw_emit_gpgpu_walker(struct brw_context *brw,
const void *compute_param,
bool indirect)
{
const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
const GLuint *num_groups;
uint32_t indirect_flag;
if (!indirect) {
num_groups = (const GLuint *)compute_param;
indirect_flag = 0;
} else {
GLintptr indirect_offset = (GLintptr)compute_param;
static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
num_groups = indirect_group_counts;
struct gl_buffer_object *indirect_buffer = brw->ctx.DispatchIndirectBuffer;
drm_intel_bo *bo = intel_bufferobj_buffer(brw,
intel_buffer_object(indirect_buffer),
indirect_offset, 3 * sizeof(GLuint));
indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE;
brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo,
I915_GEM_DOMAIN_VERTEX, 0,
indirect_offset + 0);
brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo,
I915_GEM_DOMAIN_VERTEX, 0,
indirect_offset + 4);
brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo,
I915_GEM_DOMAIN_VERTEX, 0,
indirect_offset + 8);
}
const unsigned simd_size = prog_data->simd_size;
unsigned group_size = prog_data->local_size[0] *
prog_data->local_size[1] * prog_data->local_size[2];
@ -52,7 +84,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
uint32_t dwords = brw->gen < 8 ? 11 : 15;
BEGIN_BATCH(dwords);
OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2));
OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag);
OUT_BATCH(0);
if (brw->gen >= 8) {
OUT_BATCH(0); /* Indirect Data Length */
@ -83,7 +115,9 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
static void
brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
brw_dispatch_compute_common(struct gl_context *ctx,
const void *compute_param,
bool indirect)
{
struct brw_context *brw = brw_context(ctx);
int estimated_buffer_space_needed;
@ -117,7 +151,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
brw->no_batch_wrap = true;
brw_upload_compute_state(brw);
brw_emit_gpgpu_walker(brw, num_groups);
brw_emit_gpgpu_walker(brw, compute_param, indirect);
brw->no_batch_wrap = false;
@ -155,9 +189,24 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
*/
}
/**
 * Direct compute dispatch entry point.
 *
 * Hands the caller-supplied work group counts to the shared dispatch path,
 * marking the request as non-indirect.
 */
static void
brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
{
   brw_dispatch_compute_common(ctx, num_groups, false);
}
/**
 * Indirect compute dispatch entry point.
 *
 * The common dispatch path takes a single opaque parameter, so the offset
 * is passed through as a pointer-sized value; the walker emission code casts
 * it back to a GLintptr when the indirect flag is set.
 */
static void
brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
{
   const void *offset_param = (void *)indirect;

   brw_dispatch_compute_common(ctx, offset_param, true);
}
/**
 * Plug the compute dispatch entry points (direct and indirect) into the
 * given driver function table.
 */
void
brw_init_compute_functions(struct dd_function_table *functions)
{
   functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
   functions->DispatchCompute = brw_dispatch_compute;
}

View file

@ -2770,6 +2770,8 @@ enum brw_wm_barycentric_interp_mode {
# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0)
#define MEDIA_STATE_FLUSH 0x7004
#define GPGPU_WALKER 0x7105
/* GEN7 DW0: flag OR'd into the first GPGPU_WALKER dword (alongside the
 * opcode and length) when the dispatch is indirect.
 */
# define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10)
/* GEN8+ DW2 */
# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0
# define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0)

View file

@ -173,6 +173,11 @@
#define GEN7_3DPRIM_START_INSTANCE 0x243C
#define GEN7_3DPRIM_BASE_VERTEX 0x2440
/* Auto-Compute / Indirect Registers
 *
 * Register offsets for the X, Y and Z dispatch dimensions.  For an indirect
 * compute dispatch the driver loads each one from the indirect buffer with
 * brw_load_register_mem(), reading three consecutive 32-bit values.
 */
#define GEN7_GPGPU_DISPATCHDIMX 0x2500
#define GEN7_GPGPU_DISPATCHDIMY 0x2504
#define GEN7_GPGPU_DISPATCHDIMZ 0x2508
#define GEN7_CACHE_MODE_1 0x7004
# define GEN8_HIZ_NP_PMA_FIX_ENABLE (1 << 11)
# define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13)