mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-09 01:50:12 +01:00
i965/cs: Implement DispatchComputeIndirect support
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
This commit is contained in:
parent
d11d018ce3
commit
ebbe6cdad7
3 changed files with 60 additions and 4 deletions
|
|
@ -31,14 +31,46 @@
|
|||
#include "brw_draw.h"
|
||||
#include "brw_state.h"
|
||||
#include "intel_batchbuffer.h"
|
||||
#include "intel_buffer_objects.h"
|
||||
#include "brw_defines.h"
|
||||
|
||||
|
||||
static void
|
||||
brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
|
||||
brw_emit_gpgpu_walker(struct brw_context *brw,
|
||||
const void *compute_param,
|
||||
bool indirect)
|
||||
{
|
||||
const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
|
||||
|
||||
const GLuint *num_groups;
|
||||
uint32_t indirect_flag;
|
||||
|
||||
if (!indirect) {
|
||||
num_groups = (const GLuint *)compute_param;
|
||||
indirect_flag = 0;
|
||||
} else {
|
||||
GLintptr indirect_offset = (GLintptr)compute_param;
|
||||
static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
|
||||
num_groups = indirect_group_counts;
|
||||
|
||||
struct gl_buffer_object *indirect_buffer = brw->ctx.DispatchIndirectBuffer;
|
||||
drm_intel_bo *bo = intel_bufferobj_buffer(brw,
|
||||
intel_buffer_object(indirect_buffer),
|
||||
indirect_offset, 3 * sizeof(GLuint));
|
||||
|
||||
indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE;
|
||||
|
||||
brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo,
|
||||
I915_GEM_DOMAIN_VERTEX, 0,
|
||||
indirect_offset + 0);
|
||||
brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo,
|
||||
I915_GEM_DOMAIN_VERTEX, 0,
|
||||
indirect_offset + 4);
|
||||
brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo,
|
||||
I915_GEM_DOMAIN_VERTEX, 0,
|
||||
indirect_offset + 8);
|
||||
}
|
||||
|
||||
const unsigned simd_size = prog_data->simd_size;
|
||||
unsigned group_size = prog_data->local_size[0] *
|
||||
prog_data->local_size[1] * prog_data->local_size[2];
|
||||
|
|
@ -52,7 +84,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
|
|||
|
||||
uint32_t dwords = brw->gen < 8 ? 11 : 15;
|
||||
BEGIN_BATCH(dwords);
|
||||
OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2));
|
||||
OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag);
|
||||
OUT_BATCH(0);
|
||||
if (brw->gen >= 8) {
|
||||
OUT_BATCH(0); /* Indirect Data Length */
|
||||
|
|
@ -83,7 +115,9 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
|
|||
|
||||
|
||||
static void
|
||||
brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
|
||||
brw_dispatch_compute_common(struct gl_context *ctx,
|
||||
const void *compute_param,
|
||||
bool indirect)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
int estimated_buffer_space_needed;
|
||||
|
|
@ -117,7 +151,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
|
|||
brw->no_batch_wrap = true;
|
||||
brw_upload_compute_state(brw);
|
||||
|
||||
brw_emit_gpgpu_walker(brw, num_groups);
|
||||
brw_emit_gpgpu_walker(brw, compute_param, indirect);
|
||||
|
||||
brw->no_batch_wrap = false;
|
||||
|
||||
|
|
@ -155,9 +189,24 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
|
|||
*/
|
||||
}
|
||||
|
||||
/**
 * Direct compute dispatch entry point.
 *
 * Forwards the caller-supplied num_groups pointer unchanged to
 * brw_dispatch_compute_common() with the indirect flag set to false.
 */
static void
|
||||
brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) {
|
||||
brw_dispatch_compute_common(ctx,
|
||||
num_groups,
|
||||
false);
|
||||
}
|
||||
|
||||
/**
 * Indirect compute dispatch entry point.
 *
 * The GLintptr argument is cast to a pointer so it can travel through the
 * "const void *compute_param" parameter of brw_dispatch_compute_common();
 * with the indirect flag set to true, brw_emit_gpgpu_walker() casts it back
 * to a GLintptr offset rather than dereferencing it.
 */
static void
|
||||
brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
|
||||
{
|
||||
brw_dispatch_compute_common(ctx,
|
||||
(void *)indirect, /* an offset carried as a pointer, not a real address */
|
||||
true);
|
||||
}
|
||||
|
||||
/**
 * Installs the compute dispatch hooks in the given function table:
 * DispatchCompute is set to brw_dispatch_compute and
 * DispatchComputeIndirect to brw_dispatch_compute_indirect.
 */
void
|
||||
brw_init_compute_functions(struct dd_function_table *functions)
|
||||
{
|
||||
functions->DispatchCompute = brw_dispatch_compute;
|
||||
functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2770,6 +2770,8 @@ enum brw_wm_barycentric_interp_mode {
|
|||
# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0)
|
||||
#define MEDIA_STATE_FLUSH 0x7004
|
||||
#define GPGPU_WALKER 0x7105
|
||||
/* GEN7 DW0 */
|
||||
# define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10)
|
||||
/* GEN8+ DW2 */
|
||||
# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0
|
||||
# define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0)
|
||||
|
|
|
|||
|
|
@ -173,6 +173,11 @@
|
|||
#define GEN7_3DPRIM_START_INSTANCE 0x243C
|
||||
#define GEN7_3DPRIM_BASE_VERTEX 0x2440
|
||||
|
||||
/* Auto-Compute / Indirect Registers */
|
||||
#define GEN7_GPGPU_DISPATCHDIMX 0x2500
|
||||
#define GEN7_GPGPU_DISPATCHDIMY 0x2504
|
||||
#define GEN7_GPGPU_DISPATCHDIMZ 0x2508
|
||||
|
||||
#define GEN7_CACHE_MODE_1 0x7004
|
||||
# define GEN8_HIZ_NP_PMA_FIX_ENABLE (1 << 11)
|
||||
# define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue