nvc0: implement compute support for nve4

This commit is contained in:
Christoph Bumiller 2013-02-23 19:40:23 +01:00
parent 75f1f852b0
commit e066f2f62f
18 changed files with 1881 additions and 77 deletions

View file

@ -5,6 +5,7 @@
#include "util/u_memory.h"
typedef uint32_t u32;
typedef uint16_t u16;
extern int nouveau_mesa_debug;

View file

@ -196,6 +196,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_COMPUTE_CLASS 0x000090c0
#define NVC8_COMPUTE_CLASS 0x000092c0
#define NVE4_COMPUTE_CLASS 0x0000a0c0
#define NVF0_COMPUTE_CLASS 0x0000a1c0
#define NV84_CRYPT_CLASS 0x000074c1
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5

View file

@ -1,5 +1,5 @@
#ifndef RNNDB_NV50_DEFS_XML
#define RNNDB_NV50_DEFS_XML
#ifndef NV50_DEFS_XML
#define NV50_DEFS_XML
/* Autogenerated file, DO NOT EDIT manually!
@ -8,11 +8,11 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
- rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58)
- ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58)
- ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-07-09 13:43:58)
- rnndb/nv50_defs.xml ( 7783 bytes, from 2013-02-14 13:56:25)
- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
- ./rnndb/nvchipsets.xml ( 3704 bytes, from 2012-08-18 12:48:55)
Copyright (C) 2006-2011 by the following authors:
Copyright (C) 2006-2013 by the following authors:
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
- Ben Skeggs (darktama, darktama_)
- B. R. <koala_br@users.sourceforge.net> (koala_br)
@ -71,6 +71,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#define NV50_VSTATUS_IDLE 0x00000000
#define NV50_VSTATUS_BUSY 0x00000001
#define NV50_VSTATUS_UNK2 0x00000002
#define NV50_VSTATUS_WAITING 0x00000003
#define NV50_VSTATUS_BLOCKED 0x00000005
#define NV50_VSTATUS_FAULTED 0x00000006
#define NV50_VSTATUS_PAUSED 0x00000007
#define NV50_SURFACE_FORMAT_BITMAP 0x0000001c
#define NV50_SURFACE_FORMAT_UNK1D 0x0000001d
#define NV50_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0
@ -143,6 +150,45 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d
#define NV50_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e
#define NV50_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f
#define NVE4_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002
#define NVE4_IMAGE_FORMAT_RGBA32_SINT 0x00000003
#define NVE4_IMAGE_FORMAT_RGBA32_UINT 0x00000004
#define NVE4_IMAGE_FORMAT_RGBA16_UNORM 0x00000008
#define NVE4_IMAGE_FORMAT_RGBA16_SNORM 0x00000009
#define NVE4_IMAGE_FORMAT_RGBA16_SINT 0x0000000a
#define NVE4_IMAGE_FORMAT_RGBA16_UINT 0x0000000b
#define NVE4_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c
#define NVE4_IMAGE_FORMAT_RG32_FLOAT 0x0000000d
#define NVE4_IMAGE_FORMAT_RG32_SINT 0x0000000e
#define NVE4_IMAGE_FORMAT_RG32_UINT 0x0000000f
#define NVE4_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013
#define NVE4_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015
#define NVE4_IMAGE_FORMAT_RGBA8_UNORM 0x00000018
#define NVE4_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a
#define NVE4_IMAGE_FORMAT_RGBA8_SINT 0x0000001b
#define NVE4_IMAGE_FORMAT_RGBA8_UINT 0x0000001c
#define NVE4_IMAGE_FORMAT_RG16_UNORM 0x0000001d
#define NVE4_IMAGE_FORMAT_RG16_SNORM 0x0000001e
#define NVE4_IMAGE_FORMAT_RG16_SINT 0x0000001f
#define NVE4_IMAGE_FORMAT_RG16_UINT 0x00000020
#define NVE4_IMAGE_FORMAT_RG16_FLOAT 0x00000021
#define NVE4_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024
#define NVE4_IMAGE_FORMAT_R32_SINT 0x00000027
#define NVE4_IMAGE_FORMAT_R32_UINT 0x00000028
#define NVE4_IMAGE_FORMAT_R32_FLOAT 0x00000029
#define NVE4_IMAGE_FORMAT_RG8_UNORM 0x0000002e
#define NVE4_IMAGE_FORMAT_RG8_SNORM 0x0000002f
#define NVE4_IMAGE_FORMAT_RG8_SINT 0x00000030
#define NVE4_IMAGE_FORMAT_RG8_UINT 0x00000031
#define NVE4_IMAGE_FORMAT_R16_UNORM 0x00000032
#define NVE4_IMAGE_FORMAT_R16_SNORM 0x00000033
#define NVE4_IMAGE_FORMAT_R16_SINT 0x00000034
#define NVE4_IMAGE_FORMAT_R16_UINT 0x00000035
#define NVE4_IMAGE_FORMAT_R16_FLOAT 0x00000036
#define NVE4_IMAGE_FORMAT_R8_UNORM 0x00000037
#define NVE4_IMAGE_FORMAT_R8_SNORM 0x00000038
#define NVE4_IMAGE_FORMAT_R8_SINT 0x00000039
#define NVE4_IMAGE_FORMAT_R8_UINT 0x0000003a
#define NV50_QUERY__SIZE 0x00000010
#define NV50_QUERY_COUNTER 0x00000000
@ -151,4 +197,4 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_QUERY_TIME 0x00000008
#endif /* RNNDB_NV50_DEFS_XML */
#endif /* NV50_DEFS_XML */

View file

@ -14,6 +14,7 @@ C_SOURCES := \
nvc0_program.c \
nvc0_shader_state.c \
nvc0_query.c \
nve4_compute.c \
nvc0_video.c \
nvc0_video_bsp.c \
nvc0_video_vp.c \

View file

@ -63,6 +63,7 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
nouveau_bufctx_del(&nvc0->bufctx_3d);
nouveau_bufctx_del(&nvc0->bufctx);
nouveau_bufctx_del(&nvc0->bufctx_cp);
util_unreference_framebuffer_state(&nvc0->framebuffer);
@ -71,7 +72,7 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
pipe_resource_reference(&nvc0->idxbuf.buffer, NULL);
for (s = 0; s < 5; ++s) {
for (s = 0; s < 6; ++s) {
for (i = 0; i < nvc0->num_textures[s]; ++i)
pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
@ -80,8 +81,21 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, NULL);
}
for (s = 0; s < 2; ++s) {
for (i = 0; i < NVC0_MAX_SURFACE_SLOTS; ++i)
pipe_surface_reference(&nvc0->surfaces[s][i], NULL);
}
for (i = 0; i < nvc0->num_tfbbufs; ++i)
pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
++i) {
struct pipe_resource **res = util_dynarray_element(
&nvc0->global_residents, struct pipe_resource *, i);
pipe_resource_reference(res, NULL);
}
util_dynarray_fini(&nvc0->global_residents);
}
static void
@ -219,10 +233,13 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
nvc0->base.pushbuf = screen->base.pushbuf;
nvc0->base.client = screen->base.client;
ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_COUNT,
&nvc0->bufctx_3d);
ret = nouveau_bufctx_new(screen->base.client, 2, &nvc0->bufctx);
if (!ret)
nouveau_bufctx_new(screen->base.client, 2, &nvc0->bufctx);
ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_3D_COUNT,
&nvc0->bufctx_3d);
if (!ret)
ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_CP_COUNT,
&nvc0->bufctx_cp);
if (ret)
goto out_err;
@ -236,6 +253,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
pipe->draw_vbo = nvc0_draw_vbo;
pipe->clear = nvc0_clear;
if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
pipe->launch_grid = nve4_launch_grid;
pipe->flush = nvc0_flush;
pipe->texture_barrier = nvc0_texture_barrier;
@ -274,23 +293,39 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text);
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo);
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc);
if (screen->compute) {
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->text);
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc);
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm);
}
flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache);
if (screen->compute)
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->tls);
flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->fence.bo);
BCTX_REFN_bo(nvc0->bufctx, FENCE, flags, screen->fence.bo);
if (screen->compute)
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
nvc0->base.scratch.bo_size = 2 << 20;
memset(nvc0->tex_handles, ~0, sizeof(nvc0->tex_handles));
util_dynarray_init(&nvc0->global_residents);
return pipe;
out_err:
if (nvc0) {
if (nvc0->bufctx_3d)
nouveau_bufctx_del(&nvc0->bufctx_3d);
if (nvc0->bufctx_cp)
nouveau_bufctx_del(&nvc0->bufctx_cp);
if (nvc0->bufctx)
nouveau_bufctx_del(&nvc0->bufctx);
if (nvc0->blit)

View file

@ -55,7 +55,16 @@
#define NVC0_NEW_SAMPLERS (1 << 20)
#define NVC0_NEW_TFB_TARGETS (1 << 21)
#define NVC0_NEW_IDXBUF (1 << 22)
#define NVC0_NEW_SURFACES (1 << 23)
#define NVC0_NEW_CP_PROGRAM (1 << 0)
#define NVC0_NEW_CP_SURFACES (1 << 1)
#define NVC0_NEW_CP_TEXTURES (1 << 2)
#define NVC0_NEW_CP_SAMPLERS (1 << 3)
#define NVC0_NEW_CP_CONSTBUF (1 << 4)
#define NVC0_NEW_CP_GLOBALS (1 << 5)
/* 3d bufctx (during draw_vbo, blit_3d) */
#define NVC0_BIND_FB 0
#define NVC0_BIND_VTX 1
#define NVC0_BIND_VTX_TMP 2
@ -63,10 +72,21 @@
#define NVC0_BIND_TEX(s, i) ( 4 + 32 * (s) + (i))
#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i))
#define NVC0_BIND_TFB 244
#define NVC0_BIND_SCREEN 245
#define NVC0_BIND_TLS 246
#define NVC0_BIND_COUNT 247
#define NVC0_BIND_SUF 245
#define NVC0_BIND_SCREEN 246
#define NVC0_BIND_TLS 247
#define NVC0_BIND_3D_COUNT 248
/* compute bufctx (during launch_grid) */
#define NVC0_BIND_CP_CB(i) ( 0 + (i))
#define NVC0_BIND_CP_TEX(i) ( 16 + (i))
#define NVC0_BIND_CP_SUF 48
#define NVC0_BIND_CP_GLOBAL 49
#define NVC0_BIND_CP_DESC 50
#define NVC0_BIND_CP_SCREEN 51
#define NVC0_BIND_CP_COUNT 52
/* bufctx for other operations */
#define NVC0_BIND_2D 0
#define NVC0_BIND_M2MF 0
#define NVC0_BIND_FENCE 1
@ -81,6 +101,7 @@ struct nvc0_context {
struct nouveau_bufctx *bufctx_3d;
struct nouveau_bufctx *bufctx;
struct nouveau_bufctx *bufctx_cp;
struct nvc0_screen *screen;
@ -90,6 +111,7 @@ struct nvc0_context {
uint32_t nblocksx, uint32_t nblocksy);
uint32_t dirty;
uint32_t dirty_cp; /* dirty flags for compute state */
struct {
boolean flushed;
@ -105,8 +127,8 @@ struct nvc0_context {
uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
uint8_t num_vtxbufs;
uint8_t num_vtxelts;
uint8_t num_textures[5];
uint8_t num_samplers[5];
uint8_t num_textures[6];
uint8_t num_samplers[6];
uint8_t tls_required; /* bitmask of shader types using l[] */
uint8_t c14_bound; /* whether immediate array constbuf is bound */
uint8_t clip_enable;
@ -125,9 +147,10 @@ struct nvc0_context {
struct nvc0_program *tevlprog;
struct nvc0_program *gmtyprog;
struct nvc0_program *fragprog;
struct nvc0_program *compprog;
struct nvc0_constbuf constbuf[5][NVC0_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[5];
struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[6];
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned num_vtxbufs;
@ -139,14 +162,14 @@ struct nvc0_context {
uint32_t instance_off; /* current base vertex for instanced arrays */
uint32_t instance_max; /* last instance for current draw call */
struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS];
unsigned num_textures[5];
uint32_t textures_dirty[5];
struct nv50_tsc_entry *samplers[5][PIPE_MAX_SAMPLERS];
unsigned num_samplers[5];
uint16_t samplers_dirty[5];
struct pipe_sampler_view *textures[6][PIPE_MAX_SAMPLERS];
unsigned num_textures[6];
uint32_t textures_dirty[6];
struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS];
unsigned num_samplers[6];
uint16_t samplers_dirty[6];
uint32_t tex_handles[5][PIPE_MAX_SAMPLERS]; /* for nve4 */
uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */
struct pipe_framebuffer_state framebuffer;
struct pipe_blend_color blend_colour;
@ -169,6 +192,12 @@ struct nvc0_context {
struct nvc0_blitctx *blit;
struct pipe_surface *surfaces[2][NVC0_MAX_SURFACE_SLOTS];
uint16_t surfaces_dirty[2];
uint16_t surfaces_valid[2];
struct util_dynarray global_residents;
#ifdef NVC0_WITH_DRAW_MODULE
struct draw_context *draw;
#endif
@ -211,6 +240,8 @@ boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_library_upload(struct nvc0_context *);
uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
uint32_t label);
/* nvc0_query.c */
void nvc0_init_query_functions(struct nvc0_context *);
@ -236,6 +267,8 @@ void nvc0_tfb_validate(struct nvc0_context *);
extern void nvc0_init_state_functions(struct nvc0_context *);
/* nvc0_state_validate.c */
void nvc0_validate_global_residents(struct nvc0_context *,
struct nouveau_bufctx *, int bin);
extern boolean nvc0_state_validate(struct nvc0_context *, uint32_t state_mask,
unsigned space_words);
@ -246,9 +279,13 @@ extern void nvc0_clear(struct pipe_context *, unsigned buffers,
extern void nvc0_init_surface_functions(struct nvc0_context *);
/* nvc0_tex.c */
boolean nve4_validate_tsc(struct nvc0_context *nvc0, int s);
void nvc0_validate_textures(struct nvc0_context *);
void nvc0_validate_samplers(struct nvc0_context *);
void nve4_set_tex_handles(struct nvc0_context *);
void nvc0_validate_surfaces(struct nvc0_context *);
void nve4_set_surface_info(struct nouveau_pushbuf *, struct pipe_surface *,
struct nvc0_screen *);
struct pipe_sampler_view *
nvc0_create_texture_view(struct pipe_context *,
@ -315,4 +352,8 @@ nvc0_screen_get_video_param(struct pipe_screen *pscreen,
/* nvc0_push.c */
void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
/* nve4_compute.c */
void nve4_launch_grid(struct pipe_context *,
const uint *, const uint *, uint32_t, const void *);
#endif

View file

@ -25,6 +25,7 @@
#include "nvc0_context.h"
#include "nv50/codegen/nv50_ir_driver.h"
#include "nve4_compute.h"
/* If only they told us the actual semantic instead of just GENERIC ... */
static void
@ -533,10 +534,11 @@ nvc0_program_dump(struct nvc0_program *prog)
{
unsigned pos;
for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos)
debug_printf("HDR[%02lx] = 0x%08x\n",
pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
if (prog->type != PIPE_SHADER_COMPUTE) {
for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos)
debug_printf("HDR[%02lx] = 0x%08x\n",
pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
}
debug_printf("shader binary code (0x%x bytes):", prog->code_size);
for (pos = 0; pos < prog->code_size / 4; ++pos) {
if ((pos % 8) == 0)
@ -569,11 +571,11 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
if (prog->type == PIPE_SHADER_COMPUTE) {
if (chipset >= NVISA_GK104_CHIPSET) {
info->io.resInfoCBSlot = 0;
info->io.texBindBase = 0; /* TODO */
info->io.suInfoBase = 0; /* TODO */
info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
}
info->io.msInfoCBSlot = 0;
info->io.msInfoBase = 0; /* TODO */
info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
} else {
if (chipset >= NVISA_GK104_CHIPSET) {
info->io.resInfoCBSlot = 15;
@ -598,14 +600,16 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
NOUVEAU_ERR("shader translation failed: %i\n", ret);
goto out;
}
FREE(info->bin.syms);
if (prog->type != PIPE_SHADER_COMPUTE)
FREE(info->bin.syms);
prog->code = info->bin.code;
prog->code_size = info->bin.codeSize;
prog->immd_data = info->immd.buf;
prog->immd_size = info->immd.bufSize;
prog->relocs = info->bin.relocData;
prog->max_gpr = MAX2(4, (info->bin.maxGPR + 1));
prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
prog->num_barriers = info->numBarriers;
prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
@ -633,6 +637,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
case PIPE_SHADER_FRAGMENT:
ret = nvc0_fp_gen_header(prog, info);
break;
case PIPE_SHADER_COMPUTE:
prog->cp.syms = info->bin.syms;
prog->cp.num_syms = info->bin.numSyms;
break;
default:
ret = -1;
NOUVEAU_ERR("unknown program type: %u\n", prog->type);
@ -672,8 +680,9 @@ boolean
nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
struct nvc0_screen *screen = nvc0->screen;
const boolean is_cp = prog->type == PIPE_SHADER_COMPUTE;
int ret;
uint32_t size = prog->code_size + NVC0_SHADER_HEADER_SIZE;
uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
uint32_t lib_pos = screen->lib_code->start;
uint32_t code_pos;
@ -689,7 +698,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
* latency information is expected only at certain positions.
*/
if (screen->base.class_3d >= NVE4_3D_CLASS)
size = size + 0x70;
size = size + (is_cp ? 0x40 : 0x70);
size = align(size, 0x40);
ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
@ -714,18 +723,27 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <=
prog->mem->start + prog->mem->size));
if (screen->base.class_3d >= NVE4_3D_CLASS) {
switch (prog->mem->start & 0xff) {
case 0x40: prog->code_base += 0x70; break;
case 0x80: prog->code_base += 0x30; break;
case 0xc0: prog->code_base += 0x70; break;
default:
prog->code_base += 0x30;
assert((prog->mem->start & 0xff) == 0x00);
break;
if (!is_cp) {
if (screen->base.class_3d >= NVE4_3D_CLASS) {
switch (prog->mem->start & 0xff) {
case 0x40: prog->code_base += 0x70; break;
case 0x80: prog->code_base += 0x30; break;
case 0xc0: prog->code_base += 0x70; break;
default:
prog->code_base += 0x30;
assert((prog->mem->start & 0xff) == 0x00);
break;
}
}
code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
} else {
if (screen->base.class_3d >= NVE4_3D_CLASS) {
if (prog->mem->start & 0x40)
prog->code_base += 0x40;
assert((prog->code_base & 0x7f) == 0x00);
}
code_pos = prog->code_base;
}
code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
if (prog->relocs)
nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
@ -735,10 +753,10 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
nvc0_program_dump(prog);
#endif
nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
nvc0->base.push_data(&nvc0->base, screen->text,
prog->code_base + NVC0_SHADER_HEADER_SIZE,
if (!is_cp)
nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
NOUVEAU_BO_VRAM, prog->code_size, prog->code);
if (prog->immd_size)
nvc0->base.push_data(&nvc0->base,
@ -790,6 +808,8 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
FREE(prog->code);
FREE(prog->immd_data);
FREE(prog->relocs);
if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms)
FREE(prog->cp.syms);
if (prog->tfb) {
if (nvc0->state.tfb == prog->tfb)
nvc0->state.tfb = NULL;
@ -801,3 +821,18 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
prog->pipe = pipe;
prog->type = type;
}
/* Look up the code-segment offset of the symbol with the given label.
 * Non-compute programs carry a shader header in front of the code, so
 * their symbol offsets are biased by NVC0_SHADER_HEADER_SIZE.
 * Returns ~0 if no symbol with that label exists.
 */
uint32_t
nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label)
{
   const struct nv50_ir_prog_symbol *sym =
      (const struct nv50_ir_prog_symbol *)prog->cp.syms;
   const unsigned base =
      (prog->type != PIPE_SHADER_COMPUTE) ? NVC0_SHADER_HEADER_SIZE : 0;
   unsigned n;

   for (n = 0; n < prog->cp.num_syms; ++n, ++sym) {
      if (sym->label == label)
         return prog->code_base + base + sym->offset;
   }
   return ~0;
}

View file

@ -22,7 +22,7 @@ struct nvc0_program {
ubyte type;
boolean translated;
boolean need_tls;
uint8_t max_gpr;
uint8_t num_gprs;
uint32_t *code;
uint32_t *immd_data;
@ -50,6 +50,13 @@ struct nvc0_program {
uint32_t tess_mode; /* ~0 if defined by the other stage */
uint32_t input_patch_size;
} tp;
struct {
uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
void *syms;
unsigned num_syms;
} cp;
uint8_t num_barriers;
void *relocs;

View file

@ -88,12 +88,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
switch (param) {
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
return 16 * PIPE_SHADER_TYPES; /* NOTE: should not count COMPUTE */
return 16 * 5;
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return 15;
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
return 12;
return (class_3d >= NVE4_3D_CLASS) ? 13 : 12;
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
return 2048;
case PIPE_CAP_MIN_TEXEL_OFFSET:
@ -176,6 +176,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
return 0;
case PIPE_CAP_COMPUTE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
default:
NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
return 0;
@ -186,6 +188,8 @@ static int
nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
enum pipe_shader_cap param)
{
const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
switch (shader) {
case PIPE_SHADER_VERTEX:
/*
@ -195,11 +199,17 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_FRAGMENT:
break;
case PIPE_SHADER_COMPUTE:
if (class_3d < NVE4_3D_CLASS)
return 0;
break;
default:
return 0;
}
switch (param) {
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
@ -216,6 +226,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_MAX_CONSTS:
return 65536 / 16;
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
if (shader == PIPE_SHADER_COMPUTE && class_3d >= NVE4_3D_CLASS)
return NVE4_MAX_PIPE_CONSTBUFS_COMPUTE;
return NVC0_MAX_PIPE_CONSTBUFS;
case PIPE_SHADER_CAP_MAX_ADDRS:
return 1;
@ -234,7 +246,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_SUBROUTINES:
return 1; /* but inlining everything, we need function declarations */
return 1;
case PIPE_SHADER_CAP_INTEGERS:
return 1;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
@ -270,6 +282,47 @@ nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
}
}
/* pipe_screen::get_compute_param implementation.
 * Writes the requested capability value(s) into *data and returns the
 * number of bytes written; returns 0 for parameters we do not report.
 */
static int
nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
                              enum pipe_compute_cap param, void *data)
{
   uint64_t *out = (uint64_t *)data;
   const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;

   switch (param) {
   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
      out[0] = 3;
      return sizeof(uint64_t);
   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
      /* the Kepler compute class allows a much larger grid width */
      out[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fffffff : 65535;
      out[1] = 65535;
      out[2] = 65535;
      return 3 * sizeof(uint64_t);
   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
      out[0] = 1024;
      out[1] = 1024;
      out[2] = 64;
      return 3 * sizeof(uint64_t);
   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
      out[0] = 1024;
      return sizeof(uint64_t);
   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
      out[0] = (uint64_t)1 << 40;
      return sizeof(uint64_t);
   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
      out[0] = 48 << 10;
      return sizeof(uint64_t);
   case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
      out[0] = 512 << 10;
      return sizeof(uint64_t);
   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
      out[0] = 4096;
      return sizeof(uint64_t);
   default:
      return 0;
   }
}
static void
nvc0_screen_destroy(struct pipe_screen *pscreen)
{
@ -291,6 +344,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
nouveau_bo_ref(NULL, &screen->txc);
nouveau_bo_ref(NULL, &screen->fence.bo);
nouveau_bo_ref(NULL, &screen->poly_cache);
nouveau_bo_ref(NULL, &screen->parm);
nouveau_heap_destroy(&screen->lib_code);
nouveau_heap_destroy(&screen->text_heap);
@ -412,6 +466,23 @@ nvc0_screen_fence_update(struct pipe_screen *pscreen)
return screen->fence.map[0];
}
/* Hook up compute support for this screen.
 * Returns 0 on success (or when compute is intentionally left disabled),
 * negative on failure / unrecognized chipset.
 */
static int
nvc0_screen_init_compute(struct nvc0_screen *screen)
{
   const unsigned family = screen->base.device->chipset & 0xf0;

   screen->base.base.get_compute_param = nvc0_screen_get_compute_param;

   /* Fermi (nvc0/nvd0): no compute bring-up here yet, report success. */
   if (family == 0xc0 || family == 0xd0)
      return 0;
   /* Kepler (nve0/nvf0): initialize the NVE4 compute engine. */
   if (family == 0xe0 || family == 0xf0)
      return nve4_screen_compute_setup(screen, screen->base.pushbuf);
   return -1;
}
#define FAIL_SCREEN_INIT(str, err) \
do { \
NOUVEAU_ERR(str, err); \
@ -653,9 +724,9 @@ nvc0_screen_create(struct nouveau_device *dev)
/* max MPs * max warps per MP (TODO: ask kernel) */
if (screen->eng3d->oclass >= NVE4_3D_CLASS)
screen->tls_size = 8 * 64;
screen->tls_size = 8 * 64 * 32;
else
screen->tls_size = 16 * 48;
screen->tls_size = 16 * 48 * 32;
screen->tls_size *= NVC0_CAP_MAX_PROGRAM_TEMPS * 16;
screen->tls_size = align(screen->tls_size, 1 << 17);
@ -775,6 +846,9 @@ nvc0_screen_create(struct nouveau_device *dev)
IMMED_NVC0(push, NVC0_3D(EDGEFLAG), 1);
if (nvc0_screen_init_compute(screen))
goto fail;
PUSH_KICK (push);
screen->tic.entries = CALLOC(4096, sizeof(void *));

View file

@ -15,7 +15,10 @@
#define NVC0_TSC_MAX_ENTRIES 2048
/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
#define NVC0_MAX_PIPE_CONSTBUFS 14
#define NVC0_MAX_PIPE_CONSTBUFS 14
#define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE 7
#define NVC0_MAX_SURFACE_SLOTS 16
struct nvc0_context;
@ -29,7 +32,8 @@ struct nvc0_screen {
int num_occlusion_queries_active;
struct nouveau_bo *text;
struct nouveau_bo *uniform_bo;
struct nouveau_bo *parm; /* for COMPUTE */
struct nouveau_bo *uniform_bo; /* for 3D */
struct nouveau_bo *tls;
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
struct nouveau_bo *poly_cache;
@ -63,7 +67,7 @@ struct nvc0_screen {
struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
struct nouveau_object *eng2d;
struct nouveau_object *m2mf;
struct nouveau_object *dijkstra;
struct nouveau_object *compute;
};
static INLINE struct nvc0_screen *
@ -80,6 +84,8 @@ void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
static INLINE void
nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
{

View file

@ -95,7 +95,7 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0)
PUSH_DATA (push, 0x11);
PUSH_DATA (push, vp->code_base);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
PUSH_DATA (push, vp->max_gpr);
PUSH_DATA (push, vp->num_gprs);
// BEGIN_NVC0(push, NVC0_3D_(0x163c), 1);
// PUSH_DATA (push, 0);
@ -120,7 +120,7 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
PUSH_DATA (push, 0x51);
PUSH_DATA (push, fp->code_base);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
PUSH_DATA (push, fp->max_gpr);
PUSH_DATA (push, fp->num_gprs);
BEGIN_NVC0(push, SUBC_3D(0x0360), 2);
PUSH_DATA (push, 0x20164010);
@ -144,7 +144,7 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
PUSH_DATA (push, 0x21);
PUSH_DATA (push, tp->code_base);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
PUSH_DATA (push, tp->max_gpr);
PUSH_DATA (push, tp->num_gprs);
if (tp->tp.input_patch_size <= 32)
IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), tp->tp.input_patch_size);
@ -171,7 +171,7 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0)
BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1);
PUSH_DATA (push, tp->code_base);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
PUSH_DATA (push, tp->max_gpr);
PUSH_DATA (push, tp->num_gprs);
} else {
BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
PUSH_DATA (push, 0x30);
@ -197,7 +197,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1);
PUSH_DATA (push, gp->code_base);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
PUSH_DATA (push, gp->max_gpr);
PUSH_DATA (push, gp->num_gprs);
BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
PUSH_DATA (push, gp_selects_layer ? NVC0_3D_LAYER_USE_GP : 0);
} else {

View file

@ -489,6 +489,57 @@ nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s);
}
/* Bind (or, when cso == NULL, unbind) a contiguous range [start, start+nr)
 * of sampler state objects for shader stage s. Marks changed slots dirty,
 * releases the TSC entry of any replaced sampler, and shrinks or grows the
 * per-stage sampler count to cover the highest bound slot.
 */
static void
nvc0_stage_sampler_states_bind_range(struct nvc0_context *nvc0,
                                     const unsigned s,
                                     unsigned start, unsigned nr, void **cso)
{
   const unsigned limit = start + nr;
   int highest_bound = -1; /* largest slot in range given a non-NULL cso */
   unsigned i;

   for (i = start; i < limit; ++i) {
      void *state = cso ? cso[i - start] : NULL;

      if (state)
         highest_bound = i;
      if (state == nvc0->samplers[s][i])
         continue; /* unchanged slot: no dirtying, no unlock */
      nvc0->samplers_dirty[s] |= 1 << i;

      if (nvc0->samplers[s][i])
         nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
      nvc0->samplers[s][i] = state;
   }

   /* Only recompute the count if the range could affect the current top. */
   if (nvc0->num_samplers[s] <= limit) {
      if (highest_bound >= 0) {
         nvc0->num_samplers[s] = highest_bound + 1;
      } else {
         /* nothing bound in range: scan down past trailing empty slots */
         for (i = start; i && !nvc0->samplers[s][i - 1]; --i);
         nvc0->num_samplers[s] = i;
      }
   }
}
static void
nvc0_cp_sampler_states_bind(struct pipe_context *pipe,
unsigned start, unsigned nr, void **cso)
{
nvc0_stage_sampler_states_bind_range(nvc0_context(pipe), 5, start, nr, cso);
nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
}
/* NOTE: only called when not referenced anywhere, won't be bound */
static void
nvc0_sampler_view_destroy(struct pipe_context *pipe,
@ -561,6 +612,67 @@ nvc0_gp_set_sampler_views(struct pipe_context *pipe,
nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views);
}
/* Bind (or, when views == NULL, unbind) a contiguous range [start, start+nr)
 * of sampler views for shader stage s. Stage 5 is the compute stage and
 * tracks its buffers in the compute bufctx; all other stages use the 3D one.
 * Replaced views get their TIC entry unlocked and their bufctx bin reset,
 * and the per-stage texture count is adjusted to the highest bound slot.
 */
static void
nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s,
                                   unsigned start, unsigned nr,
                                   struct pipe_sampler_view **views)
{
   struct nouveau_bufctx *bctx = (s == 5) ? nvc0->bufctx_cp : nvc0->bufctx_3d;
   const unsigned bin = (s == 5) ? NVC0_BIND_CP_TEX(0) : NVC0_BIND_TEX(s, 0);
   const unsigned limit = start + nr;
   int highest_bound = -1; /* largest slot in range given a non-NULL view */
   unsigned i;

   for (i = start; i < limit; ++i) {
      struct pipe_sampler_view *view = views ? views[i - start] : NULL;

      if (view)
         highest_bound = i;
      if (view == nvc0->textures[s][i])
         continue; /* unchanged slot */
      nvc0->textures_dirty[s] |= 1 << i;

      if (nvc0->textures[s][i]) {
         struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
         nouveau_bufctx_reset(bctx, bin + i);
         nvc0_screen_tic_unlock(nvc0->screen, old);
      }
      pipe_sampler_view_reference(&nvc0->textures[s][i], view);
   }

   /* Only recompute the count if the range could affect the current top. */
   if (nvc0->num_textures[s] <= limit) {
      if (highest_bound >= 0) {
         nvc0->num_textures[s] = highest_bound + 1;
      } else {
         /* nothing bound in range: scan down past trailing empty slots */
         for (i = start; i && !nvc0->textures[s][i - 1]; --i);
         nvc0->num_textures[s] = i;
      }
   }
}
static void
nvc0_cp_set_sampler_views(struct pipe_context *pipe,
unsigned start, unsigned nr,
struct pipe_sampler_view **views)
{
nvc0_stage_set_sampler_views_range(nvc0_context(pipe), 5, start, nr, views);
nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_TEXTURES;
}
/* ============================= SHADERS =======================================
*/
@ -644,6 +756,35 @@ nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
nvc0->dirty |= NVC0_NEW_GMTYPROG;
}
/* pipe_context::create_compute_state implementation.
 * Allocates an nvc0_program for a compute shader, recording the requested
 * shared (TGSI LOCAL), private (TGSI PRIVATE) and input memory sizes, and
 * duplicating the caller's token stream (caller retains ownership of cso).
 * Returns NULL on allocation failure.
 */
static void *
nvc0_cp_state_create(struct pipe_context *pipe,
                     const struct pipe_compute_state *cso)
{
   struct nvc0_program *prog;

   prog = CALLOC_STRUCT(nvc0_program);
   if (!prog)
      return NULL;
   prog->type = PIPE_SHADER_COMPUTE;

   prog->cp.smem_size = cso->req_local_mem;
   prog->cp.lmem_size = cso->req_private_mem;
   prog->parm_size = cso->req_input_mem;

   prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog);
   if (!prog->pipe.tokens) {
      /* tgsi_dup_tokens allocates; don't hand back a program whose token
       * stream is NULL — translation would dereference it later. */
      FREE(prog);
      return NULL;
   }

   return (void *)prog;
}
/* pipe_context::bind_compute_state: make hwcso the current compute program
 * and flag it for re-validation on the next launch_grid.
 */
static void
nvc0_cp_state_bind(struct pipe_context *pipe, void *hwcso)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);

   nvc0->compprog = hwcso;
   nvc0->dirty_cp |= NVC0_NEW_CP_PROGRAM;
}
static void
nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
struct pipe_constant_buffer *cb)
@ -653,14 +794,22 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
const unsigned s = nvc0_shader_stage(shader);
const unsigned i = index;
if (shader == PIPE_SHADER_COMPUTE)
return;
if (unlikely(shader == PIPE_SHADER_COMPUTE)) {
assert(!cb || !cb->user_buffer);
if (nvc0->constbuf[s][i].u.buf)
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i));
if (nvc0->constbuf[s][i].user)
nvc0->constbuf[s][i].u.buf = NULL;
else
if (nvc0->constbuf[s][i].u.buf)
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
} else {
if (nvc0->constbuf[s][i].user)
nvc0->constbuf[s][i].u.buf = NULL;
else
if (nvc0->constbuf[s][i].u.buf)
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
nvc0->dirty |= NVC0_NEW_CONSTBUF;
}
nvc0->constbuf_dirty[s] |= 1 << i;
pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);
@ -673,10 +822,6 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
nvc0->constbuf[s][i].offset = cb->buffer_offset;
nvc0->constbuf[s][i].size = align(cb->buffer_size, 0x100);
}
nvc0->constbuf_dirty[s] |= 1 << i;
nvc0->dirty |= NVC0_NEW_CONSTBUF;
}
/* =============================================================================
@ -919,6 +1064,113 @@ nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
nvc0->dirty |= NVC0_NEW_TFB_TARGETS;
}
/* Update a contiguous range of surface bindings for set t
 * (t == 0: 3D shader resources, t == 1: compute resources).
 * A NULL psurfaces array unbinds the whole range.
 */
static void
nvc0_bind_surfaces_range(struct nvc0_context *nvc0, const unsigned t,
                         unsigned start, unsigned nr,
                         struct pipe_surface **psurfaces)
{
   const unsigned mask = ((1 << nr) - 1) << start;
   unsigned p;

   for (p = 0; p < nr; ++p) {
      const unsigned i = start + p;
      struct pipe_surface *sf = psurfaces ? psurfaces[p] : NULL;

      if (sf)
         nvc0->surfaces_valid[t] |= (1 << i);
      else
         nvc0->surfaces_valid[t] &= ~(1 << i);
      pipe_surface_reference(&nvc0->surfaces[t][i], sf);
   }
   nvc0->surfaces_dirty[t] |= mask;

   /* drop old buffer references; validation re-adds the current set */
   if (t == 0)
      nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_SUF);
   else
      nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
}
static void
nvc0_set_compute_resources(struct pipe_context *pipe,
unsigned start, unsigned nr,
struct pipe_surface **resources)
{
nvc0_bind_surfaces_range(nvc0_context(pipe), 1, start, nr, resources);
nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SURFACES;
}
static void
nvc0_set_shader_resources(struct pipe_context *pipe,
unsigned start, unsigned nr,
struct pipe_surface **resources)
{
nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, resources);
nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES;
}
/* Store the 32-bit GPU address of a global resource into *phandle.
 * Writes 0 when there is no resource or when the buffer does not fit
 * entirely below the 4 GiB boundary (the handle is only 32 bits wide).
 */
static INLINE void
nvc0_set_global_handle(uint32_t *phandle, struct pipe_resource *res)
{
   struct nv04_resource *buf = nv04_resource(res);
   uint64_t limit;

   *phandle = 0;
   if (!buf)
      return;

   limit = (buf->address + buf->base.width0) - 1;
   if (limit >= (1ULL << 32)) {
      NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: "
                  "resource not contained within 32-bit address space !\n");
      return;
   }
   *phandle = (uint32_t)buf->address;
}
/* pipe_context::set_global_binding — (un)bind a range of resources to the
 * compute global address space and report their 32-bit handles.
 * A NULL resources array unbinds the range.
 */
static void
nvc0_set_global_bindings(struct pipe_context *pipe,
                         unsigned start, unsigned nr,
                         struct pipe_resource **resources,
                         uint32_t **handles)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct pipe_resource **ptr;
   unsigned i;
   const unsigned end = start + nr;

   /* Grow the resident array to cover [0, end), zero-filling the new tail
    * so the residency validation loop never dereferences garbage.
    */
   if (nvc0->global_residents.size <= (end * sizeof(struct pipe_resource *))) {
      const unsigned old_size = nvc0->global_residents.size;
      const unsigned req_size = end * sizeof(struct pipe_resource *);
      util_dynarray_resize(&nvc0->global_residents, req_size);
      memset((uint8_t *)nvc0->global_residents.data + old_size, 0,
             req_size - old_size);
   }

   if (resources) {
      ptr = util_dynarray_element(
         &nvc0->global_residents, struct pipe_resource *, start);
      for (i = 0; i < nr; ++i) {
         pipe_resource_reference(&ptr[i], resources[i]);
         nvc0_set_global_handle(handles[i], resources[i]);
      }
   } else {
      ptr = util_dynarray_element(
         &nvc0->global_residents, struct pipe_resource *, start);
      for (i = 0; i < nr; ++i)
         pipe_resource_reference(&ptr[i], NULL);
   }

   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);

   /* |= — plain assignment would discard other pending compute dirty bits */
   nvc0->dirty_cp |= NVC0_NEW_CP_GLOBALS;
}
void
nvc0_init_state_functions(struct nvc0_context *nvc0)
{
@ -941,12 +1193,14 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->bind_vertex_sampler_states = nvc0_vp_sampler_states_bind;
pipe->bind_fragment_sampler_states = nvc0_fp_sampler_states_bind;
pipe->bind_geometry_sampler_states = nvc0_gp_sampler_states_bind;
pipe->bind_compute_sampler_states = nvc0_cp_sampler_states_bind;
pipe->create_sampler_view = nvc0_create_sampler_view;
pipe->sampler_view_destroy = nvc0_sampler_view_destroy;
pipe->set_vertex_sampler_views = nvc0_vp_set_sampler_views;
pipe->set_fragment_sampler_views = nvc0_fp_set_sampler_views;
pipe->set_geometry_sampler_views = nvc0_gp_set_sampler_views;
pipe->set_compute_sampler_views = nvc0_cp_set_sampler_views;
pipe->create_vs_state = nvc0_vp_state_create;
pipe->create_fs_state = nvc0_fp_state_create;
@ -958,6 +1212,10 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->delete_fs_state = nvc0_sp_state_delete;
pipe->delete_gs_state = nvc0_sp_state_delete;
pipe->create_compute_state = nvc0_cp_state_create;
pipe->bind_compute_state = nvc0_cp_state_bind;
pipe->delete_compute_state = nvc0_sp_state_delete;
pipe->set_blend_color = nvc0_set_blend_color;
pipe->set_stencil_ref = nvc0_set_stencil_ref;
pipe->set_clip_state = nvc0_set_clip_state;
@ -978,5 +1236,9 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->create_stream_output_target = nvc0_so_target_create;
pipe->stream_output_target_destroy = nvc0_so_target_destroy;
pipe->set_stream_output_targets = nvc0_set_transform_feedback_targets;
pipe->set_global_binding = nvc0_set_global_bindings;
pipe->set_compute_resources = nvc0_set_compute_resources;
pipe->set_shader_resources = nvc0_set_shader_resources;
}

View file

@ -430,6 +430,21 @@ nvc0_validate_sample_mask(struct nvc0_context *nvc0)
PUSH_DATA (push, 0x01);
}
/* Re-add every currently bound global resource to the given bufctx bin so
 * the kernel keeps them resident (read/write) for the next submission.
 */
void
nvc0_validate_global_residents(struct nvc0_context *nvc0,
                               struct nouveau_bufctx *bctx, int bin)
{
   const unsigned count =
      nvc0->global_residents.size / sizeof(struct pipe_resource *);
   unsigned i;

   for (i = 0; i < count; ++i) {
      struct pipe_resource *res = *util_dynarray_element(
         &nvc0->global_residents, struct pipe_resource *, i);
      if (res)
         nvc0_add_resident(bctx, bin, nv04_resource(res), NOUVEAU_BO_RDWR);
   }
}
static void
nvc0_validate_derived_1(struct nvc0_context *nvc0)
{
@ -513,6 +528,7 @@ static struct state_validate {
{ nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
{ nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
{ nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
{ nvc0_validate_surfaces, NVC0_NEW_SURFACES },
{ nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
{ nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }
};

View file

@ -515,7 +515,7 @@ nvc0_blitter_make_vp(struct nvc0_blitter *blit)
blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */
blit->vp.code_size = sizeof(code_nvc0);
}
blit->vp.max_gpr = 7;
blit->vp.num_gprs = 7;
blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS;
blit->vp.hdr[0] = 0x00020461; /* vertprog magic */

View file

@ -23,6 +23,7 @@
#include "nvc0_context.h"
#include "nvc0_resource.h"
#include "nv50/nv50_texture.xml.h"
#include "nv50/nv50_defs.xml.h"
#include "util/u_format.h"
@ -413,7 +414,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
return need_flush;
}
static boolean
boolean
nve4_validate_tsc(struct nvc0_context *nvc0, int s)
{
struct nouveau_bo *txc = nvc0->screen->txc;
@ -515,3 +516,295 @@ nve4_set_tex_handles(struct nvc0_context *nvc0)
nvc0->samplers_dirty[s] = 0;
}
}
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
/* Emit a 16-word surface descriptor into the push buffer at push->cur.
 * NVE4 surface load/store instructions take all surface parameters from
 * this in-memory record rather than from binding points, so it encodes
 * address, format, dimensions and tiling layout.
 * Advances push->cur by 16 words unconditionally.
 */
void
nve4_set_surface_info(struct nouveau_pushbuf *push,
                      struct pipe_surface *psf,
                      struct nvc0_screen *screen)
{
   struct nv50_surface *sf = nv50_surface(psf);
   struct nv04_resource *res;
   uint64_t address;
   uint32_t *const info = push->cur;
   uint8_t log2cpp;

   if (psf && !nve4_su_format_map[psf->format])
      NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");

   push->cur += 16;

   /* Unbound or unsupported surface: write a poisoned descriptor but point
    * the library offset at the RGBA32_UINT access routine so shaders using
    * it do not jump to garbage. (0xbadf0000 is a recognizable marker.)
    */
   if (!psf || !nve4_su_format_map[psf->format]) {
      memset(info, 0, 16 * sizeof(*info));

      info[0] = 0xbadf0000;
      info[1] = 0x80004000;
      info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
         screen->lib_code->start;
      return;
   }
   res = nv04_resource(sf->base.texture);

   address = res->address + sf->offset;

   info[8] = sf->width;
   info[9] = sf->height;
   info[10] = sf->depth;
   /* words 11: dimensionality code — NOTE(review): exact hardware meaning
    * inferred from the target mapping below; array/cube all share code 4.
    */
   switch (res->base.target) {
   case PIPE_TEXTURE_1D_ARRAY:
      info[11] = 1;
      break;
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
      info[11] = 2;
      break;
   case PIPE_TEXTURE_3D:
      info[11] = 3;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      info[11] = 4;
      break;
   default:
      info[11] = 0;
      break;
   }
   /* high nibble of the aux value is log2(bytes per pixel) */
   log2cpp = (0xf000 & nve4_su_format_aux_map[sf->base.format]) >> 12;

   /* offset of the per-format surface-load routine in the shader library */
   info[12] = nve4_suldp_lib_offset[sf->base.format] + screen->lib_code->start;

   /* limit in bytes for raw access */
   info[13] = (0x06 << 22) | ((sf->width << log2cpp) - 1);

   info[1] = nve4_su_format_map[sf->base.format];

#if 0
   switch (util_format_get_blocksizebits(sf->base.format)) {
   case  16: info[1] |= 1 << 16; break;
   case  32: info[1] |= 2 << 16; break;
   case  64: info[1] |= 3 << 16; break;
   case 128: info[1] |= 4 << 16; break;
   default:
      break;
   }
#else
   info[1] |= log2cpp << 16;
   info[1] |= 0x4000;
   info[1] |= (0x0f00 & nve4_su_format_aux_map[sf->base.format]);
#endif

   if (res->base.target == PIPE_BUFFER) {
      /* linear buffer: only base address, width and format aux bits matter */
      info[0] = address >> 8;
      info[2] = sf->width - 1;
      info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
      info[3] = 0;
      info[4] = 0;
      info[5] = 0;
      info[6] = 0;
      info[7] = 0;
      info[14] = 0;
      info[15] = 0;
   } else {
      struct nv50_miptree *mt = nv50_miptree(&res->base);
      struct nv50_miptree_level *lvl = &mt->level[sf->base.u.tex.level];
      const unsigned z = sf->base.u.tex.first_layer;

      if (z) {
         if (mt->layout_3d) {
            address += nvc0_mt_zslice_offset(mt, psf->u.tex.level, z);
            /* doesn't work if z passes z-tile boundary */
            assert(sf->depth == 1);
         } else {
            address += mt->layer_stride * z;
         }
      }
      info[0] = address >> 8;
      info[2] = sf->width - 1;
      /* NOTE: this is really important: */
      info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
      info[3] = (0x88 << 24) | (lvl->pitch / 64);
      info[4] = sf->height - 1;
      info[4] |= (lvl->tile_mode & 0x0f0) << 25;
      info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
      info[5] = mt->layer_stride >> 8;
      info[6] = sf->depth - 1;
      info[6] |= (lvl->tile_mode & 0xf00) << 21;
      info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
      info[7] = 0;
      info[14] = mt->ms_x; /* multisample coordinate shifts */
      info[15] = mt->ms_y;
   }
}
/* Surface binding update for Fermi (pre-NVE4) — not yet implemented. */
static INLINE void
nvc0_update_surface_bindings(struct nvc0_context *nvc0)
{
   /* TODO */
}
/* 3D-shader surface binding update for Kepler — not yet implemented
 * (compute surfaces are handled separately in nve4_compute.c).
 */
static INLINE void
nve4_update_surface_bindings(struct nvc0_context *nvc0)
{
   /* TODO */
}
void
nvc0_validate_surfaces(struct nvc0_context *nvc0)
{
if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
nve4_update_surface_bindings(nvc0);
} else {
nvc0_update_surface_bindings(nvc0);
}
}
/* Gallium format -> NVE4 image format code for surface load/store.
 * A zero entry (the implicit default) means the format is unsupported
 * and is rejected by nve4_set_surface_info().
 */
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = NVE4_IMAGE_FORMAT_RGBA32_FLOAT,
   [PIPE_FORMAT_R32G32B32A32_SINT] = NVE4_IMAGE_FORMAT_RGBA32_SINT,
   [PIPE_FORMAT_R32G32B32A32_UINT] = NVE4_IMAGE_FORMAT_RGBA32_UINT,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = NVE4_IMAGE_FORMAT_RGBA16_FLOAT,
   [PIPE_FORMAT_R16G16B16A16_UNORM] = NVE4_IMAGE_FORMAT_RGBA16_UNORM,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = NVE4_IMAGE_FORMAT_RGBA16_SNORM,
   [PIPE_FORMAT_R16G16B16A16_SINT] = NVE4_IMAGE_FORMAT_RGBA16_SINT,
   [PIPE_FORMAT_R16G16B16A16_UINT] = NVE4_IMAGE_FORMAT_RGBA16_UINT,
   [PIPE_FORMAT_R8G8B8A8_UNORM] = NVE4_IMAGE_FORMAT_RGBA8_UNORM,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = NVE4_IMAGE_FORMAT_RGBA8_SNORM,
   [PIPE_FORMAT_R8G8B8A8_SINT] = NVE4_IMAGE_FORMAT_RGBA8_SINT,
   [PIPE_FORMAT_R8G8B8A8_UINT] = NVE4_IMAGE_FORMAT_RGBA8_UINT,
   [PIPE_FORMAT_R11G11B10_FLOAT] = NVE4_IMAGE_FORMAT_R11G11B10_FLOAT,
   [PIPE_FORMAT_R10G10B10A2_UNORM] = NVE4_IMAGE_FORMAT_RGB10_A2_UNORM,
/* [PIPE_FORMAT_R10G10B10A2_UINT] = NVE4_IMAGE_FORMAT_RGB10_A2_UINT, */
   [PIPE_FORMAT_R32G32_FLOAT] = NVE4_IMAGE_FORMAT_RG32_FLOAT,
   [PIPE_FORMAT_R32G32_SINT] = NVE4_IMAGE_FORMAT_RG32_SINT,
   [PIPE_FORMAT_R32G32_UINT] = NVE4_IMAGE_FORMAT_RG32_UINT,
   [PIPE_FORMAT_R16G16_FLOAT] = NVE4_IMAGE_FORMAT_RG16_FLOAT,
   [PIPE_FORMAT_R16G16_UNORM] = NVE4_IMAGE_FORMAT_RG16_UNORM,
   [PIPE_FORMAT_R16G16_SNORM] = NVE4_IMAGE_FORMAT_RG16_SNORM,
   [PIPE_FORMAT_R16G16_SINT] = NVE4_IMAGE_FORMAT_RG16_SINT,
   [PIPE_FORMAT_R16G16_UINT] = NVE4_IMAGE_FORMAT_RG16_UINT,
   [PIPE_FORMAT_R8G8_UNORM] = NVE4_IMAGE_FORMAT_RG8_UNORM,
   [PIPE_FORMAT_R8G8_SNORM] = NVE4_IMAGE_FORMAT_RG8_SNORM,
   [PIPE_FORMAT_R8G8_SINT] = NVE4_IMAGE_FORMAT_RG8_SINT,
   [PIPE_FORMAT_R8G8_UINT] = NVE4_IMAGE_FORMAT_RG8_UINT,
   [PIPE_FORMAT_R32_FLOAT] = NVE4_IMAGE_FORMAT_R32_FLOAT,
   [PIPE_FORMAT_R32_SINT] = NVE4_IMAGE_FORMAT_R32_SINT,
   [PIPE_FORMAT_R32_UINT] = NVE4_IMAGE_FORMAT_R32_UINT,
   [PIPE_FORMAT_R16_FLOAT] = NVE4_IMAGE_FORMAT_R16_FLOAT,
   [PIPE_FORMAT_R16_UNORM] = NVE4_IMAGE_FORMAT_R16_UNORM,
   [PIPE_FORMAT_R16_SNORM] = NVE4_IMAGE_FORMAT_R16_SNORM,
   [PIPE_FORMAT_R16_SINT] = NVE4_IMAGE_FORMAT_R16_SINT,
   [PIPE_FORMAT_R16_UINT] = NVE4_IMAGE_FORMAT_R16_UINT,
   [PIPE_FORMAT_R8_UNORM] = NVE4_IMAGE_FORMAT_R8_UNORM,
   [PIPE_FORMAT_R8_SNORM] = NVE4_IMAGE_FORMAT_R8_SNORM,
   [PIPE_FORMAT_R8_SINT] = NVE4_IMAGE_FORMAT_R8_SINT,
   [PIPE_FORMAT_R8_UINT] = NVE4_IMAGE_FORMAT_R8_UINT,
};
/* Auxiliary format description values for surface instructions.
 * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
 * The low/middle fields are mixed into the surface descriptor in
 * nve4_set_surface_info(); their exact hardware meaning is not fully known.
 */
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
   [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
   [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,

   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,

   [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
   [PIPE_FORMAT_R32G32_SINT] = 0x3433,
   [PIPE_FORMAT_R32G32_UINT] = 0x3433,

   [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24, */
   [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
   [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,

   [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
   [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
   [PIPE_FORMAT_R16G16_SINT] = 0x2524,
   [PIPE_FORMAT_R16G16_UINT] = 0x2524,
   [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,

   [PIPE_FORMAT_R32_SINT] = 0x2024,
   [PIPE_FORMAT_R32_UINT] = 0x2024,
   [PIPE_FORMAT_R32_FLOAT] = 0x2024,

   [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
   [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
   [PIPE_FORMAT_R8G8_SINT] = 0x1615,
   [PIPE_FORMAT_R8G8_UINT] = 0x1615,

   [PIPE_FORMAT_R16_UNORM] = 0x1115,
   [PIPE_FORMAT_R16_SNORM] = 0x1115,
   [PIPE_FORMAT_R16_SINT] = 0x1115,
   [PIPE_FORMAT_R16_UINT] = 0x1115,
   [PIPE_FORMAT_R16_FLOAT] = 0x1115,

   [PIPE_FORMAT_R8_UNORM] = 0x0206,
   [PIPE_FORMAT_R8_SNORM] = 0x0206,
   [PIPE_FORMAT_R8_SINT] = 0x0206,
   [PIPE_FORMAT_R8_UINT] = 0x0206
};
/* NOTE: These are hardcoded offsets for the shader library.
 * TODO: Automate them.
 * Each entry is the byte offset (relative to screen->lib_code->start) of the
 * per-format SULDP (surface load) routine referenced in the surface info.
 */
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
   [PIPE_FORMAT_R32G32B32A32_SINT] = 0x218,
   [PIPE_FORMAT_R32G32B32A32_UINT] = 0x218,
   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
   [PIPE_FORMAT_R16G16B16A16_SINT] = 0x330,
   [PIPE_FORMAT_R16G16B16A16_UINT] = 0x388,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
   [PIPE_FORMAT_R32G32_FLOAT] = 0x428,
   [PIPE_FORMAT_R32G32_SINT] = 0x468,
   [PIPE_FORMAT_R32G32_UINT] = 0x468,
   [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8,
/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x530, */
   [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8,
   [PIPE_FORMAT_R8G8B8A8_SINT] = 0x670,
   [PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8,
   [PIPE_FORMAT_B5G6R5_UNORM] = 0x718,
   [PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0,
   [PIPE_FORMAT_R16G16_UNORM] = 0x828,
   [PIPE_FORMAT_R16G16_SNORM] = 0x890,
   [PIPE_FORMAT_R16G16_SINT] = 0x8f0,
   [PIPE_FORMAT_R16G16_UINT] = 0x948,
   [PIPE_FORMAT_R16G16_FLOAT] = 0x998,
   [PIPE_FORMAT_R32_FLOAT] = 0x9e8,
   [PIPE_FORMAT_R32_SINT] = 0xa30,
   [PIPE_FORMAT_R32_UINT] = 0xa30,
   [PIPE_FORMAT_R8G8_UNORM] = 0xa78,
   [PIPE_FORMAT_R8G8_SNORM] = 0xae0,
   [PIPE_FORMAT_R8G8_UINT] = 0xb48,
   [PIPE_FORMAT_R8G8_SINT] = 0xb98,
   [PIPE_FORMAT_R16_UNORM] = 0xbe8,
   [PIPE_FORMAT_R16_SNORM] = 0xc48,
   [PIPE_FORMAT_R16_SINT] = 0xca0,
   [PIPE_FORMAT_R16_UINT] = 0xce8,
   [PIPE_FORMAT_R16_FLOAT] = 0xd30,
   [PIPE_FORMAT_R8_UNORM] = 0xd88,
   [PIPE_FORMAT_R8_SNORM] = 0xde0,
   [PIPE_FORMAT_R8_SINT] = 0xe38,
   [PIPE_FORMAT_R8_UINT] = 0xe88,
   [PIPE_FORMAT_R11G11B10_FLOAT] = 0xed0
};

View file

@ -0,0 +1,607 @@
/*
* Copyright 2012 Nouveau Project
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Christoph Bumiller
*/
#include "nvc0_context.h"
#include "nve4_compute.h"
#include "nv50/codegen/nv50_ir_driver.h"
static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *);
/* One-time per-screen setup of the Kepler COMPUTE object: allocates the
 * compute class object and the kernel-parameter buffer, then programs
 * TEMP/LOCAL/SHARED bases, code and TIC/TSC table addresses, and uploads
 * the MS sample offset table.
 * Returns 0 on success, negative on failure.
 */
int
nve4_screen_compute_setup(struct nvc0_screen *screen,
                          struct nouveau_pushbuf *push)
{
   struct nouveau_device *dev = screen->base.device;
   struct nouveau_object *chan = screen->base.channel;
   unsigned i;
   int ret;
   uint32_t obj_class;

   switch (dev->chipset & 0xf0) {
   case 0xf0:
      obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
      break;
   case 0xe0:
      obj_class = NVE4_COMPUTE_CLASS; /* GK104 */
      break;
   default:
      /* Bail out: proceeding here would use obj_class uninitialized. */
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
      return -1;
   }

   ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0,
                            &screen->compute);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
      return ret;
   }

   /* buffer for kernel input, texture handles, surface info, MS offsets */
   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, NVE4_CP_PARAM_SIZE, NULL,
                        &screen->parm);
   if (ret)
      return ret;

   BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->oclass);

   BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls->offset);
   PUSH_DATA (push, screen->tls->offset);
   /* No idea why there are 2. Divide size by 2 to be safe.
    * Actually this might be per-MP TEMP size and looks like I'm only using
    * 2 MPs instead of all 8.
    */
   BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_SIZE_HIGH(0)), 3);
   PUSH_DATAh(push, screen->tls_size / 2);
   PUSH_DATA (push, screen->tls_size / 2);
   PUSH_DATA (push, 0xff);
   BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_SIZE_HIGH(1)), 3);
   PUSH_DATAh(push, screen->tls_size / 2);
   PUSH_DATA (push, screen->tls_size / 2);
   PUSH_DATA (push, 0xff);

   /* Unified address space ? Who needs that ? Certainly not OpenCL.
    *
    * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
    *  accessible. We cannot prevent that at the moment, so expect failure.
    */
   BEGIN_NVC0(push, NVE4_COMPUTE(LOCAL_BASE), 1);
   PUSH_DATA (push, 1 << 24);
   BEGIN_NVC0(push, NVE4_COMPUTE(SHARED_BASE), 1);
   PUSH_DATA (push, 2 << 24);

   BEGIN_NVC0(push, NVE4_COMPUTE(CODE_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->text->offset);
   PUSH_DATA (push, screen->text->offset);

   BEGIN_NVC0(push, SUBC_COMPUTE(0x0310), 1);
   PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);

   /* NOTE: these do not affect the state used by the 3D object */

   BEGIN_NVC0(push, NVE4_COMPUTE(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
   BEGIN_NVC0(push, NVE4_COMPUTE(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);

   if (obj_class >= NVF0_COMPUTE_CLASS) {
      /* GK110-only magic init sequence */
      BEGIN_NVC0(push, SUBC_COMPUTE(0x0248), 1);
      PUSH_DATA (push, 0x100);
      BEGIN_NIC0(push, SUBC_COMPUTE(0x0248), 63);
      for (i = 63; i >= 1; --i)
         PUSH_DATA(push, 0x38000 | i);
      IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
      IMMED_NVC0(push, SUBC_COMPUTE(0x518), 0);
   }

   BEGIN_NVC0(push, NVE4_COMPUTE(TEX_CB_INDEX), 1);
   PUSH_DATA (push, 0); /* does not interefere with 3D */

   if (obj_class >= NVF0_COMPUTE_CLASS)
      IMMED_NVC0(push, SUBC_COMPUTE(0x02c4), 1);

   /* MS sample coordinate offsets: these do not work with _ALT modes ! */
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
   PUSH_DATA (push, 64);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL);
   BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
   PUSH_DATA (push, 0); /* 0 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 1); /* 1 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0); /* 2 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 1); /* 3 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 2); /* 4 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 3); /* 5 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 2); /* 6 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 3); /* 7 */
   PUSH_DATA (push, 1);
   BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);

   return 0;
}
/* Upload surface descriptors for dirty compute surfaces (set t == 1) into
 * the kernel input buffer and re-reference all bound surfaces in bufctx_cp.
 */
static void
nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nv50_surface *sf;
   struct nv04_resource *res;
   uint32_t mask;
   unsigned i;
   const unsigned t = 1; /* compute surface set */

   mask = nvc0->surfaces_dirty[t];
   while (mask) {
      i = ffs(mask) - 1;
      mask &= ~(1 << i);

      /*
       * NVE4's surface load/store instructions receive all the information
       * directly instead of via binding points, so we have to supply them.
       */
      BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
      PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
      PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
      BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
      PUSH_DATA (push, 64);
      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL);
      BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);

      /* appends the 16-word descriptor to the push buffer */
      nve4_set_surface_info(push, nvc0->surfaces[t][i], screen);

      sf = nv50_surface(nvc0->surfaces[t][i]);
      if (sf) {
         res = nv04_resource(sf->base.texture);

         if (sf->base.writable)
            BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
         else
            BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
      }
   }
   if (nvc0->surfaces_dirty[t]) {
      /* make the uploaded descriptors visible to kernels */
      BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
      PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
   }

   /* re-reference non-dirty surfaces */
   mask = nvc0->surfaces_valid[t] & ~nvc0->surfaces_dirty[t];
   while (mask) {
      i = ffs(mask) - 1;
      mask &= ~(1 << i);

      sf = nv50_surface(nvc0->surfaces[t][i]);
      res = nv04_resource(sf->base.texture);

      if (sf->base.writable)
         BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
      else
         BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
   }

   nvc0->surfaces_dirty[t] = 0;
}
/* Thankfully, textures with samplers follow the normal rules. */
/* Validate compute (stage 5) sampler state; flush the TSC cache only when
 * nve4_validate_tsc reports new/changed entries.
 */
static void
nve4_compute_validate_samplers(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   if (nve4_validate_tsc(nvc0, 5)) {
      BEGIN_NVC0(push, NVE4_COMPUTE(TSC_FLUSH), 1);
      PUSH_DATA (push, 0);
   }
}
/* (Code duplicated at bottom for various non-convincing reasons.
* E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
* entries to avoid a subchannel switch.
* Same for texture cache flushes.
* Also, the bufctx differs, and more IFs in the 3D version looks ugly.)
*/
static void nve4_compute_validate_textures(struct nvc0_context *);
/* Upload the dirty range of combined texture/sampler handles for the
 * compute stage into the kernel input buffer (c0[] texture handle area).
 */
static void
nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   uint64_t address;
   const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
   unsigned i, n;
   uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];

   if (!dirty)
      return;
   /* upload only the contiguous span covering all dirty slots */
   i = ffs(dirty) - 1;
   n = util_logbase2(dirty) + 1 - i;
   assert(n);

   address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);

   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, address);
   PUSH_DATA (push, address);
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
   PUSH_DATA (push, n * 4);
   PUSH_DATA (push, 0x1);
   BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + n);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
   PUSH_DATAp(push, &nvc0->tex_handles[s][i], n);

   /* flush so kernels see the updated handles */
   BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);

   nvc0->textures_dirty[s] = 0;
   nvc0->samplers_dirty[s] = 0;
}
/* Ensure the bound compute program is translated and uploaded to VRAM.
 * Returns TRUE when the program is resident and ready to launch.
 */
static boolean
nve4_compute_validate_program(struct nvc0_context *nvc0)
{
   struct nvc0_program *prog = nvc0->compprog;

   if (prog->mem)
      return TRUE; /* already uploaded */

   if (!prog->translated) {
      prog->translated = nvc0_program_translate(
         prog, nvc0->screen->base.device->chipset);
      if (!prog->translated)
         return FALSE;
   }
   if (unlikely(!prog->code_size))
      return FALSE;
   /* code_size is known non-zero here; the old likely(code_size) branch
    * was redundant and has been flattened out.
    */
   if (nvc0_program_upload_code(nvc0, prog)) {
      struct nouveau_pushbuf *push = nvc0->base.pushbuf;

      /* invalidate the instruction cache before the new code is used */
      BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
      PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CODE);
      return TRUE;
   }
   return FALSE;
}
/* Validate all compute state flagged in dirty_cp and make the associated
 * buffers resident. Returns FALSE if the program is unusable or the push
 * buffer cannot be validated, in which case the launch must be aborted.
 */
static boolean
nve4_compute_state_validate(struct nvc0_context *nvc0)
{
   if (!nve4_compute_validate_program(nvc0))
      return FALSE;
   if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
      nve4_compute_validate_textures(nvc0);
   if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
      nve4_compute_validate_samplers(nvc0);
   /* handles combine texture + sampler ids, so update on either change */
   if (nvc0->dirty_cp & (NVC0_NEW_CP_TEXTURES | NVC0_NEW_CP_SAMPLERS))
      nve4_compute_set_tex_handles(nvc0);
   if (nvc0->dirty_cp & NVC0_NEW_CP_SURFACES)
      nve4_compute_validate_surfaces(nvc0);
   if (nvc0->dirty_cp & NVC0_NEW_CP_GLOBALS)
      nvc0_validate_global_residents(nvc0,
                                     nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);

   nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);

   nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
   if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
      return FALSE;
   /* if validation flushed, fences must be re-emitted for the new batch */
   if (unlikely(nvc0->state.flushed))
      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);

   return TRUE;
}
/* Copy the user-supplied kernel arguments into the input buffer (c0[]).
 * No-op if the bound program declared no parameter space.
 */
static void
nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *cp = nvc0->compprog;

   if (!cp->parm_size)
      return;

   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->parm->offset);
   PUSH_DATA (push, screen->parm->offset);
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
   PUSH_DATA (push, cp->parm_size);
   PUSH_DATA (push, 0x1);
   /* inline the parameter words; parm_size is assumed 4-byte aligned */
   BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
   PUSH_DATAp(push, input, cp->parm_size / 4);

   BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}
/* Pick the smallest shared-memory/L1 cache split that can hold the
 * program's shared memory requirement.
 */
static INLINE uint8_t
nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
{
   uint8_t split;

   if (shared_size > (32 << 10))
      split = NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1;
   else if (shared_size > (16 << 10))
      split = NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1;
   else
      split = NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1;

   return split;
}
/* Fill in the hardware launch descriptor for the currently bound compute
 * program: entry point, grid/block dimensions, memory sizes, register and
 * barrier allocation, and constant buffer bindings.
 */
static void
nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
                               struct nve4_cp_launch_desc *desc,
                               uint32_t label,
                               const uint *block_layout,
                               const uint *grid_layout)
{
   const struct nvc0_screen *screen = nvc0->screen;
   const struct nvc0_program *cp = nvc0->compprog;
   unsigned i;

   nve4_cp_launch_desc_init_default(desc);

   /* code offset of the kernel identified by 'label' */
   desc->entry = nvc0_program_symbol_offset(cp, label);

   desc->griddim_x = grid_layout[0];
   desc->griddim_y = grid_layout[1];
   desc->griddim_z = grid_layout[2];
   desc->blockdim_x = block_layout[0];
   desc->blockdim_y = block_layout[1];
   desc->blockdim_z = block_layout[2];

   desc->shared_size = align(cp->cp.smem_size, 0x100);
   desc->local_size_p = align(cp->cp.lmem_size, 0x10);
   desc->local_size_n = 0;
   desc->cstack_size = 0x800;
   desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size);

   desc->gpr_alloc = cp->num_gprs;
   desc->bar_alloc = cp->num_barriers;

   /* user constant buffers are shifted up by one: slot 0 holds the
    * driver's kernel input buffer (parameters, tex handles, surface info)
    */
   for (i = 0; i < 7; ++i) {
      const unsigned s = 5;
      if (nvc0->constbuf[s][i].u.buf)
         nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]);
   }
   nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
}
/* Carve a 256-byte-aligned launch descriptor out of the scratch allocator.
 * 512 bytes are requested so the aligned 256-byte record always fits.
 * On success *pbo/*pgpuaddr describe its backing storage; NULL on failure.
 */
static INLINE struct nve4_cp_launch_desc *
nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
                               struct nouveau_bo **pbo, uint64_t *pgpuaddr)
{
   uint8_t *map = nouveau_scratch_get(nv, 512, pgpuaddr, pbo);

   if (!map)
      return NULL;

   if (*pgpuaddr & 255) {
      const unsigned pad = 256 - (*pgpuaddr & 255);
      map += pad;
      *pgpuaddr += pad;
   }
   return (struct nve4_cp_launch_desc *)map;
}
/* pipe_context::launch_grid for Kepler: build a launch descriptor, validate
 * compute state, upload kernel input, and fire the LAUNCH method.
 */
void
nve4_launch_grid(struct pipe_context *pipe,
                 const uint *block_layout, const uint *grid_layout,
                 uint32_t label,
                 const void *input)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nve4_cp_launch_desc *desc;
   uint64_t desc_gpuaddr;
   struct nouveau_bo *desc_bo;
   /* Initialize to failure: previously 'ret' was read uninitialized at
    * 'out' when the descriptor allocation failed.
    */
   int ret = -1;

   desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);
   if (!desc)
      goto out;
   BCTX_REFN_bo(nvc0->bufctx_cp, CP_DESC, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
                desc_bo);

   if (!nve4_compute_state_validate(nvc0))
      goto out;
   ret = 0;

   nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout);
   nve4_compute_dump_launch_desc(desc);

   nve4_compute_upload_input(nvc0, input);

   /* upload descriptor and flush */
#if 0
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, desc_gpuaddr);
   PUSH_DATA (push, desc_gpuaddr);
   BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
   PUSH_DATA (push, 256);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL);
   BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (256 / 4));
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DESC);
   PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);
   BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE);
#endif

   BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH_DESC_ADDRESS), 1);
   PUSH_DATA (push, desc_gpuaddr >> 8);
   BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH), 1);
   PUSH_DATA (push, 0x3);
   BEGIN_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
   PUSH_DATA (push, 0);

out:
   if (ret)
      NOUVEAU_ERR("Failed to launch grid !\n");
   nouveau_scratch_done(&nvc0->base);
   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);
}
#define NVE4_TIC_ENTRY_INVALID 0x000fffff
/* Validate compute-stage (s == 5) textures: allocate and upload TIC entries
 * for new views, build the per-texture handle words, flush caches for
 * entries that were written by the GPU, and re-reference resources.
 * Duplicated from the 3D path — see the comment above this function.
 */
static void
nve4_compute_validate_textures(struct nvc0_context *nvc0)
{
   struct nouveau_bo *txc = nvc0->screen->txc;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const unsigned s = 5;
   unsigned i;
   /* commands[0]: TIC_FLUSH for new entries, commands[1]: TEX_CACHE_CTL */
   uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];
   unsigned n[2] = { 0, 0 };

   for (i = 0; i < nvc0->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
      struct nv04_resource *res;
      const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));

      if (!tic) {
         nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
         continue;
      }
      res = nv04_resource(tic->pipe.texture);

      if (tic->id < 0) {
         /* new view: allocate a TIC slot and upload the 8-word entry */
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

         PUSH_SPACE(push, 16);
         BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
         PUSH_DATAh(push, txc->offset + (tic->id * 32));
         PUSH_DATA (push, txc->offset + (tic->id * 32));
         BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
         PUSH_DATA (push, 32);
         PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL);
         BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 9);
         PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
         PUSH_DATAp(push, &tic->tic[0], 8);

         commands[0][n[0]++] = (tic->id << 4) | 1;
      } else
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         /* underlying data changed on the GPU: invalidate the tex cache */
         commands[1][n[1]++] = (tic->id << 4) | 1;
      }
      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;

      nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
      nvc0->tex_handles[s][i] |= tic->id;
      if (dirty)
         BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
   }
   /* invalidate handles of slots beyond the new bound count */
   for (; i < nvc0->state.num_textures[s]; ++i)
      nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;

   if (n[0]) {
      BEGIN_NIC0(push, NVE4_COMPUTE(TIC_FLUSH), n[0]);
      PUSH_DATAp(push, commands[0], n[0]);
   }
   if (n[1]) {
      BEGIN_NIC0(push, NVE4_COMPUTE(TEX_CACHE_CTL), n[1]);
      PUSH_DATAp(push, commands[1], n[1]);
   }

   nvc0->state.num_textures[s] = nvc0->num_textures[s];
}
/* Return a printable name for a launch-descriptor CACHE_SPLIT value. */
static const char *nve4_cache_split_name(unsigned value)
{
   if (value == NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1)
      return "16K_SHARED_48K_L1";
   if (value == NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1)
      return "32K_SHARED_32K_L1";
   if (value == NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1)
      return "48K_SHARED_16K_L1";
   return "(invalid)";
}
/* Print a human-readable decode of a compute launch descriptor for
 * debugging: first a raw dump of the non-zero 32-bit words (runs of zero
 * words are collapsed into a single "..." line), then the known fields
 * by name, then the constant buffer bindings.
 */
static void
nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
{
   const uint32_t *words = (const uint32_t *)desc;
   unsigned pos;
   boolean in_zero_run = FALSE;

   debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
   for (pos = 0; pos < sizeof(*desc); pos += 4) {
      const uint32_t w = words[pos / 4];
      if (w == 0) {
         if (!in_zero_run)
            debug_printf("...\n");
         in_zero_run = TRUE;
      } else {
         debug_printf("[%x]: 0x%08x\n", pos, w);
         in_zero_run = FALSE;
      }
   }

   debug_printf("entry = 0x%x\n", desc->entry);
   debug_printf("grid dimensions = %ux%ux%u\n",
                desc->griddim_x, desc->griddim_y, desc->griddim_z);
   debug_printf("block dimensions = %ux%ux%u\n",
                desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
   debug_printf("s[] size: 0x%x\n", desc->shared_size);
   debug_printf("l[] size: -0x%x / +0x%x\n",
                desc->local_size_n, desc->local_size_p);
   debug_printf("stack size: 0x%x\n", desc->cstack_size);
   debug_printf("barrier count: %u\n", desc->bar_alloc);
   debug_printf("$r count: %u\n", desc->gpr_alloc);
   debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split));

   for (pos = 0; pos < 8; ++pos) {
      const uint32_t cb_size = desc->cb[pos].size;
      const boolean enabled = !!(desc->cb_mask & (1 << pos));
      const uint64_t cb_addr =
         ((uint64_t)desc->cb[pos].address_h << 32) | desc->cb[pos].address_l;
      /* Skip slots that are invalid AND all-zero; a non-zero but invalid
       * slot is still shown, flagged "(invalid)". */
      if (!enabled && !cb_addr && !cb_size)
         continue;
      debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
                   pos, cb_addr, cb_size, enabled ? "" : " (invalid)");
   }
}

View file

@ -0,0 +1,110 @@
#ifndef NVE4_COMPUTE_H
#define NVE4_COMPUTE_H
#include "nv50/nv50_defs.xml.h"
#include "nve4_compute.xml.h"
/* Input space is implemented as c0[], to which we bind the screen->parm bo.
 */
/* Byte-offset layout of the driver-managed c0[] input buffer: */
#define NVE4_CP_INPUT_USER 0x0000
/* User kernel parameters occupy [NVE4_CP_INPUT_USER, _USER_LIMIT). */
#define NVE4_CP_INPUT_USER_LIMIT 0x1000
/* Texture handles, one 32-bit word per texture unit (see
 * nve4_compute_validate_textures / nvc0->tex_handles). */
#define NVE4_CP_INPUT_TEX(i) (0x1020 + (i) * 4)
#define NVE4_CP_INPUT_TEX_STRIDE 4
#define NVE4_CP_INPUT_TEX_MAX 32
#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0
/* Surface descriptors, 64 bytes each (presumably image/surface state —
 * confirm against the surface validation code). */
#define NVE4_CP_INPUT_SUF_STRIDE 64
#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
#define NVE4_CP_INPUT_SUF_MAX 32
#define NVE4_CP_INPUT_SIZE 0x1900
#define NVE4_CP_PARAM_SIZE 0x2000
/* In-memory layout of the 256-byte NVE4 compute launch descriptor
 * (NVE4_COMPUTE_LAUNCH_DESC__SIZE), referenced by the
 * LAUNCH_DESC_ADDRESS method; fields correspond to the word offsets in
 * nve4_compute.xml.h. The whole struct is uploaded to the GPU verbatim,
 * so the unk* padding fields must stay zeroed unless known otherwise.
 */
struct nve4_cp_launch_desc
{
   u32 unk0[8];
   u32 entry;           /* program start offset (LAUNCH_DESC_PROG_START) */
   u32 unk9[3];
   u32 griddim_x : 31;
   u32 unk12 : 1;
   u16 griddim_y;
   u16 griddim_z;
   u32 unk14[3];
   u16 shared_size; /* must be aligned to 0x100 */
   u16 unk15;
   u16 unk16;
   u16 blockdim_x;
   u16 blockdim_y;
   u16 blockdim_z;
   u32 cb_mask : 8;     /* bit i set -> cb[i] binding is valid */
   u32 unk20_8 : 21;
   u32 cache_split : 2; /* shared-mem/L1 split, see CACHE_SPLIT_* values */
   u32 unk20_31 : 1;
   u32 unk21[8];
   struct {
      u32 address_l;
      u32 address_h : 8;
      u32 reserved : 7;
      u32 size : 17;
   } cb[8];             /* constant buffer bindings */
   u32 local_size_p : 20; /* positive l[] (local memory) allocation */
   u32 unk45_20 : 7;
   u32 bar_alloc : 5;     /* number of barriers allocated */
   u32 local_size_n : 20; /* negative l[] allocation */
   u32 unk46_20 : 4;
   u32 gpr_alloc : 8;     /* number of GPRs allocated */
   u32 cstack_size : 20;  /* call stack size per warp (WARP_CSTACK_SIZE) */
   u32 unk47_20 : 12;
   u32 unk48[16];
};
/* First-word values passed to UPLOAD_EXEC (one for data uploads, one for
 * launch descriptor uploads) and the value written to UNK0184 alongside
 * UPLOAD_SIZE; exact meanings unknown (hence UNKVAL). */
#define NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA 0x41
#define NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DESC 0x11
#define NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL 0x1
/* Initialize a launch descriptor: clear everything (memset also zeroes
 * the padding between bitfields, which matters since the struct is
 * uploaded to the GPU verbatim), then set the default magic words whose
 * meanings are unknown. */
static INLINE void
nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
{
   memset(desc, 0, sizeof(*desc));
   desc->unk0[7] = 0xbc000000;
   desc->unk9[2] = 0x44014000;
   desc->unk47_20 = 0x300;
}
/* Point constant buffer slot 'index' of a launch descriptor at
 * bo->offset + base and mark the slot valid in cb_mask.
 * base must be 256-byte aligned.
 */
static INLINE void
nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
                           unsigned index,
                           struct nouveau_bo *bo,
                           uint32_t base, uint16_t size)
{
   const uint64_t gpu_addr = bo->offset + base;

   assert(index < 8);
   assert(!(base & 0xff));
   assert(size <= 65536);

   desc->cb_mask |= 1 << index;
   desc->cb[index].size = size;
   desc->cb[index].address_h = gpu_addr >> 32;
   desc->cb[index].address_l = gpu_addr;
}
/* Bind the gallium context constant buffer 'cb' to slot 'index' of the
 * launch descriptor, or invalidate the slot if no buffer is bound.
 * User (CPU-pointer) constbufs are not supported here.
 */
static INLINE void
nve4_cp_launch_desc_set_ctx_cb(struct nve4_cp_launch_desc *desc,
                               unsigned index,
                               const struct nvc0_constbuf *cb)
{
   assert(index < 8);

   if (cb->u.buf) {
      const struct nv04_resource *res = nv04_resource(cb->u.buf);
      assert(!cb->user);
      nve4_cp_launch_desc_set_cb(desc, index,
                                 res->bo, res->offset + cb->offset, cb->size);
   } else {
      desc->cb_mask &= ~(1 << index);
   }
}
#endif /* NVE4_COMPUTE_H */

View file

@ -0,0 +1,269 @@
#ifndef NVE4_COMPUTE_XML
#define NVE4_COMPUTE_XML
/* Autogenerated file, DO NOT EDIT manually!
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
- nve4_compute.xml ( 6352 bytes, from 2013-03-10 14:59:45)
- copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
- nvchipsets.xml ( 3870 bytes, from 2013-03-08 12:41:50)
- nv_object.xml ( 13238 bytes, from 2013-02-07 16:35:34)
- nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12)
- nv50_defs.xml ( 7783 bytes, from 2013-03-08 12:42:29)
Copyright (C) 2006-2013 by the following authors:
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
- Ben Skeggs (darktama, darktama_)
- B. R. <koala_br@users.sourceforge.net> (koala_br)
- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
- Dmitry Baryshkov
- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
- EdB <edb_@users.sf.net> (edb_)
- Erik Waling <erikwailing@users.sf.net> (erikwaling)
- Francisco Jerez <currojerez@riseup.net> (curro)
- imirkin <imirkin@users.sf.net> (imirkin)
- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
- Mark Carey <mark.carey@gmail.com> (careym)
- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
- Peter Popov <ironpeter@users.sf.net> (ironpeter)
- Richard Hughes <hughsient@users.sf.net> (hughsient)
- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
- Serge Martin
- Simon Raffeiner
- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
- sturmflut <sturmflut@users.sf.net> (sturmflut)
- Sylvain Munaut <tnt@246tNt.com>
- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
- Younes Manton <younes.m@gmail.com> (ymanton)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#define NVE4_COMPUTE_UPLOAD_SIZE 0x00000180
#define NVE4_COMPUTE_UPLOAD_UNK0184 0x00000184
#define NVE4_COMPUTE_UPLOAD_ADDRESS_HIGH 0x00000188
#define NVE4_COMPUTE_UPLOAD_ADDRESS_LOW 0x0000018c
#define NVE4_COMPUTE_UNK01A0 0x000001a0
#define NVE4_COMPUTE_UNK01A4 0x000001a4
#define NVE4_COMPUTE_UNK01A8 0x000001a8
#define NVE4_COMPUTE_UNK01AC 0x000001ac
#define NVE4_COMPUTE_UPLOAD_EXEC 0x000001b0
#define NVE4_COMPUTE_UPLOAD_DATA 0x000001b4
#define NVE4_COMPUTE_SHARED_BASE 0x00000214
#define NVE4_COMPUTE_MEM_BARRIER 0x0000021c
#define NVE4_COMPUTE_UNK0280 0x00000280
#define NVE4_COMPUTE_UNK02B0 0x000002b0
#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS 0x000002b4
#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS__SHR 8
#define NVE4_COMPUTE_UNK02B8 0x000002b8
#define NVE4_COMPUTE_LAUNCH 0x000002bc
#define NVE4_COMPUTE_TEMP_SIZE(i0) (0x000002e4 + 0xc*(i0))
#define NVE4_COMPUTE_TEMP_SIZE__ESIZE 0x0000000c
#define NVE4_COMPUTE_TEMP_SIZE__LEN 0x00000002
#define NVE4_COMPUTE_TEMP_SIZE_HIGH(i0) (0x000002e4 + 0xc*(i0))
#define NVE4_COMPUTE_TEMP_SIZE_LOW(i0) (0x000002e8 + 0xc*(i0))
#define NVE4_COMPUTE_TEMP_SIZE_MASK(i0) (0x000002ec + 0xc*(i0))
#define NVE4_COMPUTE_UNK0310 0x00000310
#define NVE4_COMPUTE_LOCAL_BASE 0x0000077c
#define NVE4_COMPUTE_TEMP_ADDRESS_HIGH 0x00000790
#define NVE4_COMPUTE_TEMP_ADDRESS_LOW 0x00000794
#define NVE4_COMPUTE_WATCHDOG_TIMER 0x00000de4
#define NVE4_COMPUTE_LINKED_TSC 0x00001234
#define NVE4_COMPUTE_TSC_FLUSH 0x00001330
#define NVE4_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001
#define NVE4_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0
#define NVE4_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4
#define NVE4_COMPUTE_TIC_FLUSH 0x00001334
#define NVE4_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001
#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0
#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4
#define NVE4_COMPUTE_TEX_CACHE_CTL 0x00001338
#define NVE4_COMPUTE_TEX_CACHE_CTL_UNK0__MASK 0x00000007
#define NVE4_COMPUTE_TEX_CACHE_CTL_UNK0__SHIFT 0
#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__MASK 0x03fffff0
#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__SHIFT 4
#define NVE4_COMPUTE_COND_ADDRESS_HIGH 0x00001550
#define NVE4_COMPUTE_COND_ADDRESS_LOW 0x00001554
#define NVE4_COMPUTE_COND_MODE 0x00001558
#define NVE4_COMPUTE_COND_MODE_NEVER 0x00000000
#define NVE4_COMPUTE_COND_MODE_ALWAYS 0x00000001
#define NVE4_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002
#define NVE4_COMPUTE_COND_MODE_EQUAL 0x00000003
#define NVE4_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004
#define NVE4_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c
#define NVE4_COMPUTE_TSC_ADDRESS_LOW 0x00001560
#define NVE4_COMPUTE_TSC_LIMIT 0x00001564
#define NVE4_COMPUTE_TIC_ADDRESS_HIGH 0x00001574
#define NVE4_COMPUTE_TIC_ADDRESS_LOW 0x00001578
#define NVE4_COMPUTE_TIC_LIMIT 0x0000157c
#define NVE4_COMPUTE_CODE_ADDRESS_HIGH 0x00001608
#define NVE4_COMPUTE_CODE_ADDRESS_LOW 0x0000160c
#define NVE4_COMPUTE_FLUSH 0x00001698
#define NVE4_COMPUTE_FLUSH_CODE 0x00000001
#define NVE4_COMPUTE_FLUSH_GLOBAL 0x00000010
#define NVE4_COMPUTE_FLUSH_UNK8 0x00000100
#define NVE4_COMPUTE_FLUSH_CB 0x00001000
#define NVE4_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00
#define NVE4_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04
#define NVE4_COMPUTE_QUERY_SEQUENCE 0x00001b08
#define NVE4_COMPUTE_QUERY_GET 0x00001b0c
#define NVE4_COMPUTE_QUERY_GET_MODE__MASK 0x00000003
#define NVE4_COMPUTE_QUERY_GET_MODE__SHIFT 0
#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE 0x00000000
#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE_INTR_NRHOST 0x00000003
#define NVE4_COMPUTE_QUERY_GET_INTR 0x00100000
#define NVE4_COMPUTE_QUERY_GET_SHORT 0x10000000
#define NVE4_COMPUTE_TEX_CB_INDEX 0x00002608
#define NVE4_COMPUTE_UNK260c 0x0000260c
#define NVE4_COMPUTE_LAUNCH_DESC__SIZE 0x00000100
#define NVE4_COMPUTE_LAUNCH_DESC_PROG_START 0x00000020
#define NVE4_COMPUTE_LAUNCH_DESC_12 0x00000030
#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__MASK 0x7fffffff
#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__SHIFT 0
#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ 0x00000034
#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__MASK 0x0000ffff
#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__SHIFT 0
#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__MASK 0xffff0000
#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__SHIFT 16
#define NVE4_COMPUTE_LAUNCH_DESC_17 0x00000044
#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__MASK 0x0000ffff
#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__SHIFT 0
#define NVE4_COMPUTE_LAUNCH_DESC_18 0x00000048
#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__MASK 0xffff0000
#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__SHIFT 16
#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ 0x0000004c
#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__MASK 0x0000ffff
#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__SHIFT 0
#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__MASK 0xffff0000
#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__SHIFT 16
#define NVE4_COMPUTE_LAUNCH_DESC_20 0x00000050
#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__MASK 0x000000ff
#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__SHIFT 0
#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__MASK 0x60000000
#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__SHIFT 29
#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_16K_SHARED_48K_L1 0x20000000
#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_32K_SHARED_32K_L1 0x40000000
#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_48K_SHARED_16K_L1 0x60000000
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0(i0) (0x00000074 + 0x8*(i0))
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__ESIZE 0x00000008
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__LEN 0x00000008
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__MASK 0xffffffff
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__SHIFT 0
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1(i0) (0x00000078 + 0x8*(i0))
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__ESIZE 0x00000008
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__LEN 0x00000008
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__MASK 0x000000ff
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__SHIFT 0
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__MASK 0xffff8000
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__SHIFT 15
#define NVE4_COMPUTE_LAUNCH_DESC_45 0x000000b4
#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__MASK 0x000fffff
#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__SHIFT 0
#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__MASK 0xf8000000
#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__SHIFT 27
#define NVE4_COMPUTE_LAUNCH_DESC_46 0x000000b8
#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__MASK 0x000fffff
#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__SHIFT 0
#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__MASK 0x3f000000
#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__SHIFT 24
#define NVE4_COMPUTE_LAUNCH_DESC_47 0x000000bc
#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__MASK 0x000fffff
#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__SHIFT 0
#endif /* NVE4_COMPUTE_XML */