mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 15:20:10 +01:00
nvc0: implement compute support for nve4
This commit is contained in:
parent
75f1f852b0
commit
e066f2f62f
18 changed files with 1881 additions and 77 deletions
|
|
@ -5,6 +5,7 @@
|
|||
#include "util/u_memory.h"
|
||||
|
||||
typedef uint32_t u32;
|
||||
typedef uint16_t u16;
|
||||
|
||||
extern int nouveau_mesa_debug;
|
||||
|
||||
|
|
|
|||
|
|
@ -196,6 +196,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NVC0_COMPUTE_CLASS 0x000090c0
|
||||
#define NVC8_COMPUTE_CLASS 0x000092c0
|
||||
#define NVE4_COMPUTE_CLASS 0x0000a0c0
|
||||
#define NVF0_COMPUTE_CLASS 0x0000a1c0
|
||||
#define NV84_CRYPT_CLASS 0x000074c1
|
||||
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
|
||||
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
#ifndef RNNDB_NV50_DEFS_XML
|
||||
#define RNNDB_NV50_DEFS_XML
|
||||
#ifndef NV50_DEFS_XML
|
||||
#define NV50_DEFS_XML
|
||||
|
||||
/* Autogenerated file, DO NOT EDIT manually!
|
||||
|
||||
|
|
@ -8,11 +8,11 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
|
|||
git clone git://0x04.net/rules-ng-ng
|
||||
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58)
|
||||
- ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58)
|
||||
- ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-07-09 13:43:58)
|
||||
- rnndb/nv50_defs.xml ( 7783 bytes, from 2013-02-14 13:56:25)
|
||||
- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
|
||||
- ./rnndb/nvchipsets.xml ( 3704 bytes, from 2012-08-18 12:48:55)
|
||||
|
||||
Copyright (C) 2006-2011 by the following authors:
|
||||
Copyright (C) 2006-2013 by the following authors:
|
||||
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
|
||||
- Ben Skeggs (darktama, darktama_)
|
||||
- B. R. <koala_br@users.sourceforge.net> (koala_br)
|
||||
|
|
@ -71,6 +71,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
*/
|
||||
|
||||
|
||||
#define NV50_VSTATUS_IDLE 0x00000000
|
||||
#define NV50_VSTATUS_BUSY 0x00000001
|
||||
#define NV50_VSTATUS_UNK2 0x00000002
|
||||
#define NV50_VSTATUS_WAITING 0x00000003
|
||||
#define NV50_VSTATUS_BLOCKED 0x00000005
|
||||
#define NV50_VSTATUS_FAULTED 0x00000006
|
||||
#define NV50_VSTATUS_PAUSED 0x00000007
|
||||
#define NV50_SURFACE_FORMAT_BITMAP 0x0000001c
|
||||
#define NV50_SURFACE_FORMAT_UNK1D 0x0000001d
|
||||
#define NV50_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0
|
||||
|
|
@ -143,6 +150,45 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d
|
||||
#define NV50_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e
|
||||
#define NV50_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f
|
||||
#define NVE4_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002
|
||||
#define NVE4_IMAGE_FORMAT_RGBA32_SINT 0x00000003
|
||||
#define NVE4_IMAGE_FORMAT_RGBA32_UINT 0x00000004
|
||||
#define NVE4_IMAGE_FORMAT_RGBA16_UNORM 0x00000008
|
||||
#define NVE4_IMAGE_FORMAT_RGBA16_SNORM 0x00000009
|
||||
#define NVE4_IMAGE_FORMAT_RGBA16_SINT 0x0000000a
|
||||
#define NVE4_IMAGE_FORMAT_RGBA16_UINT 0x0000000b
|
||||
#define NVE4_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c
|
||||
#define NVE4_IMAGE_FORMAT_RG32_FLOAT 0x0000000d
|
||||
#define NVE4_IMAGE_FORMAT_RG32_SINT 0x0000000e
|
||||
#define NVE4_IMAGE_FORMAT_RG32_UINT 0x0000000f
|
||||
#define NVE4_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013
|
||||
#define NVE4_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015
|
||||
#define NVE4_IMAGE_FORMAT_RGBA8_UNORM 0x00000018
|
||||
#define NVE4_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a
|
||||
#define NVE4_IMAGE_FORMAT_RGBA8_SINT 0x0000001b
|
||||
#define NVE4_IMAGE_FORMAT_RGBA8_UINT 0x0000001c
|
||||
#define NVE4_IMAGE_FORMAT_RG16_UNORM 0x0000001d
|
||||
#define NVE4_IMAGE_FORMAT_RG16_SNORM 0x0000001e
|
||||
#define NVE4_IMAGE_FORMAT_RG16_SINT 0x0000001f
|
||||
#define NVE4_IMAGE_FORMAT_RG16_UINT 0x00000020
|
||||
#define NVE4_IMAGE_FORMAT_RG16_FLOAT 0x00000021
|
||||
#define NVE4_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024
|
||||
#define NVE4_IMAGE_FORMAT_R32_SINT 0x00000027
|
||||
#define NVE4_IMAGE_FORMAT_R32_UINT 0x00000028
|
||||
#define NVE4_IMAGE_FORMAT_R32_FLOAT 0x00000029
|
||||
#define NVE4_IMAGE_FORMAT_RG8_UNORM 0x0000002e
|
||||
#define NVE4_IMAGE_FORMAT_RG8_SNORM 0x0000002f
|
||||
#define NVE4_IMAGE_FORMAT_RG8_SINT 0x00000030
|
||||
#define NVE4_IMAGE_FORMAT_RG8_UINT 0x00000031
|
||||
#define NVE4_IMAGE_FORMAT_R16_UNORM 0x00000032
|
||||
#define NVE4_IMAGE_FORMAT_R16_SNORM 0x00000033
|
||||
#define NVE4_IMAGE_FORMAT_R16_SINT 0x00000034
|
||||
#define NVE4_IMAGE_FORMAT_R16_UINT 0x00000035
|
||||
#define NVE4_IMAGE_FORMAT_R16_FLOAT 0x00000036
|
||||
#define NVE4_IMAGE_FORMAT_R8_UNORM 0x00000037
|
||||
#define NVE4_IMAGE_FORMAT_R8_SNORM 0x00000038
|
||||
#define NVE4_IMAGE_FORMAT_R8_SINT 0x00000039
|
||||
#define NVE4_IMAGE_FORMAT_R8_UINT 0x0000003a
|
||||
#define NV50_QUERY__SIZE 0x00000010
|
||||
#define NV50_QUERY_COUNTER 0x00000000
|
||||
|
||||
|
|
@ -151,4 +197,4 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NV50_QUERY_TIME 0x00000008
|
||||
|
||||
|
||||
#endif /* RNNDB_NV50_DEFS_XML */
|
||||
#endif /* NV50_DEFS_XML */
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ C_SOURCES := \
|
|||
nvc0_program.c \
|
||||
nvc0_shader_state.c \
|
||||
nvc0_query.c \
|
||||
nve4_compute.c \
|
||||
nvc0_video.c \
|
||||
nvc0_video_bsp.c \
|
||||
nvc0_video_vp.c \
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
|
|||
|
||||
nouveau_bufctx_del(&nvc0->bufctx_3d);
|
||||
nouveau_bufctx_del(&nvc0->bufctx);
|
||||
nouveau_bufctx_del(&nvc0->bufctx_cp);
|
||||
|
||||
util_unreference_framebuffer_state(&nvc0->framebuffer);
|
||||
|
||||
|
|
@ -71,7 +72,7 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
|
|||
|
||||
pipe_resource_reference(&nvc0->idxbuf.buffer, NULL);
|
||||
|
||||
for (s = 0; s < 5; ++s) {
|
||||
for (s = 0; s < 6; ++s) {
|
||||
for (i = 0; i < nvc0->num_textures[s]; ++i)
|
||||
pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
|
||||
|
||||
|
|
@ -80,8 +81,21 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
|
|||
pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, NULL);
|
||||
}
|
||||
|
||||
for (s = 0; s < 2; ++s) {
|
||||
for (i = 0; i < NVC0_MAX_SURFACE_SLOTS; ++i)
|
||||
pipe_surface_reference(&nvc0->surfaces[s][i], NULL);
|
||||
}
|
||||
|
||||
for (i = 0; i < nvc0->num_tfbbufs; ++i)
|
||||
pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
|
||||
|
||||
for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
|
||||
++i) {
|
||||
struct pipe_resource **res = util_dynarray_element(
|
||||
&nvc0->global_residents, struct pipe_resource *, i);
|
||||
pipe_resource_reference(res, NULL);
|
||||
}
|
||||
util_dynarray_fini(&nvc0->global_residents);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -219,10 +233,13 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
|
|||
nvc0->base.pushbuf = screen->base.pushbuf;
|
||||
nvc0->base.client = screen->base.client;
|
||||
|
||||
ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_COUNT,
|
||||
&nvc0->bufctx_3d);
|
||||
ret = nouveau_bufctx_new(screen->base.client, 2, &nvc0->bufctx);
|
||||
if (!ret)
|
||||
nouveau_bufctx_new(screen->base.client, 2, &nvc0->bufctx);
|
||||
ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_3D_COUNT,
|
||||
&nvc0->bufctx_3d);
|
||||
if (!ret)
|
||||
ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_CP_COUNT,
|
||||
&nvc0->bufctx_cp);
|
||||
if (ret)
|
||||
goto out_err;
|
||||
|
||||
|
|
@ -236,6 +253,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
|
|||
|
||||
pipe->draw_vbo = nvc0_draw_vbo;
|
||||
pipe->clear = nvc0_clear;
|
||||
if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
|
||||
pipe->launch_grid = nve4_launch_grid;
|
||||
|
||||
pipe->flush = nvc0_flush;
|
||||
pipe->texture_barrier = nvc0_texture_barrier;
|
||||
|
|
@ -274,23 +293,39 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
|
|||
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text);
|
||||
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo);
|
||||
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc);
|
||||
if (screen->compute) {
|
||||
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->text);
|
||||
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc);
|
||||
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm);
|
||||
}
|
||||
|
||||
flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
|
||||
|
||||
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache);
|
||||
if (screen->compute)
|
||||
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->tls);
|
||||
|
||||
flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
|
||||
|
||||
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->fence.bo);
|
||||
BCTX_REFN_bo(nvc0->bufctx, FENCE, flags, screen->fence.bo);
|
||||
if (screen->compute)
|
||||
BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
|
||||
|
||||
nvc0->base.scratch.bo_size = 2 << 20;
|
||||
|
||||
memset(nvc0->tex_handles, ~0, sizeof(nvc0->tex_handles));
|
||||
|
||||
util_dynarray_init(&nvc0->global_residents);
|
||||
|
||||
return pipe;
|
||||
|
||||
out_err:
|
||||
if (nvc0) {
|
||||
if (nvc0->bufctx_3d)
|
||||
nouveau_bufctx_del(&nvc0->bufctx_3d);
|
||||
if (nvc0->bufctx_cp)
|
||||
nouveau_bufctx_del(&nvc0->bufctx_cp);
|
||||
if (nvc0->bufctx)
|
||||
nouveau_bufctx_del(&nvc0->bufctx);
|
||||
if (nvc0->blit)
|
||||
|
|
|
|||
|
|
@ -55,7 +55,16 @@
|
|||
#define NVC0_NEW_SAMPLERS (1 << 20)
|
||||
#define NVC0_NEW_TFB_TARGETS (1 << 21)
|
||||
#define NVC0_NEW_IDXBUF (1 << 22)
|
||||
#define NVC0_NEW_SURFACES (1 << 23)
|
||||
|
||||
#define NVC0_NEW_CP_PROGRAM (1 << 0)
|
||||
#define NVC0_NEW_CP_SURFACES (1 << 1)
|
||||
#define NVC0_NEW_CP_TEXTURES (1 << 2)
|
||||
#define NVC0_NEW_CP_SAMPLERS (1 << 3)
|
||||
#define NVC0_NEW_CP_CONSTBUF (1 << 4)
|
||||
#define NVC0_NEW_CP_GLOBALS (1 << 5)
|
||||
|
||||
/* 3d bufctx (during draw_vbo, blit_3d) */
|
||||
#define NVC0_BIND_FB 0
|
||||
#define NVC0_BIND_VTX 1
|
||||
#define NVC0_BIND_VTX_TMP 2
|
||||
|
|
@ -63,10 +72,21 @@
|
|||
#define NVC0_BIND_TEX(s, i) ( 4 + 32 * (s) + (i))
|
||||
#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i))
|
||||
#define NVC0_BIND_TFB 244
|
||||
#define NVC0_BIND_SCREEN 245
|
||||
#define NVC0_BIND_TLS 246
|
||||
#define NVC0_BIND_COUNT 247
|
||||
#define NVC0_BIND_SUF 245
|
||||
#define NVC0_BIND_SCREEN 246
|
||||
#define NVC0_BIND_TLS 247
|
||||
#define NVC0_BIND_3D_COUNT 248
|
||||
|
||||
/* compute bufctx (during launch_grid) */
|
||||
#define NVC0_BIND_CP_CB(i) ( 0 + (i))
|
||||
#define NVC0_BIND_CP_TEX(i) ( 16 + (i))
|
||||
#define NVC0_BIND_CP_SUF 48
|
||||
#define NVC0_BIND_CP_GLOBAL 49
|
||||
#define NVC0_BIND_CP_DESC 50
|
||||
#define NVC0_BIND_CP_SCREEN 51
|
||||
#define NVC0_BIND_CP_COUNT 52
|
||||
|
||||
/* bufctx for other operations */
|
||||
#define NVC0_BIND_2D 0
|
||||
#define NVC0_BIND_M2MF 0
|
||||
#define NVC0_BIND_FENCE 1
|
||||
|
|
@ -81,6 +101,7 @@ struct nvc0_context {
|
|||
|
||||
struct nouveau_bufctx *bufctx_3d;
|
||||
struct nouveau_bufctx *bufctx;
|
||||
struct nouveau_bufctx *bufctx_cp;
|
||||
|
||||
struct nvc0_screen *screen;
|
||||
|
||||
|
|
@ -90,6 +111,7 @@ struct nvc0_context {
|
|||
uint32_t nblocksx, uint32_t nblocksy);
|
||||
|
||||
uint32_t dirty;
|
||||
uint32_t dirty_cp; /* dirty flags for compute state */
|
||||
|
||||
struct {
|
||||
boolean flushed;
|
||||
|
|
@ -105,8 +127,8 @@ struct nvc0_context {
|
|||
uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
|
||||
uint8_t num_vtxbufs;
|
||||
uint8_t num_vtxelts;
|
||||
uint8_t num_textures[5];
|
||||
uint8_t num_samplers[5];
|
||||
uint8_t num_textures[6];
|
||||
uint8_t num_samplers[6];
|
||||
uint8_t tls_required; /* bitmask of shader types using l[] */
|
||||
uint8_t c14_bound; /* whether immediate array constbuf is bound */
|
||||
uint8_t clip_enable;
|
||||
|
|
@ -125,9 +147,10 @@ struct nvc0_context {
|
|||
struct nvc0_program *tevlprog;
|
||||
struct nvc0_program *gmtyprog;
|
||||
struct nvc0_program *fragprog;
|
||||
struct nvc0_program *compprog;
|
||||
|
||||
struct nvc0_constbuf constbuf[5][NVC0_MAX_PIPE_CONSTBUFS];
|
||||
uint16_t constbuf_dirty[5];
|
||||
struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
|
||||
uint16_t constbuf_dirty[6];
|
||||
|
||||
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
|
||||
unsigned num_vtxbufs;
|
||||
|
|
@ -139,14 +162,14 @@ struct nvc0_context {
|
|||
uint32_t instance_off; /* current base vertex for instanced arrays */
|
||||
uint32_t instance_max; /* last instance for current draw call */
|
||||
|
||||
struct pipe_sampler_view *textures[5][PIPE_MAX_SAMPLERS];
|
||||
unsigned num_textures[5];
|
||||
uint32_t textures_dirty[5];
|
||||
struct nv50_tsc_entry *samplers[5][PIPE_MAX_SAMPLERS];
|
||||
unsigned num_samplers[5];
|
||||
uint16_t samplers_dirty[5];
|
||||
struct pipe_sampler_view *textures[6][PIPE_MAX_SAMPLERS];
|
||||
unsigned num_textures[6];
|
||||
uint32_t textures_dirty[6];
|
||||
struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS];
|
||||
unsigned num_samplers[6];
|
||||
uint16_t samplers_dirty[6];
|
||||
|
||||
uint32_t tex_handles[5][PIPE_MAX_SAMPLERS]; /* for nve4 */
|
||||
uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */
|
||||
|
||||
struct pipe_framebuffer_state framebuffer;
|
||||
struct pipe_blend_color blend_colour;
|
||||
|
|
@ -169,6 +192,12 @@ struct nvc0_context {
|
|||
|
||||
struct nvc0_blitctx *blit;
|
||||
|
||||
struct pipe_surface *surfaces[2][NVC0_MAX_SURFACE_SLOTS];
|
||||
uint16_t surfaces_dirty[2];
|
||||
uint16_t surfaces_valid[2];
|
||||
|
||||
struct util_dynarray global_residents;
|
||||
|
||||
#ifdef NVC0_WITH_DRAW_MODULE
|
||||
struct draw_context *draw;
|
||||
#endif
|
||||
|
|
@ -211,6 +240,8 @@ boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
|
|||
boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
|
||||
void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
|
||||
void nvc0_program_library_upload(struct nvc0_context *);
|
||||
uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
|
||||
uint32_t label);
|
||||
|
||||
/* nvc0_query.c */
|
||||
void nvc0_init_query_functions(struct nvc0_context *);
|
||||
|
|
@ -236,6 +267,8 @@ void nvc0_tfb_validate(struct nvc0_context *);
|
|||
extern void nvc0_init_state_functions(struct nvc0_context *);
|
||||
|
||||
/* nvc0_state_validate.c */
|
||||
void nvc0_validate_global_residents(struct nvc0_context *,
|
||||
struct nouveau_bufctx *, int bin);
|
||||
extern boolean nvc0_state_validate(struct nvc0_context *, uint32_t state_mask,
|
||||
unsigned space_words);
|
||||
|
||||
|
|
@ -246,9 +279,13 @@ extern void nvc0_clear(struct pipe_context *, unsigned buffers,
|
|||
extern void nvc0_init_surface_functions(struct nvc0_context *);
|
||||
|
||||
/* nvc0_tex.c */
|
||||
boolean nve4_validate_tsc(struct nvc0_context *nvc0, int s);
|
||||
void nvc0_validate_textures(struct nvc0_context *);
|
||||
void nvc0_validate_samplers(struct nvc0_context *);
|
||||
void nve4_set_tex_handles(struct nvc0_context *);
|
||||
void nvc0_validate_surfaces(struct nvc0_context *);
|
||||
void nve4_set_surface_info(struct nouveau_pushbuf *, struct pipe_surface *,
|
||||
struct nvc0_screen *);
|
||||
|
||||
struct pipe_sampler_view *
|
||||
nvc0_create_texture_view(struct pipe_context *,
|
||||
|
|
@ -315,4 +352,8 @@ nvc0_screen_get_video_param(struct pipe_screen *pscreen,
|
|||
/* nvc0_push.c */
|
||||
void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
|
||||
|
||||
/* nve4_compute.c */
|
||||
void nve4_launch_grid(struct pipe_context *,
|
||||
const uint *, const uint *, uint32_t, const void *);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
#include "nvc0_context.h"
|
||||
|
||||
#include "nv50/codegen/nv50_ir_driver.h"
|
||||
#include "nve4_compute.h"
|
||||
|
||||
/* If only they told us the actual semantic instead of just GENERIC ... */
|
||||
static void
|
||||
|
|
@ -533,10 +534,11 @@ nvc0_program_dump(struct nvc0_program *prog)
|
|||
{
|
||||
unsigned pos;
|
||||
|
||||
for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos)
|
||||
debug_printf("HDR[%02lx] = 0x%08x\n",
|
||||
pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
|
||||
|
||||
if (prog->type != PIPE_SHADER_COMPUTE) {
|
||||
for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos)
|
||||
debug_printf("HDR[%02lx] = 0x%08x\n",
|
||||
pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
|
||||
}
|
||||
debug_printf("shader binary code (0x%x bytes):", prog->code_size);
|
||||
for (pos = 0; pos < prog->code_size / 4; ++pos) {
|
||||
if ((pos % 8) == 0)
|
||||
|
|
@ -569,11 +571,11 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
|
|||
if (prog->type == PIPE_SHADER_COMPUTE) {
|
||||
if (chipset >= NVISA_GK104_CHIPSET) {
|
||||
info->io.resInfoCBSlot = 0;
|
||||
info->io.texBindBase = 0; /* TODO */
|
||||
info->io.suInfoBase = 0; /* TODO */
|
||||
info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
|
||||
info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
|
||||
}
|
||||
info->io.msInfoCBSlot = 0;
|
||||
info->io.msInfoBase = 0; /* TODO */
|
||||
info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
|
||||
} else {
|
||||
if (chipset >= NVISA_GK104_CHIPSET) {
|
||||
info->io.resInfoCBSlot = 15;
|
||||
|
|
@ -598,14 +600,16 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
|
|||
NOUVEAU_ERR("shader translation failed: %i\n", ret);
|
||||
goto out;
|
||||
}
|
||||
FREE(info->bin.syms);
|
||||
if (prog->type != PIPE_SHADER_COMPUTE)
|
||||
FREE(info->bin.syms);
|
||||
|
||||
prog->code = info->bin.code;
|
||||
prog->code_size = info->bin.codeSize;
|
||||
prog->immd_data = info->immd.buf;
|
||||
prog->immd_size = info->immd.bufSize;
|
||||
prog->relocs = info->bin.relocData;
|
||||
prog->max_gpr = MAX2(4, (info->bin.maxGPR + 1));
|
||||
prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
|
||||
prog->num_barriers = info->numBarriers;
|
||||
|
||||
prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
|
||||
|
||||
|
|
@ -633,6 +637,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
|
|||
case PIPE_SHADER_FRAGMENT:
|
||||
ret = nvc0_fp_gen_header(prog, info);
|
||||
break;
|
||||
case PIPE_SHADER_COMPUTE:
|
||||
prog->cp.syms = info->bin.syms;
|
||||
prog->cp.num_syms = info->bin.numSyms;
|
||||
break;
|
||||
default:
|
||||
ret = -1;
|
||||
NOUVEAU_ERR("unknown program type: %u\n", prog->type);
|
||||
|
|
@ -672,8 +680,9 @@ boolean
|
|||
nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
|
||||
{
|
||||
struct nvc0_screen *screen = nvc0->screen;
|
||||
const boolean is_cp = prog->type == PIPE_SHADER_COMPUTE;
|
||||
int ret;
|
||||
uint32_t size = prog->code_size + NVC0_SHADER_HEADER_SIZE;
|
||||
uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
|
||||
uint32_t lib_pos = screen->lib_code->start;
|
||||
uint32_t code_pos;
|
||||
|
||||
|
|
@ -689,7 +698,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
|
|||
* latency information is expected only at certain positions.
|
||||
*/
|
||||
if (screen->base.class_3d >= NVE4_3D_CLASS)
|
||||
size = size + 0x70;
|
||||
size = size + (is_cp ? 0x40 : 0x70);
|
||||
size = align(size, 0x40);
|
||||
|
||||
ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
|
||||
|
|
@ -714,18 +723,27 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
|
|||
assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <=
|
||||
prog->mem->start + prog->mem->size));
|
||||
|
||||
if (screen->base.class_3d >= NVE4_3D_CLASS) {
|
||||
switch (prog->mem->start & 0xff) {
|
||||
case 0x40: prog->code_base += 0x70; break;
|
||||
case 0x80: prog->code_base += 0x30; break;
|
||||
case 0xc0: prog->code_base += 0x70; break;
|
||||
default:
|
||||
prog->code_base += 0x30;
|
||||
assert((prog->mem->start & 0xff) == 0x00);
|
||||
break;
|
||||
if (!is_cp) {
|
||||
if (screen->base.class_3d >= NVE4_3D_CLASS) {
|
||||
switch (prog->mem->start & 0xff) {
|
||||
case 0x40: prog->code_base += 0x70; break;
|
||||
case 0x80: prog->code_base += 0x30; break;
|
||||
case 0xc0: prog->code_base += 0x70; break;
|
||||
default:
|
||||
prog->code_base += 0x30;
|
||||
assert((prog->mem->start & 0xff) == 0x00);
|
||||
break;
|
||||
}
|
||||
}
|
||||
code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
|
||||
} else {
|
||||
if (screen->base.class_3d >= NVE4_3D_CLASS) {
|
||||
if (prog->mem->start & 0x40)
|
||||
prog->code_base += 0x40;
|
||||
assert((prog->code_base & 0x7f) == 0x00);
|
||||
}
|
||||
code_pos = prog->code_base;
|
||||
}
|
||||
code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
|
||||
|
||||
if (prog->relocs)
|
||||
nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
|
||||
|
|
@ -735,10 +753,10 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
|
|||
nvc0_program_dump(prog);
|
||||
#endif
|
||||
|
||||
nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
|
||||
NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
|
||||
nvc0->base.push_data(&nvc0->base, screen->text,
|
||||
prog->code_base + NVC0_SHADER_HEADER_SIZE,
|
||||
if (!is_cp)
|
||||
nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
|
||||
NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
|
||||
nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
|
||||
NOUVEAU_BO_VRAM, prog->code_size, prog->code);
|
||||
if (prog->immd_size)
|
||||
nvc0->base.push_data(&nvc0->base,
|
||||
|
|
@ -790,6 +808,8 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
|
|||
FREE(prog->code);
|
||||
FREE(prog->immd_data);
|
||||
FREE(prog->relocs);
|
||||
if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms)
|
||||
FREE(prog->cp.syms);
|
||||
if (prog->tfb) {
|
||||
if (nvc0->state.tfb == prog->tfb)
|
||||
nvc0->state.tfb = NULL;
|
||||
|
|
@ -801,3 +821,18 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
|
|||
prog->pipe = pipe;
|
||||
prog->type = type;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label)
|
||||
{
|
||||
const struct nv50_ir_prog_symbol *syms =
|
||||
(const struct nv50_ir_prog_symbol *)prog->cp.syms;
|
||||
unsigned base = 0;
|
||||
unsigned i;
|
||||
if (prog->type != PIPE_SHADER_COMPUTE)
|
||||
base = NVC0_SHADER_HEADER_SIZE;
|
||||
for (i = 0; i < prog->cp.num_syms; ++i)
|
||||
if (syms[i].label == label)
|
||||
return prog->code_base + base + syms[i].offset;
|
||||
return ~0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ struct nvc0_program {
|
|||
ubyte type;
|
||||
boolean translated;
|
||||
boolean need_tls;
|
||||
uint8_t max_gpr;
|
||||
uint8_t num_gprs;
|
||||
|
||||
uint32_t *code;
|
||||
uint32_t *immd_data;
|
||||
|
|
@ -50,6 +50,13 @@ struct nvc0_program {
|
|||
uint32_t tess_mode; /* ~0 if defined by the other stage */
|
||||
uint32_t input_patch_size;
|
||||
} tp;
|
||||
struct {
|
||||
uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
|
||||
uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
|
||||
void *syms;
|
||||
unsigned num_syms;
|
||||
} cp;
|
||||
uint8_t num_barriers;
|
||||
|
||||
void *relocs;
|
||||
|
||||
|
|
|
|||
|
|
@ -88,12 +88,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
|
||||
switch (param) {
|
||||
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
|
||||
return 16 * PIPE_SHADER_TYPES; /* NOTE: should not count COMPUTE */
|
||||
return 16 * 5;
|
||||
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
|
||||
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
|
||||
return 15;
|
||||
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
|
||||
return 12;
|
||||
return (class_3d >= NVE4_3D_CLASS) ? 13 : 12;
|
||||
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
|
||||
return 2048;
|
||||
case PIPE_CAP_MIN_TEXEL_OFFSET:
|
||||
|
|
@ -176,6 +176,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
|
||||
case PIPE_CAP_TEXTURE_MULTISAMPLE:
|
||||
return 0;
|
||||
case PIPE_CAP_COMPUTE:
|
||||
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
|
||||
default:
|
||||
NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
|
||||
return 0;
|
||||
|
|
@ -186,6 +188,8 @@ static int
|
|||
nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
||||
enum pipe_shader_cap param)
|
||||
{
|
||||
const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
|
||||
|
||||
switch (shader) {
|
||||
case PIPE_SHADER_VERTEX:
|
||||
/*
|
||||
|
|
@ -195,11 +199,17 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
case PIPE_SHADER_GEOMETRY:
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
break;
|
||||
case PIPE_SHADER_COMPUTE:
|
||||
if (class_3d < NVE4_3D_CLASS)
|
||||
return 0;
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (param) {
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
|
||||
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
|
||||
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
|
||||
|
|
@ -216,6 +226,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
case PIPE_SHADER_CAP_MAX_CONSTS:
|
||||
return 65536 / 16;
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
|
||||
if (shader == PIPE_SHADER_COMPUTE && class_3d >= NVE4_3D_CLASS)
|
||||
return NVE4_MAX_PIPE_CONSTBUFS_COMPUTE;
|
||||
return NVC0_MAX_PIPE_CONSTBUFS;
|
||||
case PIPE_SHADER_CAP_MAX_ADDRS:
|
||||
return 1;
|
||||
|
|
@ -234,7 +246,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_SUBROUTINES:
|
||||
return 1; /* but inlining everything, we need function declarations */
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_INTEGERS:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
|
||||
|
|
@ -270,6 +282,47 @@ nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
|
|||
}
|
||||
}
|
||||
|
||||
static int
|
||||
nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
|
||||
enum pipe_compute_cap param, void *data)
|
||||
{
|
||||
uint64_t *data64 = (uint64_t *)data;
|
||||
const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
|
||||
|
||||
switch (param) {
|
||||
case PIPE_COMPUTE_CAP_GRID_DIMENSION:
|
||||
data64[0] = 3;
|
||||
return 8;
|
||||
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
|
||||
data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fffffff : 65535;
|
||||
data64[1] = 65535;
|
||||
data64[2] = 65535;
|
||||
return 24;
|
||||
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
|
||||
data64[0] = 1024;
|
||||
data64[1] = 1024;
|
||||
data64[2] = 64;
|
||||
return 24;
|
||||
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
|
||||
data64[0] = 1024;
|
||||
return 8;
|
||||
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
|
||||
data64[0] = (uint64_t)1 << 40;
|
||||
return 8;
|
||||
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
|
||||
data64[0] = 48 << 10;
|
||||
return 8;
|
||||
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
|
||||
data64[0] = 512 << 10;
|
||||
return 8;
|
||||
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
|
||||
data64[0] = 4096;
|
||||
return 8;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_screen_destroy(struct pipe_screen *pscreen)
|
||||
{
|
||||
|
|
@ -291,6 +344,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
|
|||
nouveau_bo_ref(NULL, &screen->txc);
|
||||
nouveau_bo_ref(NULL, &screen->fence.bo);
|
||||
nouveau_bo_ref(NULL, &screen->poly_cache);
|
||||
nouveau_bo_ref(NULL, &screen->parm);
|
||||
|
||||
nouveau_heap_destroy(&screen->lib_code);
|
||||
nouveau_heap_destroy(&screen->text_heap);
|
||||
|
|
@ -412,6 +466,23 @@ nvc0_screen_fence_update(struct pipe_screen *pscreen)
|
|||
return screen->fence.map[0];
|
||||
}
|
||||
|
||||
static int
|
||||
nvc0_screen_init_compute(struct nvc0_screen *screen)
|
||||
{
|
||||
screen->base.base.get_compute_param = nvc0_screen_get_compute_param;
|
||||
|
||||
switch (screen->base.device->chipset & 0xf0) {
|
||||
case 0xc0:
|
||||
case 0xd0:
|
||||
return 0;
|
||||
case 0xe0:
|
||||
case 0xf0:
|
||||
return nve4_screen_compute_setup(screen, screen->base.pushbuf);
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#define FAIL_SCREEN_INIT(str, err) \
|
||||
do { \
|
||||
NOUVEAU_ERR(str, err); \
|
||||
|
|
@ -653,9 +724,9 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
|
||||
/* max MPs * max warps per MP (TODO: ask kernel) */
|
||||
if (screen->eng3d->oclass >= NVE4_3D_CLASS)
|
||||
screen->tls_size = 8 * 64;
|
||||
screen->tls_size = 8 * 64 * 32;
|
||||
else
|
||||
screen->tls_size = 16 * 48;
|
||||
screen->tls_size = 16 * 48 * 32;
|
||||
screen->tls_size *= NVC0_CAP_MAX_PROGRAM_TEMPS * 16;
|
||||
screen->tls_size = align(screen->tls_size, 1 << 17);
|
||||
|
||||
|
|
@ -775,6 +846,9 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
|
||||
IMMED_NVC0(push, NVC0_3D(EDGEFLAG), 1);
|
||||
|
||||
if (nvc0_screen_init_compute(screen))
|
||||
goto fail;
|
||||
|
||||
PUSH_KICK (push);
|
||||
|
||||
screen->tic.entries = CALLOC(4096, sizeof(void *));
|
||||
|
|
|
|||
|
|
@ -15,7 +15,10 @@
|
|||
#define NVC0_TSC_MAX_ENTRIES 2048
|
||||
|
||||
/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
|
||||
#define NVC0_MAX_PIPE_CONSTBUFS 14
|
||||
#define NVC0_MAX_PIPE_CONSTBUFS 14
|
||||
#define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE 7
|
||||
|
||||
#define NVC0_MAX_SURFACE_SLOTS 16
|
||||
|
||||
struct nvc0_context;
|
||||
|
||||
|
|
@ -29,7 +32,8 @@ struct nvc0_screen {
|
|||
int num_occlusion_queries_active;
|
||||
|
||||
struct nouveau_bo *text;
|
||||
struct nouveau_bo *uniform_bo;
|
||||
struct nouveau_bo *parm; /* for COMPUTE */
|
||||
struct nouveau_bo *uniform_bo; /* for 3D */
|
||||
struct nouveau_bo *tls;
|
||||
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
|
||||
struct nouveau_bo *poly_cache;
|
||||
|
|
@ -63,7 +67,7 @@ struct nvc0_screen {
|
|||
struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
|
||||
struct nouveau_object *eng2d;
|
||||
struct nouveau_object *m2mf;
|
||||
struct nouveau_object *dijkstra;
|
||||
struct nouveau_object *compute;
|
||||
};
|
||||
|
||||
static INLINE struct nvc0_screen *
|
||||
|
|
@ -80,6 +84,8 @@ void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
|
|||
int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
|
||||
int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
|
||||
|
||||
int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
|
||||
|
||||
static INLINE void
|
||||
nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0)
|
|||
PUSH_DATA (push, 0x11);
|
||||
PUSH_DATA (push, vp->code_base);
|
||||
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
|
||||
PUSH_DATA (push, vp->max_gpr);
|
||||
PUSH_DATA (push, vp->num_gprs);
|
||||
|
||||
// BEGIN_NVC0(push, NVC0_3D_(0x163c), 1);
|
||||
// PUSH_DATA (push, 0);
|
||||
|
|
@ -120,7 +120,7 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
|
|||
PUSH_DATA (push, 0x51);
|
||||
PUSH_DATA (push, fp->code_base);
|
||||
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
|
||||
PUSH_DATA (push, fp->max_gpr);
|
||||
PUSH_DATA (push, fp->num_gprs);
|
||||
|
||||
BEGIN_NVC0(push, SUBC_3D(0x0360), 2);
|
||||
PUSH_DATA (push, 0x20164010);
|
||||
|
|
@ -144,7 +144,7 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
|
|||
PUSH_DATA (push, 0x21);
|
||||
PUSH_DATA (push, tp->code_base);
|
||||
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
|
||||
PUSH_DATA (push, tp->max_gpr);
|
||||
PUSH_DATA (push, tp->num_gprs);
|
||||
|
||||
if (tp->tp.input_patch_size <= 32)
|
||||
IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), tp->tp.input_patch_size);
|
||||
|
|
@ -171,7 +171,7 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0)
|
|||
BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1);
|
||||
PUSH_DATA (push, tp->code_base);
|
||||
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
|
||||
PUSH_DATA (push, tp->max_gpr);
|
||||
PUSH_DATA (push, tp->num_gprs);
|
||||
} else {
|
||||
BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
|
||||
PUSH_DATA (push, 0x30);
|
||||
|
|
@ -197,7 +197,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
|
|||
BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1);
|
||||
PUSH_DATA (push, gp->code_base);
|
||||
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
|
||||
PUSH_DATA (push, gp->max_gpr);
|
||||
PUSH_DATA (push, gp->num_gprs);
|
||||
BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
|
||||
PUSH_DATA (push, gp_selects_layer ? NVC0_3D_LAYER_USE_GP : 0);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -489,6 +489,57 @@ nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
|
|||
nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_stage_sampler_states_bind_range(struct nvc0_context *nvc0,
|
||||
const unsigned s,
|
||||
unsigned start, unsigned nr, void **cso)
|
||||
{
|
||||
const unsigned end = start + nr;
|
||||
int last_valid = -1;
|
||||
unsigned i;
|
||||
|
||||
if (cso) {
|
||||
for (i = start; i < end; ++i) {
|
||||
const unsigned p = i - start;
|
||||
if (cso[p])
|
||||
last_valid = i;
|
||||
if (cso[p] == nvc0->samplers[s][i])
|
||||
continue;
|
||||
nvc0->samplers_dirty[s] |= 1 << i;
|
||||
|
||||
if (nvc0->samplers[s][i])
|
||||
nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
|
||||
nvc0->samplers[s][i] = cso[p];
|
||||
}
|
||||
} else {
|
||||
for (i = start; i < end; ++i) {
|
||||
if (nvc0->samplers[s][i]) {
|
||||
nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
|
||||
nvc0->samplers[s][i] = NULL;
|
||||
nvc0->samplers_dirty[s] |= 1 << i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nvc0->num_samplers[s] <= end) {
|
||||
if (last_valid < 0) {
|
||||
for (i = start; i && !nvc0->samplers[s][i - 1]; --i);
|
||||
nvc0->num_samplers[s] = i;
|
||||
} else {
|
||||
nvc0->num_samplers[s] = last_valid + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_cp_sampler_states_bind(struct pipe_context *pipe,
|
||||
unsigned start, unsigned nr, void **cso)
|
||||
{
|
||||
nvc0_stage_sampler_states_bind_range(nvc0_context(pipe), 5, start, nr, cso);
|
||||
|
||||
nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
|
||||
}
|
||||
|
||||
/* NOTE: only called when not referenced anywhere, won't be bound */
|
||||
static void
|
||||
nvc0_sampler_view_destroy(struct pipe_context *pipe,
|
||||
|
|
@ -561,6 +612,67 @@ nvc0_gp_set_sampler_views(struct pipe_context *pipe,
|
|||
nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s,
|
||||
unsigned start, unsigned nr,
|
||||
struct pipe_sampler_view **views)
|
||||
{
|
||||
struct nouveau_bufctx *bctx = (s == 5) ? nvc0->bufctx_cp : nvc0->bufctx_3d;
|
||||
const unsigned end = start + nr;
|
||||
const unsigned bin = (s == 5) ? NVC0_BIND_CP_TEX(0) : NVC0_BIND_TEX(s, 0);
|
||||
int last_valid = -1;
|
||||
unsigned i;
|
||||
|
||||
if (views) {
|
||||
for (i = start; i < end; ++i) {
|
||||
const unsigned p = i - start;
|
||||
if (views[p])
|
||||
last_valid = i;
|
||||
if (views[p] == nvc0->textures[s][i])
|
||||
continue;
|
||||
nvc0->textures_dirty[s] |= 1 << i;
|
||||
|
||||
if (nvc0->textures[s][i]) {
|
||||
struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
|
||||
nouveau_bufctx_reset(bctx, bin + i);
|
||||
nvc0_screen_tic_unlock(nvc0->screen, old);
|
||||
}
|
||||
pipe_sampler_view_reference(&nvc0->textures[s][i], views[p]);
|
||||
}
|
||||
} else {
|
||||
for (i = start; i < end; ++i) {
|
||||
struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
|
||||
if (!old)
|
||||
continue;
|
||||
nvc0->textures_dirty[s] |= 1 << i;
|
||||
|
||||
nvc0_screen_tic_unlock(nvc0->screen, old);
|
||||
pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
|
||||
nouveau_bufctx_reset(bctx, bin + i);
|
||||
}
|
||||
}
|
||||
|
||||
if (nvc0->num_textures[s] <= end) {
|
||||
if (last_valid < 0) {
|
||||
for (i = start; i && !nvc0->textures[s][i - 1]; --i);
|
||||
nvc0->num_textures[s] = i;
|
||||
} else {
|
||||
nvc0->num_textures[s] = last_valid + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_cp_set_sampler_views(struct pipe_context *pipe,
|
||||
unsigned start, unsigned nr,
|
||||
struct pipe_sampler_view **views)
|
||||
{
|
||||
nvc0_stage_set_sampler_views_range(nvc0_context(pipe), 5, start, nr, views);
|
||||
|
||||
nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_TEXTURES;
|
||||
}
|
||||
|
||||
|
||||
/* ============================= SHADERS =======================================
|
||||
*/
|
||||
|
||||
|
|
@ -644,6 +756,35 @@ nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
|
|||
nvc0->dirty |= NVC0_NEW_GMTYPROG;
|
||||
}
|
||||
|
||||
static void *
|
||||
nvc0_cp_state_create(struct pipe_context *pipe,
|
||||
const struct pipe_compute_state *cso)
|
||||
{
|
||||
struct nvc0_program *prog;
|
||||
|
||||
prog = CALLOC_STRUCT(nvc0_program);
|
||||
if (!prog)
|
||||
return NULL;
|
||||
prog->type = PIPE_SHADER_COMPUTE;
|
||||
|
||||
prog->cp.smem_size = cso->req_local_mem;
|
||||
prog->cp.lmem_size = cso->req_private_mem;
|
||||
prog->parm_size = cso->req_input_mem;
|
||||
|
||||
prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog);
|
||||
|
||||
return (void *)prog;
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_cp_state_bind(struct pipe_context *pipe, void *hwcso)
|
||||
{
|
||||
struct nvc0_context *nvc0 = nvc0_context(pipe);
|
||||
|
||||
nvc0->compprog = hwcso;
|
||||
nvc0->dirty_cp |= NVC0_NEW_CP_PROGRAM;
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
|
||||
struct pipe_constant_buffer *cb)
|
||||
|
|
@ -653,14 +794,22 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
|
|||
const unsigned s = nvc0_shader_stage(shader);
|
||||
const unsigned i = index;
|
||||
|
||||
if (shader == PIPE_SHADER_COMPUTE)
|
||||
return;
|
||||
if (unlikely(shader == PIPE_SHADER_COMPUTE)) {
|
||||
assert(!cb || !cb->user_buffer);
|
||||
if (nvc0->constbuf[s][i].u.buf)
|
||||
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i));
|
||||
|
||||
if (nvc0->constbuf[s][i].user)
|
||||
nvc0->constbuf[s][i].u.buf = NULL;
|
||||
else
|
||||
if (nvc0->constbuf[s][i].u.buf)
|
||||
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
|
||||
nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
|
||||
} else {
|
||||
if (nvc0->constbuf[s][i].user)
|
||||
nvc0->constbuf[s][i].u.buf = NULL;
|
||||
else
|
||||
if (nvc0->constbuf[s][i].u.buf)
|
||||
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
|
||||
|
||||
nvc0->dirty |= NVC0_NEW_CONSTBUF;
|
||||
}
|
||||
nvc0->constbuf_dirty[s] |= 1 << i;
|
||||
|
||||
pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);
|
||||
|
||||
|
|
@ -673,10 +822,6 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
|
|||
nvc0->constbuf[s][i].offset = cb->buffer_offset;
|
||||
nvc0->constbuf[s][i].size = align(cb->buffer_size, 0x100);
|
||||
}
|
||||
|
||||
nvc0->constbuf_dirty[s] |= 1 << i;
|
||||
|
||||
nvc0->dirty |= NVC0_NEW_CONSTBUF;
|
||||
}
|
||||
|
||||
/* =============================================================================
|
||||
|
|
@ -919,6 +1064,113 @@ nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
|
|||
nvc0->dirty |= NVC0_NEW_TFB_TARGETS;
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_bind_surfaces_range(struct nvc0_context *nvc0, const unsigned t,
|
||||
unsigned start, unsigned nr,
|
||||
struct pipe_surface **psurfaces)
|
||||
{
|
||||
const unsigned end = start + nr;
|
||||
const unsigned mask = ((1 << nr) - 1) << start;
|
||||
unsigned i;
|
||||
|
||||
if (psurfaces) {
|
||||
for (i = start; i < end; ++i) {
|
||||
const unsigned p = i - start;
|
||||
if (psurfaces[p])
|
||||
nvc0->surfaces_valid[t] |= (1 << i);
|
||||
else
|
||||
nvc0->surfaces_valid[t] &= ~(1 << i);
|
||||
pipe_surface_reference(&nvc0->surfaces[t][i], psurfaces[p]);
|
||||
}
|
||||
} else {
|
||||
for (i = start; i < end; ++i)
|
||||
pipe_surface_reference(&nvc0->surfaces[t][i], NULL);
|
||||
nvc0->surfaces_valid[t] &= ~mask;
|
||||
}
|
||||
nvc0->surfaces_dirty[t] |= mask;
|
||||
|
||||
if (t == 0)
|
||||
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_SUF);
|
||||
else
|
||||
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_set_compute_resources(struct pipe_context *pipe,
|
||||
unsigned start, unsigned nr,
|
||||
struct pipe_surface **resources)
|
||||
{
|
||||
nvc0_bind_surfaces_range(nvc0_context(pipe), 1, start, nr, resources);
|
||||
|
||||
nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SURFACES;
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_set_shader_resources(struct pipe_context *pipe,
|
||||
unsigned start, unsigned nr,
|
||||
struct pipe_surface **resources)
|
||||
{
|
||||
nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, resources);
|
||||
|
||||
nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES;
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
nvc0_set_global_handle(uint32_t *phandle, struct pipe_resource *res)
|
||||
{
|
||||
struct nv04_resource *buf = nv04_resource(res);
|
||||
if (buf) {
|
||||
uint64_t limit = (buf->address + buf->base.width0) - 1;
|
||||
if (limit < (1ULL << 32)) {
|
||||
*phandle = (uint32_t)buf->address;
|
||||
} else {
|
||||
NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: "
|
||||
"resource not contained within 32-bit address space !\n");
|
||||
*phandle = 0;
|
||||
}
|
||||
} else {
|
||||
*phandle = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_set_global_bindings(struct pipe_context *pipe,
|
||||
unsigned start, unsigned nr,
|
||||
struct pipe_resource **resources,
|
||||
uint32_t **handles)
|
||||
{
|
||||
struct nvc0_context *nvc0 = nvc0_context(pipe);
|
||||
struct pipe_resource **ptr;
|
||||
unsigned i;
|
||||
const unsigned end = start + nr;
|
||||
|
||||
if (nvc0->global_residents.size <= (end * sizeof(struct pipe_resource *))) {
|
||||
const unsigned old_size = nvc0->global_residents.size;
|
||||
const unsigned req_size = end * sizeof(struct pipe_resource *);
|
||||
util_dynarray_resize(&nvc0->global_residents, req_size);
|
||||
memset((uint8_t *)nvc0->global_residents.data + old_size, 0,
|
||||
req_size - old_size);
|
||||
}
|
||||
|
||||
if (resources) {
|
||||
ptr = util_dynarray_element(
|
||||
&nvc0->global_residents, struct pipe_resource *, start);
|
||||
for (i = 0; i < nr; ++i) {
|
||||
pipe_resource_reference(&ptr[i], resources[i]);
|
||||
nvc0_set_global_handle(handles[i], resources[i]);
|
||||
}
|
||||
} else {
|
||||
ptr = util_dynarray_element(
|
||||
&nvc0->global_residents, struct pipe_resource *, start);
|
||||
for (i = 0; i < nr; ++i)
|
||||
pipe_resource_reference(&ptr[i], NULL);
|
||||
}
|
||||
|
||||
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);
|
||||
|
||||
nvc0->dirty_cp = NVC0_NEW_CP_GLOBALS;
|
||||
}
|
||||
|
||||
void
|
||||
nvc0_init_state_functions(struct nvc0_context *nvc0)
|
||||
{
|
||||
|
|
@ -941,12 +1193,14 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
|
|||
pipe->bind_vertex_sampler_states = nvc0_vp_sampler_states_bind;
|
||||
pipe->bind_fragment_sampler_states = nvc0_fp_sampler_states_bind;
|
||||
pipe->bind_geometry_sampler_states = nvc0_gp_sampler_states_bind;
|
||||
pipe->bind_compute_sampler_states = nvc0_cp_sampler_states_bind;
|
||||
|
||||
pipe->create_sampler_view = nvc0_create_sampler_view;
|
||||
pipe->sampler_view_destroy = nvc0_sampler_view_destroy;
|
||||
pipe->set_vertex_sampler_views = nvc0_vp_set_sampler_views;
|
||||
pipe->set_fragment_sampler_views = nvc0_fp_set_sampler_views;
|
||||
pipe->set_geometry_sampler_views = nvc0_gp_set_sampler_views;
|
||||
pipe->set_compute_sampler_views = nvc0_cp_set_sampler_views;
|
||||
|
||||
pipe->create_vs_state = nvc0_vp_state_create;
|
||||
pipe->create_fs_state = nvc0_fp_state_create;
|
||||
|
|
@ -958,6 +1212,10 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
|
|||
pipe->delete_fs_state = nvc0_sp_state_delete;
|
||||
pipe->delete_gs_state = nvc0_sp_state_delete;
|
||||
|
||||
pipe->create_compute_state = nvc0_cp_state_create;
|
||||
pipe->bind_compute_state = nvc0_cp_state_bind;
|
||||
pipe->delete_compute_state = nvc0_sp_state_delete;
|
||||
|
||||
pipe->set_blend_color = nvc0_set_blend_color;
|
||||
pipe->set_stencil_ref = nvc0_set_stencil_ref;
|
||||
pipe->set_clip_state = nvc0_set_clip_state;
|
||||
|
|
@ -978,5 +1236,9 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
|
|||
pipe->create_stream_output_target = nvc0_so_target_create;
|
||||
pipe->stream_output_target_destroy = nvc0_so_target_destroy;
|
||||
pipe->set_stream_output_targets = nvc0_set_transform_feedback_targets;
|
||||
|
||||
pipe->set_global_binding = nvc0_set_global_bindings;
|
||||
pipe->set_compute_resources = nvc0_set_compute_resources;
|
||||
pipe->set_shader_resources = nvc0_set_shader_resources;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -430,6 +430,21 @@ nvc0_validate_sample_mask(struct nvc0_context *nvc0)
|
|||
PUSH_DATA (push, 0x01);
|
||||
}
|
||||
|
||||
void
|
||||
nvc0_validate_global_residents(struct nvc0_context *nvc0,
|
||||
struct nouveau_bufctx *bctx, int bin)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
|
||||
++i) {
|
||||
struct pipe_resource *res = *util_dynarray_element(
|
||||
&nvc0->global_residents, struct pipe_resource *, i);
|
||||
if (res)
|
||||
nvc0_add_resident(bctx, bin, nv04_resource(res), NOUVEAU_BO_RDWR);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_validate_derived_1(struct nvc0_context *nvc0)
|
||||
{
|
||||
|
|
@ -513,6 +528,7 @@ static struct state_validate {
|
|||
{ nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
|
||||
{ nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
|
||||
{ nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
|
||||
{ nvc0_validate_surfaces, NVC0_NEW_SURFACES },
|
||||
{ nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
|
||||
{ nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }
|
||||
};
|
||||
|
|
|
|||
|
|
@ -515,7 +515,7 @@ nvc0_blitter_make_vp(struct nvc0_blitter *blit)
|
|||
blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */
|
||||
blit->vp.code_size = sizeof(code_nvc0);
|
||||
}
|
||||
blit->vp.max_gpr = 7;
|
||||
blit->vp.num_gprs = 7;
|
||||
blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS;
|
||||
|
||||
blit->vp.hdr[0] = 0x00020461; /* vertprog magic */
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
#include "nvc0_context.h"
|
||||
#include "nvc0_resource.h"
|
||||
#include "nv50/nv50_texture.xml.h"
|
||||
#include "nv50/nv50_defs.xml.h"
|
||||
|
||||
#include "util/u_format.h"
|
||||
|
||||
|
|
@ -413,7 +414,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
|
|||
return need_flush;
|
||||
}
|
||||
|
||||
static boolean
|
||||
boolean
|
||||
nve4_validate_tsc(struct nvc0_context *nvc0, int s)
|
||||
{
|
||||
struct nouveau_bo *txc = nvc0->screen->txc;
|
||||
|
|
@ -515,3 +516,295 @@ nve4_set_tex_handles(struct nvc0_context *nvc0)
|
|||
nvc0->samplers_dirty[s] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
|
||||
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
|
||||
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
|
||||
|
||||
void
|
||||
nve4_set_surface_info(struct nouveau_pushbuf *push,
|
||||
struct pipe_surface *psf,
|
||||
struct nvc0_screen *screen)
|
||||
{
|
||||
struct nv50_surface *sf = nv50_surface(psf);
|
||||
struct nv04_resource *res;
|
||||
uint64_t address;
|
||||
uint32_t *const info = push->cur;
|
||||
uint8_t log2cpp;
|
||||
|
||||
if (psf && !nve4_su_format_map[psf->format])
|
||||
NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");
|
||||
|
||||
push->cur += 16;
|
||||
|
||||
if (!psf || !nve4_su_format_map[psf->format]) {
|
||||
memset(info, 0, 16 * sizeof(*info));
|
||||
|
||||
info[0] = 0xbadf0000;
|
||||
info[1] = 0x80004000;
|
||||
info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
|
||||
screen->lib_code->start;
|
||||
return;
|
||||
}
|
||||
res = nv04_resource(sf->base.texture);
|
||||
|
||||
address = res->address + sf->offset;
|
||||
|
||||
info[8] = sf->width;
|
||||
info[9] = sf->height;
|
||||
info[10] = sf->depth;
|
||||
switch (res->base.target) {
|
||||
case PIPE_TEXTURE_1D_ARRAY:
|
||||
info[11] = 1;
|
||||
break;
|
||||
case PIPE_TEXTURE_2D:
|
||||
case PIPE_TEXTURE_RECT:
|
||||
info[11] = 2;
|
||||
break;
|
||||
case PIPE_TEXTURE_3D:
|
||||
info[11] = 3;
|
||||
break;
|
||||
case PIPE_TEXTURE_2D_ARRAY:
|
||||
case PIPE_TEXTURE_CUBE:
|
||||
case PIPE_TEXTURE_CUBE_ARRAY:
|
||||
info[11] = 4;
|
||||
break;
|
||||
default:
|
||||
info[11] = 0;
|
||||
break;
|
||||
}
|
||||
log2cpp = (0xf000 & nve4_su_format_aux_map[sf->base.format]) >> 12;
|
||||
|
||||
info[12] = nve4_suldp_lib_offset[sf->base.format] + screen->lib_code->start;
|
||||
|
||||
/* limit in bytes for raw access */
|
||||
info[13] = (0x06 << 22) | ((sf->width << log2cpp) - 1);
|
||||
|
||||
info[1] = nve4_su_format_map[sf->base.format];
|
||||
|
||||
#if 0
|
||||
switch (util_format_get_blocksizebits(sf->base.format)) {
|
||||
case 16: info[1] |= 1 << 16; break;
|
||||
case 32: info[1] |= 2 << 16; break;
|
||||
case 64: info[1] |= 3 << 16; break;
|
||||
case 128: info[1] |= 4 << 16; break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#else
|
||||
info[1] |= log2cpp << 16;
|
||||
info[1] |= 0x4000;
|
||||
info[1] |= (0x0f00 & nve4_su_format_aux_map[sf->base.format]);
|
||||
#endif
|
||||
|
||||
if (res->base.target == PIPE_BUFFER) {
|
||||
info[0] = address >> 8;
|
||||
info[2] = sf->width - 1;
|
||||
info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
|
||||
info[3] = 0;
|
||||
info[4] = 0;
|
||||
info[5] = 0;
|
||||
info[6] = 0;
|
||||
info[7] = 0;
|
||||
info[14] = 0;
|
||||
info[15] = 0;
|
||||
} else {
|
||||
struct nv50_miptree *mt = nv50_miptree(&res->base);
|
||||
struct nv50_miptree_level *lvl = &mt->level[sf->base.u.tex.level];
|
||||
const unsigned z = sf->base.u.tex.first_layer;
|
||||
|
||||
if (z) {
|
||||
if (mt->layout_3d) {
|
||||
address += nvc0_mt_zslice_offset(mt, psf->u.tex.level, z);
|
||||
/* doesn't work if z passes z-tile boundary */
|
||||
assert(sf->depth == 1);
|
||||
} else {
|
||||
address += mt->layer_stride * z;
|
||||
}
|
||||
}
|
||||
info[0] = address >> 8;
|
||||
info[2] = sf->width - 1;
|
||||
/* NOTE: this is really important: */
|
||||
info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
|
||||
info[3] = (0x88 << 24) | (lvl->pitch / 64);
|
||||
info[4] = sf->height - 1;
|
||||
info[4] |= (lvl->tile_mode & 0x0f0) << 25;
|
||||
info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
|
||||
info[5] = mt->layer_stride >> 8;
|
||||
info[6] = sf->depth - 1;
|
||||
info[6] |= (lvl->tile_mode & 0xf00) << 21;
|
||||
info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
|
||||
info[7] = 0;
|
||||
info[14] = mt->ms_x;
|
||||
info[15] = mt->ms_y;
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
nvc0_update_surface_bindings(struct nvc0_context *nvc0)
|
||||
{
|
||||
/* TODO */
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
nve4_update_surface_bindings(struct nvc0_context *nvc0)
|
||||
{
|
||||
/* TODO */
|
||||
}
|
||||
|
||||
void
|
||||
nvc0_validate_surfaces(struct nvc0_context *nvc0)
|
||||
{
|
||||
if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
|
||||
nve4_update_surface_bindings(nvc0);
|
||||
} else {
|
||||
nvc0_update_surface_bindings(nvc0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
|
||||
{
|
||||
[PIPE_FORMAT_R32G32B32A32_FLOAT] = NVE4_IMAGE_FORMAT_RGBA32_FLOAT,
|
||||
[PIPE_FORMAT_R32G32B32A32_SINT] = NVE4_IMAGE_FORMAT_RGBA32_SINT,
|
||||
[PIPE_FORMAT_R32G32B32A32_UINT] = NVE4_IMAGE_FORMAT_RGBA32_UINT,
|
||||
[PIPE_FORMAT_R16G16B16A16_FLOAT] = NVE4_IMAGE_FORMAT_RGBA16_FLOAT,
|
||||
[PIPE_FORMAT_R16G16B16A16_UNORM] = NVE4_IMAGE_FORMAT_RGBA16_UNORM,
|
||||
[PIPE_FORMAT_R16G16B16A16_SNORM] = NVE4_IMAGE_FORMAT_RGBA16_SNORM,
|
||||
[PIPE_FORMAT_R16G16B16A16_SINT] = NVE4_IMAGE_FORMAT_RGBA16_SINT,
|
||||
[PIPE_FORMAT_R16G16B16A16_UINT] = NVE4_IMAGE_FORMAT_RGBA16_UINT,
|
||||
[PIPE_FORMAT_R8G8B8A8_UNORM] = NVE4_IMAGE_FORMAT_RGBA8_UNORM,
|
||||
[PIPE_FORMAT_R8G8B8A8_SNORM] = NVE4_IMAGE_FORMAT_RGBA8_SNORM,
|
||||
[PIPE_FORMAT_R8G8B8A8_SINT] = NVE4_IMAGE_FORMAT_RGBA8_SINT,
|
||||
[PIPE_FORMAT_R8G8B8A8_UINT] = NVE4_IMAGE_FORMAT_RGBA8_UINT,
|
||||
[PIPE_FORMAT_R11G11B10_FLOAT] = NVE4_IMAGE_FORMAT_R11G11B10_FLOAT,
|
||||
[PIPE_FORMAT_R10G10B10A2_UNORM] = NVE4_IMAGE_FORMAT_RGB10_A2_UNORM,
|
||||
/* [PIPE_FORMAT_R10G10B10A2_UINT] = NVE4_IMAGE_FORMAT_RGB10_A2_UINT, */
|
||||
[PIPE_FORMAT_R32G32_FLOAT] = NVE4_IMAGE_FORMAT_RG32_FLOAT,
|
||||
[PIPE_FORMAT_R32G32_SINT] = NVE4_IMAGE_FORMAT_RG32_SINT,
|
||||
[PIPE_FORMAT_R32G32_UINT] = NVE4_IMAGE_FORMAT_RG32_UINT,
|
||||
[PIPE_FORMAT_R16G16_FLOAT] = NVE4_IMAGE_FORMAT_RG16_FLOAT,
|
||||
[PIPE_FORMAT_R16G16_UNORM] = NVE4_IMAGE_FORMAT_RG16_UNORM,
|
||||
[PIPE_FORMAT_R16G16_SNORM] = NVE4_IMAGE_FORMAT_RG16_SNORM,
|
||||
[PIPE_FORMAT_R16G16_SINT] = NVE4_IMAGE_FORMAT_RG16_SINT,
|
||||
[PIPE_FORMAT_R16G16_UINT] = NVE4_IMAGE_FORMAT_RG16_UINT,
|
||||
[PIPE_FORMAT_R8G8_UNORM] = NVE4_IMAGE_FORMAT_RG8_UNORM,
|
||||
[PIPE_FORMAT_R8G8_SNORM] = NVE4_IMAGE_FORMAT_RG8_SNORM,
|
||||
[PIPE_FORMAT_R8G8_SINT] = NVE4_IMAGE_FORMAT_RG8_SINT,
|
||||
[PIPE_FORMAT_R8G8_UINT] = NVE4_IMAGE_FORMAT_RG8_UINT,
|
||||
[PIPE_FORMAT_R32_FLOAT] = NVE4_IMAGE_FORMAT_R32_FLOAT,
|
||||
[PIPE_FORMAT_R32_SINT] = NVE4_IMAGE_FORMAT_R32_SINT,
|
||||
[PIPE_FORMAT_R32_UINT] = NVE4_IMAGE_FORMAT_R32_UINT,
|
||||
[PIPE_FORMAT_R16_FLOAT] = NVE4_IMAGE_FORMAT_R16_FLOAT,
|
||||
[PIPE_FORMAT_R16_UNORM] = NVE4_IMAGE_FORMAT_R16_UNORM,
|
||||
[PIPE_FORMAT_R16_SNORM] = NVE4_IMAGE_FORMAT_R16_SNORM,
|
||||
[PIPE_FORMAT_R16_SINT] = NVE4_IMAGE_FORMAT_R16_SINT,
|
||||
[PIPE_FORMAT_R16_UINT] = NVE4_IMAGE_FORMAT_R16_UINT,
|
||||
[PIPE_FORMAT_R8_UNORM] = NVE4_IMAGE_FORMAT_R8_UNORM,
|
||||
[PIPE_FORMAT_R8_SNORM] = NVE4_IMAGE_FORMAT_R8_SNORM,
|
||||
[PIPE_FORMAT_R8_SINT] = NVE4_IMAGE_FORMAT_R8_SINT,
|
||||
[PIPE_FORMAT_R8_UINT] = NVE4_IMAGE_FORMAT_R8_UINT,
|
||||
};
|
||||
|
||||
/* Auxiliary format description values for surface instructions.
|
||||
* (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
|
||||
*/
|
||||
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
|
||||
{
|
||||
[PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
|
||||
[PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
|
||||
[PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,
|
||||
|
||||
[PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
|
||||
[PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
|
||||
[PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
|
||||
[PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
|
||||
[PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,
|
||||
|
||||
[PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
|
||||
[PIPE_FORMAT_R32G32_SINT] = 0x3433,
|
||||
[PIPE_FORMAT_R32G32_UINT] = 0x3433,
|
||||
|
||||
[PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
|
||||
/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24, */
|
||||
[PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
|
||||
[PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
|
||||
[PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
|
||||
[PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
|
||||
[PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,
|
||||
|
||||
[PIPE_FORMAT_R16G16_UNORM] = 0x2524,
|
||||
[PIPE_FORMAT_R16G16_SNORM] = 0x2524,
|
||||
[PIPE_FORMAT_R16G16_SINT] = 0x2524,
|
||||
[PIPE_FORMAT_R16G16_UINT] = 0x2524,
|
||||
[PIPE_FORMAT_R16G16_FLOAT] = 0x2524,
|
||||
|
||||
[PIPE_FORMAT_R32_SINT] = 0x2024,
|
||||
[PIPE_FORMAT_R32_UINT] = 0x2024,
|
||||
[PIPE_FORMAT_R32_FLOAT] = 0x2024,
|
||||
|
||||
[PIPE_FORMAT_R8G8_UNORM] = 0x1615,
|
||||
[PIPE_FORMAT_R8G8_SNORM] = 0x1615,
|
||||
[PIPE_FORMAT_R8G8_SINT] = 0x1615,
|
||||
[PIPE_FORMAT_R8G8_UINT] = 0x1615,
|
||||
|
||||
[PIPE_FORMAT_R16_UNORM] = 0x1115,
|
||||
[PIPE_FORMAT_R16_SNORM] = 0x1115,
|
||||
[PIPE_FORMAT_R16_SINT] = 0x1115,
|
||||
[PIPE_FORMAT_R16_UINT] = 0x1115,
|
||||
[PIPE_FORMAT_R16_FLOAT] = 0x1115,
|
||||
|
||||
[PIPE_FORMAT_R8_UNORM] = 0x0206,
|
||||
[PIPE_FORMAT_R8_SNORM] = 0x0206,
|
||||
[PIPE_FORMAT_R8_SINT] = 0x0206,
|
||||
[PIPE_FORMAT_R8_UINT] = 0x0206
|
||||
};
|
||||
|
||||
/* NOTE: These are hardcoded offsets for the shader library.
|
||||
* TODO: Automate them.
|
||||
*/
|
||||
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
|
||||
{
|
||||
[PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
|
||||
[PIPE_FORMAT_R32G32B32A32_SINT] = 0x218,
|
||||
[PIPE_FORMAT_R32G32B32A32_UINT] = 0x218,
|
||||
[PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
|
||||
[PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
|
||||
[PIPE_FORMAT_R16G16B16A16_SINT] = 0x330,
|
||||
[PIPE_FORMAT_R16G16B16A16_UINT] = 0x388,
|
||||
[PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
|
||||
[PIPE_FORMAT_R32G32_FLOAT] = 0x428,
|
||||
[PIPE_FORMAT_R32G32_SINT] = 0x468,
|
||||
[PIPE_FORMAT_R32G32_UINT] = 0x468,
|
||||
[PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8,
|
||||
/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x530, */
|
||||
[PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588,
|
||||
[PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8,
|
||||
[PIPE_FORMAT_R8G8B8A8_SINT] = 0x670,
|
||||
[PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8,
|
||||
[PIPE_FORMAT_B5G6R5_UNORM] = 0x718,
|
||||
[PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0,
|
||||
[PIPE_FORMAT_R16G16_UNORM] = 0x828,
|
||||
[PIPE_FORMAT_R16G16_SNORM] = 0x890,
|
||||
[PIPE_FORMAT_R16G16_SINT] = 0x8f0,
|
||||
[PIPE_FORMAT_R16G16_UINT] = 0x948,
|
||||
[PIPE_FORMAT_R16G16_FLOAT] = 0x998,
|
||||
[PIPE_FORMAT_R32_FLOAT] = 0x9e8,
|
||||
[PIPE_FORMAT_R32_SINT] = 0xa30,
|
||||
[PIPE_FORMAT_R32_UINT] = 0xa30,
|
||||
[PIPE_FORMAT_R8G8_UNORM] = 0xa78,
|
||||
[PIPE_FORMAT_R8G8_SNORM] = 0xae0,
|
||||
[PIPE_FORMAT_R8G8_UINT] = 0xb48,
|
||||
[PIPE_FORMAT_R8G8_SINT] = 0xb98,
|
||||
[PIPE_FORMAT_R16_UNORM] = 0xbe8,
|
||||
[PIPE_FORMAT_R16_SNORM] = 0xc48,
|
||||
[PIPE_FORMAT_R16_SINT] = 0xca0,
|
||||
[PIPE_FORMAT_R16_UINT] = 0xce8,
|
||||
[PIPE_FORMAT_R16_FLOAT] = 0xd30,
|
||||
[PIPE_FORMAT_R8_UNORM] = 0xd88,
|
||||
[PIPE_FORMAT_R8_SNORM] = 0xde0,
|
||||
[PIPE_FORMAT_R8_SINT] = 0xe38,
|
||||
[PIPE_FORMAT_R8_UINT] = 0xe88,
|
||||
[PIPE_FORMAT_R11G11B10_FLOAT] = 0xed0
|
||||
};
|
||||
|
|
|
|||
607
src/gallium/drivers/nvc0/nve4_compute.c
Normal file
607
src/gallium/drivers/nvc0/nve4_compute.c
Normal file
|
|
@ -0,0 +1,607 @@
|
|||
/*
|
||||
* Copyright 2012 Nouveau Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
|
||||
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors: Christoph Bumiller
|
||||
*/
|
||||
|
||||
#include "nvc0_context.h"
|
||||
#include "nve4_compute.h"
|
||||
|
||||
#include "nv50/codegen/nv50_ir_driver.h"
|
||||
|
||||
static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *);
|
||||
|
||||
|
||||
int
|
||||
nve4_screen_compute_setup(struct nvc0_screen *screen,
|
||||
struct nouveau_pushbuf *push)
|
||||
{
|
||||
struct nouveau_device *dev = screen->base.device;
|
||||
struct nouveau_object *chan = screen->base.channel;
|
||||
unsigned i;
|
||||
int ret;
|
||||
uint32_t obj_class;
|
||||
|
||||
switch (dev->chipset & 0xf0) {
|
||||
case 0xf0:
|
||||
obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
|
||||
break;
|
||||
case 0xe0:
|
||||
obj_class = NVE4_COMPUTE_CLASS; /* GK104 */
|
||||
break;
|
||||
default:
|
||||
NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
|
||||
break;
|
||||
}
|
||||
|
||||
ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0,
|
||||
&screen->compute);
|
||||
if (ret) {
|
||||
NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, NVE4_CP_PARAM_SIZE, NULL,
|
||||
&screen->parm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
|
||||
PUSH_DATA (push, screen->compute->oclass);
|
||||
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, screen->tls->offset);
|
||||
PUSH_DATA (push, screen->tls->offset);
|
||||
/* No idea why there are 2. Divide size by 2 to be safe.
|
||||
* Actually this might be per-MP TEMP size and looks like I'm only using
|
||||
* 2 MPs instead of all 8.
|
||||
*/
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_SIZE_HIGH(0)), 3);
|
||||
PUSH_DATAh(push, screen->tls_size / 2);
|
||||
PUSH_DATA (push, screen->tls_size / 2);
|
||||
PUSH_DATA (push, 0xff);
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_SIZE_HIGH(1)), 3);
|
||||
PUSH_DATAh(push, screen->tls_size / 2);
|
||||
PUSH_DATA (push, screen->tls_size / 2);
|
||||
PUSH_DATA (push, 0xff);
|
||||
|
||||
/* Unified address space ? Who needs that ? Certainly not OpenCL.
|
||||
*
|
||||
* FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
|
||||
* accessible. We cannot prevent that at the moment, so expect failure.
|
||||
*/
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(LOCAL_BASE), 1);
|
||||
PUSH_DATA (push, 1 << 24);
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(SHARED_BASE), 1);
|
||||
PUSH_DATA (push, 2 << 24);
|
||||
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(CODE_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, screen->text->offset);
|
||||
PUSH_DATA (push, screen->text->offset);
|
||||
|
||||
BEGIN_NVC0(push, SUBC_COMPUTE(0x0310), 1);
|
||||
PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);
|
||||
|
||||
/* NOTE: these do not affect the state used by the 3D object */
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(TIC_ADDRESS_HIGH), 3);
|
||||
PUSH_DATAh(push, screen->txc->offset);
|
||||
PUSH_DATA (push, screen->txc->offset);
|
||||
PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(TSC_ADDRESS_HIGH), 3);
|
||||
PUSH_DATAh(push, screen->txc->offset + 65536);
|
||||
PUSH_DATA (push, screen->txc->offset + 65536);
|
||||
PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
|
||||
|
||||
if (obj_class >= NVF0_COMPUTE_CLASS) {
|
||||
BEGIN_NVC0(push, SUBC_COMPUTE(0x0248), 1);
|
||||
PUSH_DATA (push, 0x100);
|
||||
BEGIN_NIC0(push, SUBC_COMPUTE(0x0248), 63);
|
||||
for (i = 63; i >= 1; --i)
|
||||
PUSH_DATA(push, 0x38000 | i);
|
||||
IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
|
||||
IMMED_NVC0(push, SUBC_COMPUTE(0x518), 0);
|
||||
}
|
||||
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(TEX_CB_INDEX), 1);
|
||||
PUSH_DATA (push, 0); /* does not interefere with 3D */
|
||||
|
||||
if (obj_class >= NVF0_COMPUTE_CLASS)
|
||||
IMMED_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
|
||||
|
||||
/* MS sample coordinate offsets: these do not work with _ALT modes ! */
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
|
||||
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
|
||||
PUSH_DATA (push, 64);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL);
|
||||
BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
|
||||
PUSH_DATA (push, 0); /* 0 */
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 1); /* 1 */
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0); /* 2 */
|
||||
PUSH_DATA (push, 1);
|
||||
PUSH_DATA (push, 1); /* 3 */
|
||||
PUSH_DATA (push, 1);
|
||||
PUSH_DATA (push, 2); /* 4 */
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 3); /* 5 */
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 2); /* 6 */
|
||||
PUSH_DATA (push, 1);
|
||||
PUSH_DATA (push, 3); /* 7 */
|
||||
PUSH_DATA (push, 1);
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
|
||||
{
|
||||
struct nvc0_screen *screen = nvc0->screen;
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
struct nv50_surface *sf;
|
||||
struct nv04_resource *res;
|
||||
uint32_t mask;
|
||||
unsigned i;
|
||||
const unsigned t = 1;
|
||||
|
||||
mask = nvc0->surfaces_dirty[t];
|
||||
while (mask) {
|
||||
i = ffs(mask) - 1;
|
||||
mask &= ~(1 << i);
|
||||
|
||||
/*
|
||||
* NVE4's surface load/store instructions receive all the information
|
||||
* directly instead of via binding points, so we have to supply them.
|
||||
*/
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
|
||||
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
|
||||
PUSH_DATA (push, 64);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL);
|
||||
BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
|
||||
|
||||
nve4_set_surface_info(push, nvc0->surfaces[t][i], screen);
|
||||
|
||||
sf = nv50_surface(nvc0->surfaces[t][i]);
|
||||
if (sf) {
|
||||
res = nv04_resource(sf->base.texture);
|
||||
|
||||
if (sf->base.writable)
|
||||
BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
|
||||
else
|
||||
BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
|
||||
}
|
||||
}
|
||||
if (nvc0->surfaces_dirty[t]) {
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
|
||||
}
|
||||
|
||||
/* re-reference non-dirty surfaces */
|
||||
mask = nvc0->surfaces_valid[t] & ~nvc0->surfaces_dirty[t];
|
||||
while (mask) {
|
||||
i = ffs(mask) - 1;
|
||||
mask &= ~(1 << i);
|
||||
|
||||
sf = nv50_surface(nvc0->surfaces[t][i]);
|
||||
res = nv04_resource(sf->base.texture);
|
||||
|
||||
if (sf->base.writable)
|
||||
BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
|
||||
else
|
||||
BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
|
||||
}
|
||||
|
||||
nvc0->surfaces_dirty[t] = 0;
|
||||
}
|
||||
|
||||
|
||||
/* Thankfully, textures with samplers follow the normal rules. */
|
||||
static void
|
||||
nve4_compute_validate_samplers(struct nvc0_context *nvc0)
|
||||
{
|
||||
boolean need_flush = nve4_validate_tsc(nvc0, 5);
|
||||
if (need_flush) {
|
||||
BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1);
|
||||
PUSH_DATA (nvc0->base.pushbuf, 0);
|
||||
}
|
||||
}
|
||||
/* (Code duplicated at bottom for various non-convincing reasons.
 *  E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
 *  entries to avoid a subchannel switch.
 *  Same for texture cache flushes.
 *  Also, the bufctx differs, and more IFs in the 3D version look ugly.)
 */
|
||||
static void nve4_compute_validate_textures(struct nvc0_context *);
|
||||
|
||||
static void
|
||||
nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
uint64_t address;
|
||||
const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
|
||||
unsigned i, n;
|
||||
uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
|
||||
|
||||
if (!dirty)
|
||||
return;
|
||||
i = ffs(dirty) - 1;
|
||||
n = util_logbase2(dirty) + 1 - i;
|
||||
assert(n);
|
||||
|
||||
address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);
|
||||
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, address);
|
||||
PUSH_DATA (push, address);
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
|
||||
PUSH_DATA (push, n * 4);
|
||||
PUSH_DATA (push, 0x1);
|
||||
BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + n);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
|
||||
PUSH_DATAp(push, &nvc0->tex_handles[s][i], n);
|
||||
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
|
||||
|
||||
nvc0->textures_dirty[s] = 0;
|
||||
nvc0->samplers_dirty[s] = 0;
|
||||
}
|
||||
|
||||
|
||||
static boolean
|
||||
nve4_compute_validate_program(struct nvc0_context *nvc0)
|
||||
{
|
||||
struct nvc0_program *prog = nvc0->compprog;
|
||||
|
||||
if (prog->mem)
|
||||
return TRUE;
|
||||
|
||||
if (!prog->translated) {
|
||||
prog->translated = nvc0_program_translate(
|
||||
prog, nvc0->screen->base.device->chipset);
|
||||
if (!prog->translated)
|
||||
return FALSE;
|
||||
}
|
||||
if (unlikely(!prog->code_size))
|
||||
return FALSE;
|
||||
|
||||
if (likely(prog->code_size)) {
|
||||
if (nvc0_program_upload_code(nvc0, prog)) {
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CODE);
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
static boolean
|
||||
nve4_compute_state_validate(struct nvc0_context *nvc0)
|
||||
{
|
||||
if (!nve4_compute_validate_program(nvc0))
|
||||
return FALSE;
|
||||
if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
|
||||
nve4_compute_validate_textures(nvc0);
|
||||
if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
|
||||
nve4_compute_validate_samplers(nvc0);
|
||||
if (nvc0->dirty_cp & (NVC0_NEW_CP_TEXTURES | NVC0_NEW_CP_SAMPLERS))
|
||||
nve4_compute_set_tex_handles(nvc0);
|
||||
if (nvc0->dirty_cp & NVC0_NEW_CP_SURFACES)
|
||||
nve4_compute_validate_surfaces(nvc0);
|
||||
if (nvc0->dirty_cp & NVC0_NEW_CP_GLOBALS)
|
||||
nvc0_validate_global_residents(nvc0,
|
||||
nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);
|
||||
|
||||
nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
|
||||
|
||||
nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
|
||||
if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
|
||||
return FALSE;
|
||||
if (unlikely(nvc0->state.flushed))
|
||||
nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input)
|
||||
{
|
||||
struct nvc0_screen *screen = nvc0->screen;
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
struct nvc0_program *cp = nvc0->compprog;
|
||||
|
||||
if (!cp->parm_size)
|
||||
return;
|
||||
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, screen->parm->offset);
|
||||
PUSH_DATA (push, screen->parm->offset);
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
|
||||
PUSH_DATA (push, cp->parm_size);
|
||||
PUSH_DATA (push, 0x1);
|
||||
BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
|
||||
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
|
||||
PUSH_DATAp(push, input, cp->parm_size / 4);
|
||||
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
|
||||
}
|
||||
|
||||
static INLINE uint8_t
|
||||
nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
|
||||
{
|
||||
if (shared_size > (32 << 10))
|
||||
return NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1;
|
||||
if (shared_size > (16 << 10))
|
||||
return NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1;
|
||||
return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1;
|
||||
}
|
||||
|
||||
static void
|
||||
nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
|
||||
struct nve4_cp_launch_desc *desc,
|
||||
uint32_t label,
|
||||
const uint *block_layout,
|
||||
const uint *grid_layout)
|
||||
{
|
||||
const struct nvc0_screen *screen = nvc0->screen;
|
||||
const struct nvc0_program *cp = nvc0->compprog;
|
||||
unsigned i;
|
||||
|
||||
nve4_cp_launch_desc_init_default(desc);
|
||||
|
||||
desc->entry = nvc0_program_symbol_offset(cp, label);
|
||||
|
||||
desc->griddim_x = grid_layout[0];
|
||||
desc->griddim_y = grid_layout[1];
|
||||
desc->griddim_z = grid_layout[2];
|
||||
desc->blockdim_x = block_layout[0];
|
||||
desc->blockdim_y = block_layout[1];
|
||||
desc->blockdim_z = block_layout[2];
|
||||
|
||||
desc->shared_size = align(cp->cp.smem_size, 0x100);
|
||||
desc->local_size_p = align(cp->cp.lmem_size, 0x10);
|
||||
desc->local_size_n = 0;
|
||||
desc->cstack_size = 0x800;
|
||||
desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size);
|
||||
|
||||
desc->gpr_alloc = cp->num_gprs;
|
||||
desc->bar_alloc = cp->num_barriers;
|
||||
|
||||
for (i = 0; i < 7; ++i) {
|
||||
const unsigned s = 5;
|
||||
if (nvc0->constbuf[s][i].u.buf)
|
||||
nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]);
|
||||
}
|
||||
nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
|
||||
}
|
||||
|
||||
/*
 * Obtain scratch memory for one launch descriptor.
 *
 * 512 bytes are requested so that the returned pointer and *pgpuaddr can be
 * advanced to the next 256-byte boundary of the GPU address.
 *
 * pbo       receives the buffer object backing the scratch memory
 * pgpuaddr  receives the (aligned) GPU address of the descriptor
 *
 * Returns the CPU pointer matching *pgpuaddr, or NULL if no scratch memory
 * could be obtained.
 */
static INLINE struct nve4_cp_launch_desc *
nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
                               struct nouveau_bo **pbo, uint64_t *pgpuaddr)
{
   uint8_t *cpu = nouveau_scratch_get(nv, 512, pgpuaddr, pbo);
   unsigned misalign;

   if (!cpu)
      return NULL;

   misalign = *pgpuaddr & 255;
   if (misalign) {
      const unsigned pad = 256 - misalign;

      cpu += pad;
      *pgpuaddr += pad;
   }
   return (struct nve4_cp_launch_desc *)cpu;
}
|
||||
|
||||
void
|
||||
nve4_launch_grid(struct pipe_context *pipe,
|
||||
const uint *block_layout, const uint *grid_layout,
|
||||
uint32_t label,
|
||||
const void *input)
|
||||
{
|
||||
struct nvc0_context *nvc0 = nvc0_context(pipe);
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
struct nve4_cp_launch_desc *desc;
|
||||
uint64_t desc_gpuaddr;
|
||||
struct nouveau_bo *desc_bo;
|
||||
int ret;
|
||||
|
||||
desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);
|
||||
if (!desc)
|
||||
goto out;
|
||||
BCTX_REFN_bo(nvc0->bufctx_cp, CP_DESC, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
|
||||
desc_bo);
|
||||
|
||||
ret = !nve4_compute_state_validate(nvc0);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout);
|
||||
nve4_compute_dump_launch_desc(desc);
|
||||
|
||||
nve4_compute_upload_input(nvc0, input);
|
||||
|
||||
/* upload descriptor and flush */
|
||||
#if 0
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, desc_gpuaddr);
|
||||
PUSH_DATA (push, desc_gpuaddr);
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
|
||||
PUSH_DATA (push, 256);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL);
|
||||
BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (256 / 4));
|
||||
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DESC);
|
||||
PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE);
|
||||
#endif
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH_DESC_ADDRESS), 1);
|
||||
PUSH_DATA (push, desc_gpuaddr >> 8);
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH), 1);
|
||||
PUSH_DATA (push, 0x3);
|
||||
BEGIN_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
|
||||
out:
|
||||
if (ret)
|
||||
NOUVEAU_ERR("Failed to launch grid !\n");
|
||||
nouveau_scratch_done(&nvc0->base);
|
||||
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);
|
||||
}
|
||||
|
||||
|
||||
#define NVE4_TIC_ENTRY_INVALID 0x000fffff
|
||||
|
||||
static void
|
||||
nve4_compute_validate_textures(struct nvc0_context *nvc0)
|
||||
{
|
||||
struct nouveau_bo *txc = nvc0->screen->txc;
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
const unsigned s = 5;
|
||||
unsigned i;
|
||||
uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];
|
||||
unsigned n[2] = { 0, 0 };
|
||||
|
||||
for (i = 0; i < nvc0->num_textures[s]; ++i) {
|
||||
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
|
||||
struct nv04_resource *res;
|
||||
const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
|
||||
|
||||
if (!tic) {
|
||||
nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
|
||||
continue;
|
||||
}
|
||||
res = nv04_resource(tic->pipe.texture);
|
||||
|
||||
if (tic->id < 0) {
|
||||
tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
|
||||
|
||||
PUSH_SPACE(push, 16);
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, txc->offset + (tic->id * 32));
|
||||
PUSH_DATA (push, txc->offset + (tic->id * 32));
|
||||
BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_SIZE), 2);
|
||||
PUSH_DATA (push, 32);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL);
|
||||
BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 9);
|
||||
PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA);
|
||||
PUSH_DATAp(push, &tic->tic[0], 8);
|
||||
|
||||
commands[0][n[0]++] = (tic->id << 4) | 1;
|
||||
} else
|
||||
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
|
||||
commands[1][n[1]++] = (tic->id << 4) | 1;
|
||||
}
|
||||
nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
|
||||
|
||||
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
|
||||
res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
|
||||
|
||||
nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
|
||||
nvc0->tex_handles[s][i] |= tic->id;
|
||||
if (dirty)
|
||||
BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
|
||||
}
|
||||
for (; i < nvc0->state.num_textures[s]; ++i)
|
||||
nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
|
||||
|
||||
if (n[0]) {
|
||||
BEGIN_NIC0(push, NVE4_COMPUTE(TIC_FLUSH), n[0]);
|
||||
PUSH_DATAp(push, commands[0], n[0]);
|
||||
}
|
||||
if (n[1]) {
|
||||
BEGIN_NIC0(push, NVE4_COMPUTE(TEX_CACHE_CTL), n[1]);
|
||||
PUSH_DATAp(push, commands[1], n[1]);
|
||||
}
|
||||
|
||||
nvc0->state.num_textures[s] = nvc0->num_textures[s];
|
||||
}
|
||||
|
||||
|
||||
static const char *nve4_cache_split_name(unsigned value)
|
||||
{
|
||||
switch (value) {
|
||||
case NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1: return "16K_SHARED_48K_L1";
|
||||
case NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1: return "32K_SHARED_32K_L1";
|
||||
case NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1: return "48K_SHARED_16K_L1";
|
||||
default:
|
||||
return "(invalid)";
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
|
||||
{
|
||||
const uint32_t *data = (const uint32_t *)desc;
|
||||
unsigned i;
|
||||
boolean zero = FALSE;
|
||||
|
||||
debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
|
||||
|
||||
for (i = 0; i < sizeof(*desc); i += 4) {
|
||||
if (data[i / 4]) {
|
||||
debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
|
||||
zero = FALSE;
|
||||
} else
|
||||
if (!zero) {
|
||||
debug_printf("...\n");
|
||||
zero = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
debug_printf("entry = 0x%x\n", desc->entry);
|
||||
debug_printf("grid dimensions = %ux%ux%u\n",
|
||||
desc->griddim_x, desc->griddim_y, desc->griddim_z);
|
||||
debug_printf("block dimensions = %ux%ux%u\n",
|
||||
desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
|
||||
debug_printf("s[] size: 0x%x\n", desc->shared_size);
|
||||
debug_printf("l[] size: -0x%x / +0x%x\n",
|
||||
desc->local_size_n, desc->local_size_p);
|
||||
debug_printf("stack size: 0x%x\n", desc->cstack_size);
|
||||
debug_printf("barrier count: %u\n", desc->bar_alloc);
|
||||
debug_printf("$r count: %u\n", desc->gpr_alloc);
|
||||
debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split));
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
uint64_t address;
|
||||
uint32_t size = desc->cb[i].size;
|
||||
boolean valid = !!(desc->cb_mask & (1 << i));
|
||||
|
||||
address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;
|
||||
|
||||
if (!valid && !address && !size)
|
||||
continue;
|
||||
debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
|
||||
i, address, size, valid ? "" : " (invalid)");
|
||||
}
|
||||
}
|
||||
|
||||
110
src/gallium/drivers/nvc0/nve4_compute.h
Normal file
110
src/gallium/drivers/nvc0/nve4_compute.h
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
|
||||
#ifndef NVE4_COMPUTE_H
|
||||
#define NVE4_COMPUTE_H
|
||||
|
||||
#include "nv50/nv50_defs.xml.h"
|
||||
#include "nve4_compute.xml.h"
|
||||
|
||||
/* Input space is implemented as c0[], to which we bind the screen->parm bo.
 *
 * Byte offsets into that buffer:
 *   USER .. USER_LIMIT   kernel input parameters
 *   TEX(i)               32-bit texture/sampler handle of slot i
 *   MS_OFFSETS           multisample coordinate offset table
 *   SUF(i)               surface information block of slot i
 * Each per-slot macro is expressed through its *_STRIDE constant so the
 * two cannot drift apart.
 */
#define NVE4_CP_INPUT_USER          0x0000
#define NVE4_CP_INPUT_USER_LIMIT    0x1000
#define NVE4_CP_INPUT_TEX_STRIDE    4
#define NVE4_CP_INPUT_TEX(i)        (0x1020 + (i) * NVE4_CP_INPUT_TEX_STRIDE)
#define NVE4_CP_INPUT_TEX_MAX       32
#define NVE4_CP_INPUT_MS_OFFSETS    0x10c0
#define NVE4_CP_INPUT_SUF_STRIDE    64
#define NVE4_CP_INPUT_SUF(i)        (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
#define NVE4_CP_INPUT_SUF_MAX       32
#define NVE4_CP_INPUT_SIZE          0x1900
#define NVE4_CP_PARAM_SIZE          0x2000
|
||||
|
||||
struct nve4_cp_launch_desc
|
||||
{
|
||||
u32 unk0[8];
|
||||
u32 entry;
|
||||
u32 unk9[3];
|
||||
u32 griddim_x : 31;
|
||||
u32 unk12 : 1;
|
||||
u16 griddim_y;
|
||||
u16 griddim_z;
|
||||
u32 unk14[3];
|
||||
u16 shared_size; /* must be aligned to 0x100 */
|
||||
u16 unk15;
|
||||
u16 unk16;
|
||||
u16 blockdim_x;
|
||||
u16 blockdim_y;
|
||||
u16 blockdim_z;
|
||||
u32 cb_mask : 8;
|
||||
u32 unk20_8 : 21;
|
||||
u32 cache_split : 2;
|
||||
u32 unk20_31 : 1;
|
||||
u32 unk21[8];
|
||||
struct {
|
||||
u32 address_l;
|
||||
u32 address_h : 8;
|
||||
u32 reserved : 7;
|
||||
u32 size : 17;
|
||||
} cb[8];
|
||||
u32 local_size_p : 20;
|
||||
u32 unk45_20 : 7;
|
||||
u32 bar_alloc : 5;
|
||||
u32 local_size_n : 20;
|
||||
u32 unk46_20 : 4;
|
||||
u32 gpr_alloc : 8;
|
||||
u32 cstack_size : 20;
|
||||
u32 unk47_20 : 12;
|
||||
u32 unk48[16];
|
||||
};
|
||||
|
||||
#define NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DATA 0x41
|
||||
#define NVE4_COMPUTE_UPLOAD_EXEC_UNKVAL_DESC 0x11
|
||||
#define NVE4_COMPUTE_UPLOAD_UNK0184_UNKVAL 0x1
|
||||
|
||||
static INLINE void
|
||||
nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
|
||||
{
|
||||
memset(desc, 0, sizeof(*desc));
|
||||
|
||||
desc->unk0[7] = 0xbc000000;
|
||||
desc->unk9[2] = 0x44014000;
|
||||
desc->unk47_20 = 0x300;
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
|
||||
unsigned index,
|
||||
struct nouveau_bo *bo,
|
||||
uint32_t base, uint16_t size)
|
||||
{
|
||||
uint64_t address = bo->offset + base;
|
||||
|
||||
assert(index < 8);
|
||||
assert(!(base & 0xff));
|
||||
assert(size <= 65536);
|
||||
|
||||
desc->cb[index].address_l = address;
|
||||
desc->cb[index].address_h = address >> 32;
|
||||
desc->cb[index].size = size;
|
||||
|
||||
desc->cb_mask |= 1 << index;
|
||||
}
|
||||
|
||||
static INLINE void
|
||||
nve4_cp_launch_desc_set_ctx_cb(struct nve4_cp_launch_desc *desc,
|
||||
unsigned index,
|
||||
const struct nvc0_constbuf *cb)
|
||||
{
|
||||
assert(index < 8);
|
||||
|
||||
if (!cb->u.buf) {
|
||||
desc->cb_mask &= ~(1 << index);
|
||||
} else {
|
||||
const struct nv04_resource *buf = nv04_resource(cb->u.buf);
|
||||
assert(!cb->user);
|
||||
nve4_cp_launch_desc_set_cb(desc, index,
|
||||
buf->bo, buf->offset + cb->offset, cb->size);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* NVE4_COMPUTE_H */
|
||||
269
src/gallium/drivers/nvc0/nve4_compute.xml.h
Normal file
269
src/gallium/drivers/nvc0/nve4_compute.xml.h
Normal file
|
|
@ -0,0 +1,269 @@
|
|||
#ifndef NVE4_COMPUTE_XML
|
||||
#define NVE4_COMPUTE_XML
|
||||
|
||||
/* Autogenerated file, DO NOT EDIT manually!
|
||||
|
||||
This file was generated by the rules-ng-ng headergen tool in this git repository:
|
||||
http://0x04.net/cgit/index.cgi/rules-ng-ng
|
||||
git clone git://0x04.net/rules-ng-ng
|
||||
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- nve4_compute.xml ( 6352 bytes, from 2013-03-10 14:59:45)
|
||||
- copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
|
||||
- nvchipsets.xml ( 3870 bytes, from 2013-03-08 12:41:50)
|
||||
- nv_object.xml ( 13238 bytes, from 2013-02-07 16:35:34)
|
||||
- nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12)
|
||||
- nv50_defs.xml ( 7783 bytes, from 2013-03-08 12:42:29)
|
||||
|
||||
Copyright (C) 2006-2013 by the following authors:
|
||||
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
|
||||
- Ben Skeggs (darktama, darktama_)
|
||||
- B. R. <koala_br@users.sourceforge.net> (koala_br)
|
||||
- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
|
||||
- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
|
||||
- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
|
||||
- Dmitry Baryshkov
|
||||
- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
|
||||
- EdB <edb_@users.sf.net> (edb_)
|
||||
- Erik Waling <erikwailing@users.sf.net> (erikwaling)
|
||||
- Francisco Jerez <currojerez@riseup.net> (curro)
|
||||
- imirkin <imirkin@users.sf.net> (imirkin)
|
||||
- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
|
||||
- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
|
||||
- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
|
||||
- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
|
||||
- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
|
||||
- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
|
||||
- Mark Carey <mark.carey@gmail.com> (careym)
|
||||
- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
|
||||
- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
|
||||
- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
|
||||
- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
|
||||
- Peter Popov <ironpeter@users.sf.net> (ironpeter)
|
||||
- Richard Hughes <hughsient@users.sf.net> (hughsient)
|
||||
- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
|
||||
- Serge Martin
|
||||
- Simon Raffeiner
|
||||
- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
|
||||
- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
|
||||
- sturmflut <sturmflut@users.sf.net> (sturmflut)
|
||||
- Sylvain Munaut <tnt@246tNt.com>
|
||||
- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
|
||||
- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
|
||||
- Younes Manton <younes.m@gmail.com> (ymanton)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice (including the
|
||||
next paragraph) shall be included in all copies or substantial
|
||||
portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#define NVE4_COMPUTE_UPLOAD_SIZE 0x00000180
|
||||
|
||||
#define NVE4_COMPUTE_UPLOAD_UNK0184 0x00000184
|
||||
|
||||
#define NVE4_COMPUTE_UPLOAD_ADDRESS_HIGH 0x00000188
|
||||
|
||||
#define NVE4_COMPUTE_UPLOAD_ADDRESS_LOW 0x0000018c
|
||||
|
||||
#define NVE4_COMPUTE_UNK01A0 0x000001a0
|
||||
|
||||
#define NVE4_COMPUTE_UNK01A4 0x000001a4
|
||||
|
||||
#define NVE4_COMPUTE_UNK01A8 0x000001a8
|
||||
|
||||
#define NVE4_COMPUTE_UNK01AC 0x000001ac
|
||||
|
||||
#define NVE4_COMPUTE_UPLOAD_EXEC 0x000001b0
|
||||
|
||||
#define NVE4_COMPUTE_UPLOAD_DATA 0x000001b4
|
||||
|
||||
#define NVE4_COMPUTE_SHARED_BASE 0x00000214
|
||||
|
||||
#define NVE4_COMPUTE_MEM_BARRIER 0x0000021c
|
||||
|
||||
#define NVE4_COMPUTE_UNK0280 0x00000280
|
||||
|
||||
#define NVE4_COMPUTE_UNK02B0 0x000002b0
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS 0x000002b4
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS__SHR 8
|
||||
|
||||
#define NVE4_COMPUTE_UNK02B8 0x000002b8
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH 0x000002bc
|
||||
|
||||
#define NVE4_COMPUTE_TEMP_SIZE(i0) (0x000002e4 + 0xc*(i0))
|
||||
#define NVE4_COMPUTE_TEMP_SIZE__ESIZE 0x0000000c
|
||||
#define NVE4_COMPUTE_TEMP_SIZE__LEN 0x00000002
|
||||
|
||||
#define NVE4_COMPUTE_TEMP_SIZE_HIGH(i0) (0x000002e4 + 0xc*(i0))
|
||||
|
||||
#define NVE4_COMPUTE_TEMP_SIZE_LOW(i0) (0x000002e8 + 0xc*(i0))
|
||||
|
||||
#define NVE4_COMPUTE_TEMP_SIZE_MASK(i0) (0x000002ec + 0xc*(i0))
|
||||
|
||||
#define NVE4_COMPUTE_UNK0310 0x00000310
|
||||
|
||||
#define NVE4_COMPUTE_LOCAL_BASE 0x0000077c
|
||||
|
||||
#define NVE4_COMPUTE_TEMP_ADDRESS_HIGH 0x00000790
|
||||
|
||||
#define NVE4_COMPUTE_TEMP_ADDRESS_LOW 0x00000794
|
||||
|
||||
#define NVE4_COMPUTE_WATCHDOG_TIMER 0x00000de4
|
||||
|
||||
#define NVE4_COMPUTE_LINKED_TSC 0x00001234
|
||||
|
||||
#define NVE4_COMPUTE_TSC_FLUSH 0x00001330
|
||||
#define NVE4_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001
|
||||
#define NVE4_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0
|
||||
#define NVE4_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4
|
||||
|
||||
#define NVE4_COMPUTE_TIC_FLUSH 0x00001334
|
||||
#define NVE4_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001
|
||||
#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0
|
||||
#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4
|
||||
|
||||
#define NVE4_COMPUTE_TEX_CACHE_CTL 0x00001338
|
||||
#define NVE4_COMPUTE_TEX_CACHE_CTL_UNK0__MASK 0x00000007
|
||||
#define NVE4_COMPUTE_TEX_CACHE_CTL_UNK0__SHIFT 0
|
||||
#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__MASK 0x03fffff0
|
||||
#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__SHIFT 4
|
||||
|
||||
#define NVE4_COMPUTE_COND_ADDRESS_HIGH 0x00001550
|
||||
|
||||
#define NVE4_COMPUTE_COND_ADDRESS_LOW 0x00001554
|
||||
|
||||
#define NVE4_COMPUTE_COND_MODE 0x00001558
|
||||
#define NVE4_COMPUTE_COND_MODE_NEVER 0x00000000
|
||||
#define NVE4_COMPUTE_COND_MODE_ALWAYS 0x00000001
|
||||
#define NVE4_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002
|
||||
#define NVE4_COMPUTE_COND_MODE_EQUAL 0x00000003
|
||||
#define NVE4_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004
|
||||
|
||||
#define NVE4_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c
|
||||
|
||||
#define NVE4_COMPUTE_TSC_ADDRESS_LOW 0x00001560
|
||||
|
||||
#define NVE4_COMPUTE_TSC_LIMIT 0x00001564
|
||||
|
||||
#define NVE4_COMPUTE_TIC_ADDRESS_HIGH 0x00001574
|
||||
|
||||
#define NVE4_COMPUTE_TIC_ADDRESS_LOW 0x00001578
|
||||
|
||||
#define NVE4_COMPUTE_TIC_LIMIT 0x0000157c
|
||||
|
||||
#define NVE4_COMPUTE_CODE_ADDRESS_HIGH 0x00001608
|
||||
|
||||
#define NVE4_COMPUTE_CODE_ADDRESS_LOW 0x0000160c
|
||||
|
||||
#define NVE4_COMPUTE_FLUSH 0x00001698
|
||||
#define NVE4_COMPUTE_FLUSH_CODE 0x00000001
|
||||
#define NVE4_COMPUTE_FLUSH_GLOBAL 0x00000010
|
||||
#define NVE4_COMPUTE_FLUSH_UNK8 0x00000100
|
||||
#define NVE4_COMPUTE_FLUSH_CB 0x00001000
|
||||
|
||||
#define NVE4_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00
|
||||
|
||||
#define NVE4_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04
|
||||
|
||||
#define NVE4_COMPUTE_QUERY_SEQUENCE 0x00001b08
|
||||
|
||||
#define NVE4_COMPUTE_QUERY_GET 0x00001b0c
|
||||
#define NVE4_COMPUTE_QUERY_GET_MODE__MASK 0x00000003
|
||||
#define NVE4_COMPUTE_QUERY_GET_MODE__SHIFT 0
|
||||
#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE 0x00000000
|
||||
#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE_INTR_NRHOST 0x00000003
|
||||
#define NVE4_COMPUTE_QUERY_GET_INTR 0x00100000
|
||||
#define NVE4_COMPUTE_QUERY_GET_SHORT 0x10000000
|
||||
|
||||
#define NVE4_COMPUTE_TEX_CB_INDEX 0x00002608
|
||||
|
||||
#define NVE4_COMPUTE_UNK260c 0x0000260c
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC__SIZE 0x00000100
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_PROG_START 0x00000020
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_12 0x00000030
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__MASK 0x7fffffff
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__SHIFT 0
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ 0x00000034
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__MASK 0x0000ffff
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__SHIFT 0
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__MASK 0xffff0000
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__SHIFT 16
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_17 0x00000044
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__MASK 0x0000ffff
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__SHIFT 0
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_18 0x00000048
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__MASK 0xffff0000
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__SHIFT 16
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ 0x0000004c
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__MASK 0x0000ffff
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__SHIFT 0
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__MASK 0xffff0000
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__SHIFT 16
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_20 0x00000050
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__MASK 0x000000ff
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__SHIFT 0
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__MASK 0x60000000
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__SHIFT 29
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_16K_SHARED_48K_L1 0x20000000
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_32K_SHARED_32K_L1 0x40000000
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_48K_SHARED_16K_L1 0x60000000
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0(i0) (0x00000074 + 0x8*(i0))
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__ESIZE 0x00000008
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__LEN 0x00000008
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__MASK 0xffffffff
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__SHIFT 0
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1(i0) (0x00000078 + 0x8*(i0))
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__ESIZE 0x00000008
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__LEN 0x00000008
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__MASK 0x000000ff
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__SHIFT 0
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__MASK 0xffff8000
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__SHIFT 15
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_45 0x000000b4
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__MASK 0x000fffff
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__SHIFT 0
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__MASK 0xf8000000
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__SHIFT 27
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_46 0x000000b8
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__MASK 0x000fffff
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__SHIFT 0
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__MASK 0x3f000000
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__SHIFT 24
|
||||
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_47 0x000000bc
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__MASK 0x000fffff
|
||||
#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__SHIFT 0
|
||||
|
||||
|
||||
#endif /* NVE4_COMPUTE_XML */
|
||||
Loading…
Add table
Reference in a new issue