diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp index 00a468e31c8..5c2210e5f87 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp @@ -692,6 +692,8 @@ void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, Value **ms, off += 16 * 2 * 4; if (prog->getType() > Program::TYPE_GEOMETRY) off += 16 * 2 * 4; + if (prog->getType() > Program::TYPE_FRAGMENT) + off += 16 * 2 * 4; *ms_x = bld.mkLoadv(TYPE_U32, bld.mkSymbol( FILE_MEMORY_CONST, b, TYPE_U32, off + 0), NULL); *ms_y = bld.mkLoadv(TYPE_U32, bld.mkSymbol( diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c index b22424281c2..447f10e3acb 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -142,7 +142,7 @@ nv50_context_unreference_resources(struct nv50_context *nv50) for (i = 0; i < nv50->num_vtxbufs; ++i) pipe_vertex_buffer_unreference(&nv50->vtxbuf[i]); - for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; ++s) { + for (s = 0; s < NV50_MAX_SHADER_STAGES; ++s) { assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS); for (i = 0; i < nv50->num_textures[s]; ++i) pipe_sampler_view_reference(&nv50->textures[s][i], NULL); @@ -232,28 +232,38 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, } } - for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; ++s) { + for (s = 0; s < NV50_MAX_SHADER_STAGES; ++s) { assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS); for (i = 0; i < nv50->num_textures[s]; ++i) { if (nv50->textures[s][i] && nv50->textures[s][i]->texture == res) { - nv50->dirty_3d |= NV50_NEW_3D_TEXTURES; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); + if (unlikely(s == NV50_SHADER_STAGE_COMPUTE)) { + nv50->dirty_cp |= NV50_NEW_CP_TEXTURES; + nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_TEXTURES); + } else { 
+ nv50->dirty_3d |= NV50_NEW_3D_TEXTURES; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); + } if (!--ref) return ref; } } } - for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; ++s) { + for (s = 0; s < NV50_MAX_SHADER_STAGES; ++s) { for (i = 0; i < NV50_MAX_PIPE_CONSTBUFS; ++i) { if (!(nv50->constbuf_valid[s] & (1 << i))) continue; if (!nv50->constbuf[s][i].user && nv50->constbuf[s][i].u.buf == res) { - nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF; nv50->constbuf_dirty[s] |= 1 << i; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i)); + if (unlikely(s == NV50_SHADER_STAGE_COMPUTE)) { + nv50->dirty_cp |= NV50_NEW_CP_CONSTBUF; + nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_CB(i)); + } else { + nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i)); + } if (!--ref) return ref; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 986c2687003..78cd8ce0cef 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -25,6 +25,7 @@ #include "nv50/nv50_3ddefs.xml.h" #include "nv50/nv50_3d.xml.h" #include "nv50/nv50_2d.xml.h" +#include "nv50/nv50_compute.xml.h" // NOTE: the VS/GS/FS order is based on how command methods are laid out for // TSC/TIC setting. 
@@ -58,10 +59,15 @@ #define NV50_NEW_3D_STRMOUT (1 << 21) #define NV50_NEW_3D_MIN_SAMPLES (1 << 22) #define NV50_NEW_3D_WINDOW_RECTS (1 << 23) -#define NV50_NEW_3D_CONTEXT (1 << 31) #define NV50_NEW_CP_PROGRAM (1 << 0) -#define NV50_NEW_CP_GLOBALS (1 << 1) +#define NV50_NEW_CP_SURFACES (1 << 1) +#define NV50_NEW_CP_TEXTURES (1 << 2) +#define NV50_NEW_CP_SAMPLERS (1 << 3) +#define NV50_NEW_CP_CONSTBUF (1 << 4) +#define NV50_NEW_CP_GLOBALS (1 << 5) +#define NV50_NEW_CP_DRIVERCONST (1 << 6) +#define NV50_NEW_CP_BUFFERS (1 << 7) /* 3d bufctx (during draw_vbo, blit_3d) */ #define NV50_BIND_3D_FB 0 @@ -76,21 +82,25 @@ #define NV50_BIND_3D_COUNT 56 /* compute bufctx (during launch_grid) */ -#define NV50_BIND_CP_GLOBAL 0 -#define NV50_BIND_CP_SCREEN 1 -#define NV50_BIND_CP_QUERY 2 -#define NV50_BIND_CP_COUNT 3 +#define NV50_BIND_CP_CB(i) ( 0 + (i)) +#define NV50_BIND_CP_TEXTURES 16 +#define NV50_BIND_CP_SUF 17 +#define NV50_BIND_CP_BUF 18 +#define NV50_BIND_CP_GLOBAL 19 +#define NV50_BIND_CP_SCREEN 20 +#define NV50_BIND_CP_QUERY 21 +#define NV50_BIND_CP_COUNT 22 /* bufctx for other operations */ #define NV50_BIND_2D 0 #define NV50_BIND_M2MF 0 #define NV50_BIND_FENCE 1 -#define NV50_CB_TMP 123 /* fixed constant buffer binding points - low indices for user's constbufs */ -#define NV50_CB_PVP 124 -#define NV50_CB_PGP 126 +#define NV50_CB_PVP 123 +#define NV50_CB_PGP 124 #define NV50_CB_PFP 125 +#define NV50_CB_PCP 126 /* constant buffer permanently mapped in as c15[] */ #define NV50_CB_AUX 127 /* size of the buffer: 64k. not all taken up, can be reduced if needed. 
*/ @@ -98,17 +108,17 @@ /* 8 user clip planes, at 4 32-bit floats each */ #define NV50_CB_AUX_UCP_OFFSET 0x0000 #define NV50_CB_AUX_UCP_SIZE (8 * 4 * 4) -/* 16 textures * NV50_MAX_3D_SHADER_STAGES shaders, each with ms_x, ms_y u32 pairs */ +/* 16 textures * NV50_MAX_SHADER_STAGES shaders, each with ms_x, ms_y u32 pairs */ #define NV50_CB_AUX_TEX_MS_OFFSET 0x0080 -#define NV50_CB_AUX_TEX_MS_SIZE (16 * NV50_MAX_3D_SHADER_STAGES * 2 * 4) +#define NV50_CB_AUX_TEX_MS_SIZE (16 * NV50_MAX_SHADER_STAGES * 2 * 4) /* For each MS level (4), 8 sets of 32-bit integer pairs sample offsets */ -#define NV50_CB_AUX_MS_OFFSET 0x200 +#define NV50_CB_AUX_MS_OFFSET 0x280 #define NV50_CB_AUX_MS_SIZE (4 * 8 * 4 * 2) /* Sample position pairs for the current output MS level */ -#define NV50_CB_AUX_SAMPLE_OFFSET 0x300 +#define NV50_CB_AUX_SAMPLE_OFFSET 0x380 #define NV50_CB_AUX_SAMPLE_OFFSET_SIZE (4 * 8 * 2) /* Alpha test ref value */ -#define NV50_CB_AUX_ALPHATEST_OFFSET 0x340 +#define NV50_CB_AUX_ALPHATEST_OFFSET 0x3c0 #define NV50_CB_AUX_ALPHATEST_SIZE (4) -/* next spot: 0x344 */ +/* next spot: 0x3c4 */ /* 4 32-bit floats for the vertex runout, put at the end */ @@ -145,10 +155,10 @@ struct nv50_context { struct nv50_program *fragprog; struct nv50_program *compprog; - struct nv50_constbuf constbuf[NV50_MAX_3D_SHADER_STAGES][NV50_MAX_PIPE_CONSTBUFS]; - uint16_t constbuf_dirty[NV50_MAX_3D_SHADER_STAGES]; - uint16_t constbuf_valid[NV50_MAX_3D_SHADER_STAGES]; - uint16_t constbuf_coherent[NV50_MAX_3D_SHADER_STAGES]; + struct nv50_constbuf constbuf[NV50_MAX_SHADER_STAGES][NV50_MAX_PIPE_CONSTBUFS]; + uint16_t constbuf_dirty[NV50_MAX_SHADER_STAGES]; + uint16_t constbuf_valid[NV50_MAX_SHADER_STAGES]; + uint16_t constbuf_coherent[NV50_MAX_SHADER_STAGES]; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned num_vtxbufs; @@ -161,11 +171,11 @@ struct nv50_context { uint32_t instance_off; /* base vertex for instanced arrays */ uint32_t instance_max; /* max instance for current draw call */ - struct pipe_sampler_view 
*textures[NV50_MAX_3D_SHADER_STAGES][PIPE_MAX_SAMPLERS]; - unsigned num_textures[NV50_MAX_3D_SHADER_STAGES]; - uint32_t textures_coherent[NV50_MAX_3D_SHADER_STAGES]; - struct nv50_tsc_entry *samplers[NV50_MAX_3D_SHADER_STAGES][PIPE_MAX_SAMPLERS]; - unsigned num_samplers[NV50_MAX_3D_SHADER_STAGES]; + struct pipe_sampler_view *textures[NV50_MAX_SHADER_STAGES][PIPE_MAX_SAMPLERS]; + unsigned num_textures[NV50_MAX_SHADER_STAGES]; + uint32_t textures_coherent[NV50_MAX_SHADER_STAGES]; + struct nv50_tsc_entry *samplers[NV50_MAX_SHADER_STAGES][PIPE_MAX_SAMPLERS]; + unsigned num_samplers[NV50_MAX_SHADER_STAGES]; bool seamless_cube_map; uint8_t num_so_targets; @@ -269,7 +279,9 @@ extern void nv50_clear(struct pipe_context *, unsigned buffers, extern void nv50_init_surface_functions(struct nv50_context *); /* nv50_tex.c */ +bool nv50_validate_tic(struct nv50_context *nv50, int s); void nv50_validate_textures(struct nv50_context *); +bool nv50_validate_tsc(struct nv50_context *nv50, int s); void nv50_validate_samplers(struct nv50_context *); void nv50_upload_ms_info(struct nouveau_pushbuf *); void nv50_upload_tsc0(struct nv50_context *); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 54909914a61..7a791d90de9 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -769,8 +769,8 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000); BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); - PUSH_DATAh(push, screen->uniforms->offset + (3 << 16)); - PUSH_DATA (push, screen->uniforms->offset + (3 << 16)); + PUSH_DATAh(push, screen->uniforms->offset + (4 << 16)); + PUSH_DATA (push, screen->uniforms->offset + (4 << 16)); PUSH_DATA (push, (NV50_CB_AUX << 16) | (NV50_CB_AUX_SIZE & 0xffff)); BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3); @@ -787,8 +787,8 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) 
PUSH_DATAf(push, 0.0f); PUSH_DATAf(push, 0.0f); BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); - PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET); - PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET); + PUSH_DATAh(push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_RUNOUT_OFFSET); + PUSH_DATA (push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_RUNOUT_OFFSET); nv50_upload_ms_info(push); @@ -1157,7 +1157,7 @@ nv50_screen_create(struct nouveau_device *dev) debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n", screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10); - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16, NULL, + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 5 << 16, NULL, &screen->uniforms); if (ret) { NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 6f3b40c37b8..6584b625328 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -39,7 +39,7 @@ struct nv50_graph_state { uint32_t semantic_psize; int32_t index_bias; uint32_t clip_mode; - bool uniform_buffer_bound[3]; + bool uniform_buffer_bound[4]; bool prim_restart; bool point_sprite; bool rt_serialize; @@ -49,8 +49,8 @@ struct nv50_graph_state { bool new_tls_space; uint8_t num_vtxbufs; uint8_t num_vtxelts; - uint8_t num_textures[3]; - uint8_t num_samplers[3]; + uint8_t num_textures[4]; + uint8_t num_samplers[4]; uint8_t prim_size; uint16_t scissor; bool seamless_cube_map; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index 6da09a9bdde..5a3b911bfe1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -109,6 +109,11 @@ 
nv50_constbufs_validate(struct nv50_context *nv50) } } } + + /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */ + nv50->dirty_cp |= NV50_NEW_CP_CONSTBUF; + nv50->constbuf_dirty[NV50_SHADER_STAGE_COMPUTE] |= nv50->constbuf_valid[NV50_SHADER_STAGE_COMPUTE]; + nv50->state.uniform_buffer_bound[NV50_SHADER_STAGE_COMPUTE] = false; } static bool diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index 020d39ac14d..d6c7e51aca4 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -592,7 +592,7 @@ nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { unsigned s, i; - for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; ++s) { + for (s = 0; s < NV50_MAX_SHADER_STAGES; ++s) { assert(nv50_context(pipe)->num_samplers[s] <= PIPE_MAX_SAMPLERS); for (i = 0; i < nv50_context(pipe)->num_samplers[s]; ++i) if (nv50_context(pipe)->samplers[s][i] == hwcso) @@ -626,8 +626,6 @@ nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s, assert(nv50->num_samplers[s] <= PIPE_MAX_SAMPLERS); if (nr >= nv50->num_samplers[s]) nv50->num_samplers[s] = highest_found + 1; - - nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS; } static void @@ -638,9 +636,13 @@ nv50_bind_sampler_states(struct pipe_context *pipe, unsigned s = nv50_context_shader_stage(shader); assert(start == 0); - assert(s != NV50_SHADER_STAGE_COMPUTE); nv50_stage_sampler_states_bind(nv50_context(pipe), s, num_samplers, samplers); + + if (unlikely(s == NV50_SHADER_STAGE_COMPUTE)) + nv50_context(pipe)->dirty_cp |= NV50_NEW_CP_SAMPLERS; + else + nv50_context(pipe)->dirty_3d |= NV50_NEW_3D_SAMPLERS; } @@ -696,10 +698,6 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s, } nv50->num_textures[s] = nr; - - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); - - nv50->dirty_3d |= NV50_NEW_3D_TEXTURES; } static void @@ -708,11 +706,21 @@ nv50_set_sampler_views(struct pipe_context 
*pipe, enum pipe_shader_type shader, unsigned unbind_num_trailing_slots, struct pipe_sampler_view **views) { + struct nv50_context *nv50 = nv50_context(pipe); unsigned s = nv50_context_shader_stage(shader); assert(start == 0); - assert(s != NV50_SHADER_STAGE_COMPUTE); - nv50_stage_set_sampler_views(nv50_context(pipe), s, nr, views); + nv50_stage_set_sampler_views(nv50, s, nr, views); + + if (unlikely(s == NV50_SHADER_STAGE_COMPUTE)) { + nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_TEXTURES); + + nv50->dirty_cp |= NV50_NEW_CP_TEXTURES; + } else { + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); + + nv50->dirty_3d |= NV50_NEW_3D_TEXTURES; + } } @@ -871,17 +879,27 @@ nv50_set_constant_buffer(struct pipe_context *pipe, const unsigned s = nv50_context_shader_stage(shader); const unsigned i = index; - if (shader == PIPE_SHADER_COMPUTE) - return; + if (unlikely(shader == PIPE_SHADER_COMPUTE)) { + if (nv50->constbuf[s][i].user) + nv50->constbuf[s][i].u.buf = NULL; + else + if (nv50->constbuf[s][i].u.buf) + nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_CB(i)); - assert(i < NV50_MAX_PIPE_CONSTBUFS); - if (nv50->constbuf[s][i].user) - nv50->constbuf[s][i].u.buf = NULL; - else - if (nv50->constbuf[s][i].u.buf) { - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i)); - nv04_resource(nv50->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i); + nv50->dirty_cp |= NV50_NEW_CP_CONSTBUF; + } else { + if (nv50->constbuf[s][i].user) + nv50->constbuf[s][i].u.buf = NULL; + else + if (nv50->constbuf[s][i].u.buf) + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i)); + + nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF; } + nv50->constbuf_dirty[s] |= 1 << i; + + if (nv50->constbuf[s][i].u.buf) + nv04_resource(nv50->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i); if (take_ownership) { pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL); @@ -897,21 +915,19 @@ nv50_set_constant_buffer(struct pipe_context *pipe, nv50->constbuf_valid[s] |= 1 << i; 
nv50->constbuf_coherent[s] &= ~(1 << i); } else - if (res) { + if (cb) { nv50->constbuf[s][i].offset = cb->buffer_offset; nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000); nv50->constbuf_valid[s] |= 1 << i; - if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) + if (res && res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) nv50->constbuf_coherent[s] |= 1 << i; else nv50->constbuf_coherent[s] &= ~(1 << i); - } else { + } + else { nv50->constbuf_valid[s] &= ~(1 << i); nv50->constbuf_coherent[s] &= ~(1 << i); } - nv50->constbuf_dirty[s] |= 1 << i; - - nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF; } /* ============================================================================= diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c index 2bd49e77210..abeb9160abc 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c @@ -235,13 +235,14 @@ nv50_update_tic(struct nv50_context *nv50, struct nv50_tic_entry *tic, tic->tic[2] |= address >> 32; } -static bool +bool nv50_validate_tic(struct nv50_context *nv50, int s) { struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nouveau_bo *txc = nv50->screen->txc; unsigned i; bool need_flush = false; + const bool is_compute_stage = s == NV50_SHADER_STAGE_COMPUTE; assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS); for (i = 0; i < nv50->num_textures[s]; ++i) { @@ -249,7 +250,10 @@ nv50_validate_tic(struct nv50_context *nv50, int s) struct nv04_resource *res; if (!tic) { - BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); + if (unlikely(is_compute_stage)) + BEGIN_NV04(push, NV50_CP(BIND_TIC), 1); + else + BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); PUSH_DATA (push, (i << 1) | 0); continue; } @@ -288,7 +292,10 @@ nv50_validate_tic(struct nv50_context *nv50, int s) need_flush = true; } else if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { - BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1); + if (unlikely(is_compute_stage)) + 
BEGIN_NV04(push, NV50_CP(TEX_CACHE_CTL), 1); + else + BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1); PUSH_DATA (push, 0x20); } @@ -297,19 +304,32 @@ nv50_validate_tic(struct nv50_context *nv50, int s) res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; - BCTX_REFN(nv50->bufctx_3d, 3D_TEXTURES, res, RD); - - BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); + if (unlikely(is_compute_stage)) { + BCTX_REFN(nv50->bufctx_cp, CP_TEXTURES, res, RD); + BEGIN_NV04(push, NV50_CP(BIND_TIC), 1); + } else { + BCTX_REFN(nv50->bufctx_3d, 3D_TEXTURES, res, RD); + BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); + } PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1); } for (; i < nv50->state.num_textures[s]; ++i) { - BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); + if (unlikely(is_compute_stage)) + BEGIN_NV04(push, NV50_CP(BIND_TIC), 1); + else + BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); PUSH_DATA (push, (i << 1) | 0); } if (nv50->num_textures[s]) { - BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); + if (unlikely(is_compute_stage)) + BEGIN_NV04(push, NV50_CP(CB_ADDR), 1); + else + BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); PUSH_DATA (push, ((NV50_CB_AUX_TEX_MS_OFFSET + 16 * s * 2 * 4) << (8 - 2)) | NV50_CB_AUX); - BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nv50->num_textures[s] * 2); + if (unlikely(is_compute_stage)) + BEGIN_NV04(push, NV50_CP(CB_DATA(0)), nv50->num_textures[s] * 2); + else + BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nv50->num_textures[s] * 2); for (i = 0; i < nv50->num_textures[s]; i++) { struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]); struct nv50_miptree *res; @@ -341,21 +361,29 @@ void nv50_validate_textures(struct nv50_context *nv50) BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TIC_FLUSH), 1); PUSH_DATA (nv50->base.pushbuf, 0); } + + /* Invalidate all CP textures because they are aliased. 
*/ + nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_TEXTURES); + nv50->dirty_cp |= NV50_NEW_CP_TEXTURES; } -static bool +bool nv50_validate_tsc(struct nv50_context *nv50, int s) { struct nouveau_pushbuf *push = nv50->base.pushbuf; unsigned i; bool need_flush = false; + const bool is_compute_stage = s == NV50_SHADER_STAGE_COMPUTE; assert(nv50->num_samplers[s] <= PIPE_MAX_SAMPLERS); for (i = 0; i < nv50->num_samplers[s]; ++i) { struct nv50_tsc_entry *tsc = nv50_tsc_entry(nv50->samplers[s][i]); if (!tsc) { - BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); + if (is_compute_stage) + BEGIN_NV04(push, NV50_CP(BIND_TSC), 1); + else + BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); PUSH_DATA (push, (i << 4) | 0); continue; } @@ -370,11 +398,17 @@ nv50_validate_tsc(struct nv50_context *nv50, int s) } nv50->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); - BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); + if (is_compute_stage) + BEGIN_NV04(push, NV50_CP(BIND_TSC), 1); + else + BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); PUSH_DATA (push, (tsc->id << 12) | (i << 4) | 1); } for (; i < nv50->state.num_samplers[s]; ++i) { - BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); + if (is_compute_stage) + BEGIN_NV04(push, NV50_CP(BIND_TSC), 1); + else + BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); PUSH_DATA (push, (i << 4) | 0); } nv50->state.num_samplers[s] = nv50->num_samplers[s]; @@ -385,7 +419,10 @@ nv50_validate_tsc(struct nv50_context *nv50, int s) // entry is initialized, we're good to go. This is the only bit that has // any effect on what TXF does. 
if (!nv50->samplers[s][0]) { - BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); + if (is_compute_stage) + BEGIN_NV04(push, NV50_CP(BIND_TSC), 1); + else + BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); PUSH_DATA (push, 1); } @@ -401,9 +438,16 @@ void nv50_validate_samplers(struct nv50_context *nv50) need_flush |= nv50_validate_tsc(nv50, s); if (need_flush) { - BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TSC_FLUSH), 1); + if (unlikely(s == NV50_SHADER_STAGE_COMPUTE)) + // TODO(pmoreau): Is this needed? Not done on nvc0 + BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TSC_FLUSH), 1); + else + BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TSC_FLUSH), 1); PUSH_DATA (nv50->base.pushbuf, 0); } + + /* Invalidate all CP samplers because they are aliased. */ + nv50->dirty_cp |= NV50_NEW_CP_SAMPLERS; } /* There can be up to 4 different MS levels (1, 2, 4, 8). To simplify the diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c index 68005547323..dd308e52258 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c @@ -535,8 +535,7 @@ nv50_cb_push(struct nouveau_context *nv, /* Go through all the constbuf binding points of this buffer and try to * find one which contains the region to be updated. */ - /* XXX compute? */ - for (s = 0; s < NV50_MAX_3D_SHADER_STAGES && !cb; s++) { + for (s = 0; s < NV50_MAX_SHADER_STAGES && !cb; s++) { uint16_t bindings = res->cb_bindings[s]; while (bindings) { int i = ffs(bindings) - 1;