nv50: adapt texture and constbuf paths for compute shaders

This contains the logic updates necessary to perform resource
tracking and to emit update / flush commands for the relevant stages.

Inspired by some changes from Pierre Moreau.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Pierre Moreau <dev@pmoreau.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9299>
This commit is contained in:
Ilia Mirkin 2021-02-24 19:27:49 -05:00
parent 9abfd4ba18
commit ba6ba8c990
9 changed files with 168 additions and 80 deletions

View file

@@ -692,6 +692,8 @@ void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, Value **ms,
off += 16 * 2 * 4;
if (prog->getType() > Program::TYPE_GEOMETRY)
off += 16 * 2 * 4;
if (prog->getType() > Program::TYPE_FRAGMENT)
off += 16 * 2 * 4;
*ms_x = bld.mkLoadv(TYPE_U32, bld.mkSymbol(
FILE_MEMORY_CONST, b, TYPE_U32, off + 0), NULL);
*ms_y = bld.mkLoadv(TYPE_U32, bld.mkSymbol(

View file

@@ -142,7 +142,7 @@ nv50_context_unreference_resources(struct nv50_context *nv50)
for (i = 0; i < nv50->num_vtxbufs; ++i)
pipe_vertex_buffer_unreference(&nv50->vtxbuf[i]);
for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; ++s) {
for (s = 0; s < NV50_MAX_SHADER_STAGES; ++s) {
assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
for (i = 0; i < nv50->num_textures[s]; ++i)
pipe_sampler_view_reference(&nv50->textures[s][i], NULL);
@@ -232,28 +232,38 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
}
}
for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; ++s) {
for (s = 0; s < NV50_MAX_SHADER_STAGES; ++s) {
assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
for (i = 0; i < nv50->num_textures[s]; ++i) {
if (nv50->textures[s][i] &&
nv50->textures[s][i]->texture == res) {
nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
if (unlikely(s == NV50_SHADER_STAGE_COMPUTE)) {
nv50->dirty_cp |= NV50_NEW_CP_TEXTURES;
nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_TEXTURES);
} else {
nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
}
if (!--ref)
return ref;
}
}
}
for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; ++s) {
for (s = 0; s < NV50_MAX_SHADER_STAGES; ++s) {
for (i = 0; i < NV50_MAX_PIPE_CONSTBUFS; ++i) {
if (!(nv50->constbuf_valid[s] & (1 << i)))
continue;
if (!nv50->constbuf[s][i].user &&
nv50->constbuf[s][i].u.buf == res) {
nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
nv50->constbuf_dirty[s] |= 1 << i;
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i));
if (unlikely(s == NV50_SHADER_STAGE_COMPUTE)) {
nv50->dirty_cp |= NV50_NEW_CP_CONSTBUF;
nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_CB(i));
} else {
nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i));
}
if (!--ref)
return ref;
}

View file

@@ -25,6 +25,7 @@
#include "nv50/nv50_3ddefs.xml.h"
#include "nv50/nv50_3d.xml.h"
#include "nv50/nv50_2d.xml.h"
#include "nv50/nv50_compute.xml.h"
// NOTE: the VS/GS/FS order is based on how command methods are laid out for
// TSC/TIC setting.
@@ -58,10 +59,15 @@
#define NV50_NEW_3D_STRMOUT (1 << 21)
#define NV50_NEW_3D_MIN_SAMPLES (1 << 22)
#define NV50_NEW_3D_WINDOW_RECTS (1 << 23)
#define NV50_NEW_3D_CONTEXT (1 << 31)
#define NV50_NEW_CP_PROGRAM (1 << 0)
#define NV50_NEW_CP_GLOBALS (1 << 1)
#define NV50_NEW_CP_SURFACES (1 << 1)
#define NV50_NEW_CP_TEXTURES (1 << 2)
#define NV50_NEW_CP_SAMPLERS (1 << 3)
#define NV50_NEW_CP_CONSTBUF (1 << 4)
#define NV50_NEW_CP_GLOBALS (1 << 5)
#define NV50_NEW_CP_DRIVERCONST (1 << 6)
#define NV50_NEW_CP_BUFFERS (1 << 7)
/* 3d bufctx (during draw_vbo, blit_3d) */
#define NV50_BIND_3D_FB 0
@@ -76,21 +82,25 @@
#define NV50_BIND_3D_COUNT 56
/* compute bufctx (during launch_grid) */
#define NV50_BIND_CP_GLOBAL 0
#define NV50_BIND_CP_SCREEN 1
#define NV50_BIND_CP_QUERY 2
#define NV50_BIND_CP_COUNT 3
#define NV50_BIND_CP_CB(i) ( 0 + (i))
#define NV50_BIND_CP_TEXTURES 16
#define NV50_BIND_CP_SUF 17
#define NV50_BIND_CP_BUF 18
#define NV50_BIND_CP_GLOBAL 19
#define NV50_BIND_CP_SCREEN 20
#define NV50_BIND_CP_QUERY 21
#define NV50_BIND_CP_COUNT 22
/* bufctx for other operations */
#define NV50_BIND_2D 0
#define NV50_BIND_M2MF 0
#define NV50_BIND_FENCE 1
#define NV50_CB_TMP 123
/* fixed constant buffer binding points - low indices for user's constbufs */
#define NV50_CB_PVP 124
#define NV50_CB_PGP 126
#define NV50_CB_PVP 123
#define NV50_CB_PGP 124
#define NV50_CB_PFP 125
#define NV50_CB_PCP 126
/* constant buffer permanently mapped in as c15[] */
#define NV50_CB_AUX 127
/* size of the buffer: 64k. not all taken up, can be reduced if needed. */
@@ -98,17 +108,17 @@
/* 8 user clip planes, at 4 32-bit floats each */
#define NV50_CB_AUX_UCP_OFFSET 0x0000
#define NV50_CB_AUX_UCP_SIZE (8 * 4 * 4)
/* 16 textures * NV50_MAX_3D_SHADER_STAGES shaders, each with ms_x, ms_y u32 pairs */
/* 16 textures * NV50_MAX_SHADER_STAGES shaders, each with ms_x, ms_y u32 pairs */
#define NV50_CB_AUX_TEX_MS_OFFSET 0x0080
#define NV50_CB_AUX_TEX_MS_SIZE (16 * NV50_MAX_3D_SHADER_STAGES * 2 * 4)
#define NV50_CB_AUX_TEX_MS_SIZE (16 * NV50_MAX_SHADER_STAGES * 2 * 4)
/* For each MS level (4), 8 sets of 32-bit integer pairs sample offsets */
#define NV50_CB_AUX_MS_OFFSET 0x200
#define NV50_CB_AUX_MS_OFFSET 0x280
#define NV50_CB_AUX_MS_SIZE (4 * 8 * 4 * 2)
/* Sample position pairs for the current output MS level */
#define NV50_CB_AUX_SAMPLE_OFFSET 0x300
#define NV50_CB_AUX_SAMPLE_OFFSET 0x380
#define NV50_CB_AUX_SAMPLE_OFFSET_SIZE (4 * 8 * 2)
/* Alpha test ref value */
#define NV50_CB_AUX_ALPHATEST_OFFSET 0x340
#define NV50_CB_AUX_ALPHATEST_OFFSET 0x3c0
#define NV50_CB_AUX_ALPHATEST_SIZE (4)
/* next spot: 0x344 */
/* 4 32-bit floats for the vertex runout, put at the end */
@@ -145,10 +155,10 @@ struct nv50_context {
struct nv50_program *fragprog;
struct nv50_program *compprog;
struct nv50_constbuf constbuf[NV50_MAX_3D_SHADER_STAGES][NV50_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[NV50_MAX_3D_SHADER_STAGES];
uint16_t constbuf_valid[NV50_MAX_3D_SHADER_STAGES];
uint16_t constbuf_coherent[NV50_MAX_3D_SHADER_STAGES];
struct nv50_constbuf constbuf[NV50_MAX_SHADER_STAGES][NV50_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[NV50_MAX_SHADER_STAGES];
uint16_t constbuf_valid[NV50_MAX_SHADER_STAGES];
uint16_t constbuf_coherent[NV50_MAX_SHADER_STAGES];
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned num_vtxbufs;
@@ -161,11 +171,11 @@ struct nv50_context {
uint32_t instance_off; /* base vertex for instanced arrays */
uint32_t instance_max; /* max instance for current draw call */
struct pipe_sampler_view *textures[NV50_MAX_3D_SHADER_STAGES][PIPE_MAX_SAMPLERS];
unsigned num_textures[NV50_MAX_3D_SHADER_STAGES];
uint32_t textures_coherent[NV50_MAX_3D_SHADER_STAGES];
struct nv50_tsc_entry *samplers[NV50_MAX_3D_SHADER_STAGES][PIPE_MAX_SAMPLERS];
unsigned num_samplers[NV50_MAX_3D_SHADER_STAGES];
struct pipe_sampler_view *textures[NV50_MAX_SHADER_STAGES][PIPE_MAX_SAMPLERS];
unsigned num_textures[NV50_MAX_SHADER_STAGES];
uint32_t textures_coherent[NV50_MAX_SHADER_STAGES];
struct nv50_tsc_entry *samplers[NV50_MAX_SHADER_STAGES][PIPE_MAX_SAMPLERS];
unsigned num_samplers[NV50_MAX_SHADER_STAGES];
bool seamless_cube_map;
uint8_t num_so_targets;
@@ -269,7 +279,9 @@ extern void nv50_clear(struct pipe_context *, unsigned buffers,
extern void nv50_init_surface_functions(struct nv50_context *);
/* nv50_tex.c */
bool nv50_validate_tic(struct nv50_context *nv50, int s);
void nv50_validate_textures(struct nv50_context *);
bool nv50_validate_tsc(struct nv50_context *nv50, int s);
void nv50_validate_samplers(struct nv50_context *);
void nv50_upload_ms_info(struct nouveau_pushbuf *);
void nv50_upload_tsc0(struct nv50_context *);

View file

@@ -769,8 +769,8 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000);
BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
PUSH_DATAh(push, screen->uniforms->offset + (3 << 16));
PUSH_DATA (push, screen->uniforms->offset + (3 << 16));
PUSH_DATAh(push, screen->uniforms->offset + (4 << 16));
PUSH_DATA (push, screen->uniforms->offset + (4 << 16));
PUSH_DATA (push, (NV50_CB_AUX << 16) | (NV50_CB_AUX_SIZE & 0xffff));
BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3);
@@ -787,8 +787,8 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
PUSH_DATAf(push, 0.0f);
PUSH_DATAf(push, 0.0f);
BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
PUSH_DATAh(push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
PUSH_DATA (push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
nv50_upload_ms_info(push);
@@ -1157,7 +1157,7 @@ nv50_screen_create(struct nouveau_device *dev)
debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n",
screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10);
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16, NULL,
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 5 << 16, NULL,
&screen->uniforms);
if (ret) {
NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret);

View file

@@ -39,7 +39,7 @@ struct nv50_graph_state {
uint32_t semantic_psize;
int32_t index_bias;
uint32_t clip_mode;
bool uniform_buffer_bound[3];
bool uniform_buffer_bound[4];
bool prim_restart;
bool point_sprite;
bool rt_serialize;
@@ -49,8 +49,8 @@ struct nv50_graph_state {
bool new_tls_space;
uint8_t num_vtxbufs;
uint8_t num_vtxelts;
uint8_t num_textures[3];
uint8_t num_samplers[3];
uint8_t num_textures[4];
uint8_t num_samplers[4];
uint8_t prim_size;
uint16_t scissor;
bool seamless_cube_map;

View file

@@ -109,6 +109,11 @@ nv50_constbufs_validate(struct nv50_context *nv50)
}
}
}
/* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
nv50->dirty_cp |= NV50_NEW_CP_CONSTBUF;
nv50->constbuf_dirty[NV50_SHADER_STAGE_COMPUTE] |= nv50->constbuf_valid[NV50_SHADER_STAGE_COMPUTE];
nv50->state.uniform_buffer_bound[NV50_SHADER_STAGE_COMPUTE] = false;
}
static bool

View file

@@ -592,7 +592,7 @@ nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
{
unsigned s, i;
for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; ++s) {
for (s = 0; s < NV50_MAX_SHADER_STAGES; ++s) {
assert(nv50_context(pipe)->num_samplers[s] <= PIPE_MAX_SAMPLERS);
for (i = 0; i < nv50_context(pipe)->num_samplers[s]; ++i)
if (nv50_context(pipe)->samplers[s][i] == hwcso)
@@ -626,8 +626,6 @@ nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
assert(nv50->num_samplers[s] <= PIPE_MAX_SAMPLERS);
if (nr >= nv50->num_samplers[s])
nv50->num_samplers[s] = highest_found + 1;
nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS;
}
static void
@@ -638,9 +636,13 @@ nv50_bind_sampler_states(struct pipe_context *pipe,
unsigned s = nv50_context_shader_stage(shader);
assert(start == 0);
assert(s != NV50_SHADER_STAGE_COMPUTE);
nv50_stage_sampler_states_bind(nv50_context(pipe), s, num_samplers,
samplers);
if (unlikely(s == NV50_SHADER_STAGE_COMPUTE))
nv50_context(pipe)->dirty_cp |= NV50_NEW_CP_SAMPLERS;
else
nv50_context(pipe)->dirty_3d |= NV50_NEW_3D_SAMPLERS;
}
@@ -696,10 +698,6 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
}
nv50->num_textures[s] = nr;
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
}
static void
@@ -708,11 +706,21 @@ nv50_set_sampler_views(struct pipe_context *pipe, enum pipe_shader_type shader,
unsigned unbind_num_trailing_slots,
struct pipe_sampler_view **views)
{
struct nv50_context *nv50 = nv50_context(pipe);
unsigned s = nv50_context_shader_stage(shader);
assert(start == 0);
assert(s != NV50_SHADER_STAGE_COMPUTE);
nv50_stage_set_sampler_views(nv50_context(pipe), s, nr, views);
nv50_stage_set_sampler_views(nv50, s, nr, views);
if (unlikely(s == NV50_SHADER_STAGE_COMPUTE)) {
nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_TEXTURES);
nv50->dirty_cp |= NV50_NEW_CP_TEXTURES;
} else {
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
}
}
@@ -871,17 +879,27 @@ nv50_set_constant_buffer(struct pipe_context *pipe,
const unsigned s = nv50_context_shader_stage(shader);
const unsigned i = index;
if (shader == PIPE_SHADER_COMPUTE)
return;
if (unlikely(shader == PIPE_SHADER_COMPUTE)) {
if (nv50->constbuf[s][i].user)
nv50->constbuf[s][i].u.buf = NULL;
else
if (nv50->constbuf[s][i].u.buf)
nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_CB(i));
assert(i < NV50_MAX_PIPE_CONSTBUFS);
if (nv50->constbuf[s][i].user)
nv50->constbuf[s][i].u.buf = NULL;
else
if (nv50->constbuf[s][i].u.buf) {
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i));
nv04_resource(nv50->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i);
nv50->dirty_cp |= NV50_NEW_CP_CONSTBUF;
} else {
if (nv50->constbuf[s][i].user)
nv50->constbuf[s][i].u.buf = NULL;
else
if (nv50->constbuf[s][i].u.buf)
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i));
nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
}
nv50->constbuf_dirty[s] |= 1 << i;
if (nv50->constbuf[s][i].u.buf)
nv04_resource(nv50->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i);
if (take_ownership) {
pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL);
@@ -897,21 +915,19 @@ nv50_set_constant_buffer(struct pipe_context *pipe,
nv50->constbuf_valid[s] |= 1 << i;
nv50->constbuf_coherent[s] &= ~(1 << i);
} else
if (res) {
if (cb) {
nv50->constbuf[s][i].offset = cb->buffer_offset;
nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x10000);
nv50->constbuf_valid[s] |= 1 << i;
if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
if (res && res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
nv50->constbuf_coherent[s] |= 1 << i;
else
nv50->constbuf_coherent[s] &= ~(1 << i);
} else {
}
else {
nv50->constbuf_valid[s] &= ~(1 << i);
nv50->constbuf_coherent[s] &= ~(1 << i);
}
nv50->constbuf_dirty[s] |= 1 << i;
nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
}
/* =============================================================================

View file

@@ -235,13 +235,14 @@ nv50_update_tic(struct nv50_context *nv50, struct nv50_tic_entry *tic,
tic->tic[2] |= address >> 32;
}
static bool
bool
nv50_validate_tic(struct nv50_context *nv50, int s)
{
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nouveau_bo *txc = nv50->screen->txc;
unsigned i;
bool need_flush = false;
const bool is_compute_stage = s == NV50_SHADER_STAGE_COMPUTE;
assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
for (i = 0; i < nv50->num_textures[s]; ++i) {
@@ -249,7 +250,10 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
struct nv04_resource *res;
if (!tic) {
BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
if (unlikely(is_compute_stage))
BEGIN_NV04(push, NV50_CP(BIND_TIC), 1);
else
BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
PUSH_DATA (push, (i << 1) | 0);
continue;
}
@@ -288,7 +292,10 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
need_flush = true;
} else
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
if (unlikely(is_compute_stage))
BEGIN_NV04(push, NV50_CP(TEX_CACHE_CTL), 1);
else
BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
PUSH_DATA (push, 0x20);
}
@@ -297,19 +304,32 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
BCTX_REFN(nv50->bufctx_3d, 3D_TEXTURES, res, RD);
BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
if (unlikely(is_compute_stage)) {
BCTX_REFN(nv50->bufctx_cp, CP_TEXTURES, res, RD);
BEGIN_NV04(push, NV50_CP(BIND_TIC), 1);
} else {
BCTX_REFN(nv50->bufctx_3d, 3D_TEXTURES, res, RD);
BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
}
PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1);
}
for (; i < nv50->state.num_textures[s]; ++i) {
BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
if (unlikely(is_compute_stage))
BEGIN_NV04(push, NV50_CP(BIND_TIC), 1);
else
BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
PUSH_DATA (push, (i << 1) | 0);
}
if (nv50->num_textures[s]) {
BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
if (unlikely(is_compute_stage))
BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
else
BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
PUSH_DATA (push, ((NV50_CB_AUX_TEX_MS_OFFSET + 16 * s * 2 * 4) << (8 - 2)) | NV50_CB_AUX);
BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nv50->num_textures[s] * 2);
if (unlikely(is_compute_stage))
BEGIN_NV04(push, NV50_CP(CB_DATA(0)), nv50->num_textures[s] * 2);
else
BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nv50->num_textures[s] * 2);
for (i = 0; i < nv50->num_textures[s]; i++) {
struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);
struct nv50_miptree *res;
@@ -341,21 +361,29 @@ void nv50_validate_textures(struct nv50_context *nv50)
BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TIC_FLUSH), 1);
PUSH_DATA (nv50->base.pushbuf, 0);
}
/* Invalidate all CP textures because they are aliased. */
nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_TEXTURES);
nv50->dirty_cp |= NV50_NEW_CP_TEXTURES;
}
static bool
bool
nv50_validate_tsc(struct nv50_context *nv50, int s)
{
struct nouveau_pushbuf *push = nv50->base.pushbuf;
unsigned i;
bool need_flush = false;
const bool is_compute_stage = s == NV50_SHADER_STAGE_COMPUTE;
assert(nv50->num_samplers[s] <= PIPE_MAX_SAMPLERS);
for (i = 0; i < nv50->num_samplers[s]; ++i) {
struct nv50_tsc_entry *tsc = nv50_tsc_entry(nv50->samplers[s][i]);
if (!tsc) {
BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
if (is_compute_stage)
BEGIN_NV04(push, NV50_CP(BIND_TSC), 1);
else
BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
PUSH_DATA (push, (i << 4) | 0);
continue;
}
@@ -370,11 +398,17 @@ nv50_validate_tsc(struct nv50_context *nv50, int s)
}
nv50->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
if (is_compute_stage)
BEGIN_NV04(push, NV50_CP(BIND_TSC), 1);
else
BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
PUSH_DATA (push, (tsc->id << 12) | (i << 4) | 1);
}
for (; i < nv50->state.num_samplers[s]; ++i) {
BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
if (is_compute_stage)
BEGIN_NV04(push, NV50_CP(BIND_TSC), 1);
else
BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
PUSH_DATA (push, (i << 4) | 0);
}
nv50->state.num_samplers[s] = nv50->num_samplers[s];
@@ -385,7 +419,10 @@ nv50_validate_tsc(struct nv50_context *nv50, int s)
// entry is initialized, we're good to go. This is the only bit that has
// any effect on what TXF does.
if (!nv50->samplers[s][0]) {
BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
if (is_compute_stage)
BEGIN_NV04(push, NV50_CP(BIND_TSC), 1);
else
BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
PUSH_DATA (push, 1);
}
@@ -401,9 +438,16 @@ void nv50_validate_samplers(struct nv50_context *nv50)
need_flush |= nv50_validate_tsc(nv50, s);
if (need_flush) {
BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TSC_FLUSH), 1);
if (unlikely(s == NV50_SHADER_STAGE_COMPUTE))
// TODO(pmoreau): Is this needed? Not done on nvc0
BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TSC_FLUSH), 1);
else
BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TSC_FLUSH), 1);
PUSH_DATA (nv50->base.pushbuf, 0);
}
/* Invalidate all CP samplers because they are aliased. */
nv50->dirty_cp |= NV50_NEW_CP_SAMPLERS;
}
/* There can be up to 4 different MS levels (1, 2, 4, 8). To simplify the

View file

@@ -535,8 +535,7 @@ nv50_cb_push(struct nouveau_context *nv,
/* Go through all the constbuf binding points of this buffer and try to
* find one which contains the region to be updated.
*/
/* XXX compute? */
for (s = 0; s < NV50_MAX_3D_SHADER_STAGES && !cb; s++) {
for (s = 0; s < NV50_MAX_SHADER_STAGES && !cb; s++) {
uint16_t bindings = res->cb_bindings[s];
while (bindings) {
int i = ffs(bindings) - 1;