mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
nvc0: bind driver cb for compute on c7[] for Kepler
Instead of using the screen->parm buffer object, which will be removed, upload the auxiliary constants to uniform_bo, consistent with what we already do for Fermi. This breaks surface support (for compute only), but it will be properly re-introduced later for ARB_shader_image_load_store. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
parent
f72de6f386
commit
debd910512
4 changed files with 37 additions and 45 deletions
|
|
@ -110,6 +110,12 @@
|
|||
/* 32 texture handles, one 32-bit integer each */
|
||||
/* Offset of texture handle i (4 bytes per handle, starting at 0x020).
 * The expansion is fully parenthesized so it stays a single operand when
 * used inside a larger expression, e.g. 2 * NVC0_CB_AUX_TEX_INFO(i). */
#define NVC0_CB_AUX_TEX_INFO(i) (0x020 + (i) * 4)
|
||||
#define NVC0_CB_AUX_TEX_SIZE (32 * 4)
|
||||
/* 8 sets of 32-bit coordinate offsets */
|
||||
#define NVC0_CB_AUX_MS_INFO 0x0a0 /* CP */
|
||||
#define NVC0_CB_AUX_MS_SIZE (8 * 2 * 4)
|
||||
/* block and grid sizes (three 32-bit integers each) plus the grid id */
|
||||
#define NVC0_CB_AUX_GRID_INFO 0x0e0 /* CP */
|
||||
#define NVC0_CB_AUX_GRID_SIZE (7 * 4)
|
||||
/* 8 user clip planes, four 32-bit floats each */
|
||||
#define NVC0_CB_AUX_UCP_INFO 0x100
|
||||
#define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4)
|
||||
|
|
|
|||
|
|
@ -540,17 +540,16 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
|
|||
|
||||
if (prog->type == PIPE_SHADER_COMPUTE) {
|
||||
if (chipset >= NVISA_GK104_CHIPSET) {
|
||||
info->io.auxCBSlot = 0;
|
||||
info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
|
||||
info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
|
||||
info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
|
||||
info->io.auxCBSlot = 7;
|
||||
info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
|
||||
info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO;
|
||||
info->io.bufInfoBase = 0; /* TODO */
|
||||
} else {
|
||||
info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
|
||||
info->io.suInfoBase = 0; /* TODO */
|
||||
}
|
||||
info->io.msInfoCBSlot = 0;
|
||||
info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
|
||||
info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
|
||||
info->io.suInfoBase = 0; /* TODO */
|
||||
} else {
|
||||
if (chipset >= NVISA_GK104_CHIPSET) {
|
||||
info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
|
|||
int i;
|
||||
int ret;
|
||||
uint32_t obj_class;
|
||||
uint64_t address;
|
||||
|
||||
switch (dev->chipset & ~0xf) {
|
||||
case 0x100:
|
||||
|
|
@ -65,7 +66,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
|
|||
return ret;
|
||||
}
|
||||
|
||||
ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, NVE4_CP_PARAM_SIZE, NULL,
|
||||
ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL,
|
||||
&screen->parm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
|
@ -128,15 +129,17 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
|
|||
}
|
||||
|
||||
BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1);
|
||||
PUSH_DATA (push, 0); /* does not interefere with 3D */
|
||||
PUSH_DATA (push, 7); /* does not interfere with 3D */
|
||||
|
||||
if (obj_class == NVF0_COMPUTE_CLASS)
|
||||
IMMED_NVC0(push, SUBC_CP(0x02c4), 1);
|
||||
|
||||
address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
|
||||
|
||||
/* MS sample coordinate offsets: these do not work with _ALT modes ! */
|
||||
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
|
||||
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
|
||||
PUSH_DATAh(push, address + NVC0_CB_AUX_MS_INFO);
|
||||
PUSH_DATA (push, address + NVC0_CB_AUX_MS_INFO);
|
||||
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
|
||||
PUSH_DATA (push, 64);
|
||||
PUSH_DATA (push, 1);
|
||||
|
|
@ -159,7 +162,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
|
|||
PUSH_DATA (push, 3); /* 7 */
|
||||
PUSH_DATA (push, 1);
|
||||
|
||||
#ifdef DEBUG
|
||||
#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
|
||||
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
|
||||
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
|
||||
|
|
@ -194,6 +197,9 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
|
|||
uint32_t mask;
|
||||
unsigned i;
|
||||
const unsigned t = 1;
|
||||
uint64_t address;
|
||||
|
||||
address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
|
||||
|
||||
mask = nvc0->surfaces_dirty[t];
|
||||
while (mask) {
|
||||
|
|
@ -205,8 +211,8 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
|
|||
* directly instead of via binding points, so we have to supply them.
|
||||
*/
|
||||
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
|
||||
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
|
||||
PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(i));
|
||||
PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(i));
|
||||
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
|
||||
PUSH_DATA (push, 64);
|
||||
PUSH_DATA (push, 1);
|
||||
|
|
@ -271,6 +277,7 @@ static void
|
|||
nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
struct nvc0_screen *screen = nvc0->screen;
|
||||
uint64_t address;
|
||||
const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
|
||||
unsigned i, n;
|
||||
|
|
@ -282,11 +289,11 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
|
|||
n = util_logbase2(dirty) + 1 - i;
|
||||
assert(n);
|
||||
|
||||
address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);
|
||||
address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
|
||||
|
||||
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, address);
|
||||
PUSH_DATA (push, address);
|
||||
PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(i));
|
||||
PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(i));
|
||||
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
|
||||
PUSH_DATA (push, n * 4);
|
||||
PUSH_DATA (push, 0x1);
|
||||
|
|
@ -334,6 +341,9 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
|
|||
struct nvc0_screen *screen = nvc0->screen;
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
struct nvc0_program *cp = nvc0->compprog;
|
||||
uint64_t address;
|
||||
|
||||
address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
|
||||
|
||||
if (cp->parm_size) {
|
||||
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
|
||||
|
|
@ -347,8 +357,8 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
|
|||
PUSH_DATAp(push, input, cp->parm_size / 4);
|
||||
}
|
||||
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
|
||||
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
|
||||
PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO);
|
||||
PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO);
|
||||
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
|
||||
PUSH_DATA (push, 7 * 4);
|
||||
PUSH_DATA (push, 0x1);
|
||||
|
|
@ -408,7 +418,9 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
|
|||
if (nvc0->constbuf[s][i].u.buf)
|
||||
nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]);
|
||||
}
|
||||
nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
|
||||
nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, 1 << 12);
|
||||
nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
|
||||
NVC0_CB_AUX_INFO(5), 1 << 10);
|
||||
}
|
||||
|
||||
static inline struct nve4_cp_launch_desc *
|
||||
|
|
@ -495,7 +507,7 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
|
|||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
const unsigned s = 5;
|
||||
unsigned i;
|
||||
uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];
|
||||
uint32_t commands[2][32];
|
||||
unsigned n[2] = { 0, 0 };
|
||||
|
||||
for (i = 0; i < nvc0->num_textures[s]; ++i) {
|
||||
|
|
|
|||
|
|
@ -4,31 +4,6 @@
|
|||
|
||||
#include "nvc0/nve4_compute.xml.h"
|
||||
|
||||
/* Input space is implemented as c0[], to which we bind the screen->parm bo.
|
||||
*/
|
||||
#define NVE4_CP_INPUT_USER 0x0000
|
||||
#define NVE4_CP_INPUT_USER_LIMIT 0x1000
|
||||
#define NVE4_CP_INPUT_GRID_INFO(i) (0x1000 + (i) * 4)
|
||||
#define NVE4_CP_INPUT_NTID(i) (0x1000 + (i) * 4)
|
||||
#define NVE4_CP_INPUT_NCTAID(i) (0x100c + (i) * 4)
|
||||
#define NVE4_CP_INPUT_GRIDID 0x1018
|
||||
#define NVE4_CP_INPUT_TEX(i) (0x1040 + (i) * 4)
|
||||
#define NVE4_CP_INPUT_TEX_STRIDE 4
|
||||
#define NVE4_CP_INPUT_TEX_MAX 32
|
||||
#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0
|
||||
#define NVE4_CP_INPUT_SUF_STRIDE 64
|
||||
#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
|
||||
#define NVE4_CP_INPUT_SUF_MAX 32
|
||||
#define NVE4_CP_INPUT_TRAP_INFO_PTR 0x1900
|
||||
#define NVE4_CP_INPUT_TEMP_PTR 0x1908
|
||||
#define NVE4_CP_INPUT_MP_TEMP_SIZE 0x1910
|
||||
#define NVE4_CP_INPUT_WARP_TEMP_SIZE 0x1914
|
||||
#define NVE4_CP_INPUT_CSTACK_SIZE 0x1918
|
||||
#define NVE4_CP_INPUT_SIZE 0x1a00
|
||||
#define NVE4_CP_PARAM_TRAP_INFO 0x2000
|
||||
#define NVE4_CP_PARAM_TRAP_INFO_SZ (1 << 16)
|
||||
#define NVE4_CP_PARAM_SIZE (NVE4_CP_PARAM_TRAP_INFO + (1 << 16))
|
||||
|
||||
struct nve4_cp_launch_desc
|
||||
{
|
||||
u32 unk0[8];
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue