nv50: add support for texelFetch'ing MS textures, ARB_texture_multisample

Creates two areas in the AUX constbuf:
 - Sample offsets for MS textures
 - Per-texture MS settings

When executing a texelFetch with a MS sampler, looks up that texture's
settings and adjusts the parameters given to the texfetch instruction.

With this change, all the ARB_texture_multisample piglits pass, so turn
on PIPE_CAP_TEXTURE_MULTISAMPLE.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
Ilia Mirkin 2014-01-12 03:32:30 -05:00 committed by Maarten Lankhorst
parent a6cf950ba2
commit 3bd40073b9
7 changed files with 164 additions and 7 deletions

View file

@ -827,6 +827,14 @@ public:
int isShadow() const { return descTable[target].shadow ? 1 : 0; } int isShadow() const { return descTable[target].shadow ? 1 : 0; }
int isMS() const { int isMS() const {
return target == TEX_TARGET_2D_MS || target == TEX_TARGET_2D_MS_ARRAY; } return target == TEX_TARGET_2D_MS || target == TEX_TARGET_2D_MS_ARRAY; }
// Demote a multisample target to the plain 2D target of the same
// arrayness (2D_MS -> 2D, 2D_MS_ARRAY -> 2D_ARRAY). Targets that are not
// multisample are left untouched.
void clearMS() {
   if (!isMS())
      return;
   target = isArray() ? TEX_TARGET_2D_ARRAY : TEX_TARGET_2D;
}
Target& operator=(TexTarget targ) Target& operator=(TexTarget targ)
{ {

View file

@ -1232,6 +1232,7 @@ CodeEmitterNV50::emitCVT(const Instruction *i)
case TYPE_S32: code[1] = 0x44014000; break; case TYPE_S32: code[1] = 0x44014000; break;
case TYPE_U32: code[1] = 0x44004000; break; case TYPE_U32: code[1] = 0x44004000; break;
case TYPE_F16: code[1] = 0xc4000000; break; case TYPE_F16: code[1] = 0xc4000000; break;
case TYPE_U16: code[1] = 0x44000000; break;
default: default:
assert(0); assert(0);
break; break;

View file

@ -549,6 +549,8 @@ private:
bool handleCONT(Instruction *); bool handleCONT(Instruction *);
void checkPredicate(Instruction *); void checkPredicate(Instruction *);
void loadTexMsInfo(uint32_t off, Value **ms, Value **ms_x, Value **ms_y);
void loadMsInfo(Value *ms, Value *s, Value **dx, Value **dy);
private: private:
const Target *const targ; const Target *const targ;
@ -582,6 +584,41 @@ NV50LoweringPreSSA::visit(Function *f)
return true; return true;
} }
// Fetch the per-texture multisample settings from the constant buffer.
//
// off   byte offset of the texture's entry, relative to io.suInfoBase
// ms_x  receives the u32 loaded from the first word of the entry
// ms_y  receives the u32 loaded from the second word (entry + 4)
// ms    receives ms_x + ms_y, computed into a fresh GPR value
//
// Both loads read constant buffer slot io.resInfoCBSlot with no indirect
// address.
void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, Value **ms,
                                       Value **ms_x, Value **ms_y)
{
   Value *const level = new_LValue(func, FILE_GPR);
   const uint8_t slot = prog->driver->io.resInfoCBSlot;
   const uint32_t entry = off + prog->driver->io.suInfoBase;

   *ms_x = bld.mkLoadv(
      TYPE_U32,
      bld.mkSymbol(FILE_MEMORY_CONST, slot, TYPE_U32, entry),
      NULL);
   *ms_y = bld.mkLoadv(
      TYPE_U32,
      bld.mkSymbol(FILE_MEMORY_CONST, slot, TYPE_U32, entry + 4),
      NULL);

   // The combined level is simply the sum of the two settings.
   *ms = bld.mkOp2v(OP_ADD, TYPE_U32, level, *ms_x, *ms_y);
}
// Look up the x/y delta for one sample of a given MS level.
//
// ms  MS level value (as produced by loadTexMsInfo)
// s   sample id
// dx  receives the u32 loaded from the first word of the table entry
// dy  receives the u32 loaded from the second word (entry + 4)
//
// Each MS level owns 16 words (64 bytes) of the table and each sample
// owns 8 bytes, so the entry lives at
//    mslevel * 16 * 4 + sample * 8 = (mslevel * 8 + sample) * 8
// bytes past io.msInfoBase in constant buffer slot io.msInfoCBSlot.
void NV50LoweringPreSSA::loadMsInfo(Value *ms, Value *s, Value **dx, Value **dy)
{
   const uint8_t slot = prog->driver->io.msInfoCBSlot;
   Value *addr = new_LValue(func, FILE_ADDRESS);
   Value *idx = new_LValue(func, FILE_GPR);

   // idx = (ms << 3) + s, built in the same scratch GPR
   Value *levelBase = bld.mkOp2v(OP_SHL, TYPE_U32, idx, ms, bld.mkImm(3));
   Value *entry = bld.mkOp2v(OP_ADD, TYPE_U32, idx, levelBase, s);

   // addr = idx << 3, placed in an address register for the indirect loads
   bld.mkOp2(OP_SHL, TYPE_U32, addr, entry, bld.mkImm(3));

   *dx = bld.mkLoadv(
      TYPE_U32,
      bld.mkSymbol(FILE_MEMORY_CONST, slot, TYPE_U32,
                   prog->driver->io.msInfoBase),
      addr);
   *dy = bld.mkLoadv(
      TYPE_U32,
      bld.mkSymbol(FILE_MEMORY_CONST, slot, TYPE_U32,
                   prog->driver->io.msInfoBase + 4),
      addr);
}
bool bool
NV50LoweringPreSSA::handleTEX(TexInstruction *i) NV50LoweringPreSSA::handleTEX(TexInstruction *i)
{ {
@ -589,6 +626,29 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i)
const int dref = arg; const int dref = arg;
const int lod = i->tex.target.isShadow() ? (arg + 1) : arg; const int lod = i->tex.target.isShadow() ? (arg + 1) : arg;
// handle MS, which means looking up the MS params for this texture, and
// adjusting the input coordinates to point at the right sample.
if (i->tex.target.isMS()) {
Value *x = i->getSrc(0);
Value *y = i->getSrc(1);
Value *s = i->getSrc(arg - 1);
Value *tx = new_LValue(func, FILE_GPR), *ty = new_LValue(func, FILE_GPR),
*ms, *ms_x, *ms_y, *dx, *dy;
i->tex.target.clearMS();
loadTexMsInfo(i->tex.r * 4 * 2, &ms, &ms_x, &ms_y);
loadMsInfo(ms, s, &dx, &dy);
bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
bld.mkOp2(OP_ADD, TYPE_U32, tx, tx, dx);
bld.mkOp2(OP_ADD, TYPE_U32, ty, ty, dy);
i->setSrc(0, tx);
i->setSrc(1, ty);
i->setSrc(arg - 1, bld.loadImm(NULL, 0));
}
// dref comes before bias/lod // dref comes before bias/lod
if (i->tex.target.isShadow()) if (i->tex.target.isShadow())
if (i->op == OP_TXB || i->op == OP_TXL) if (i->op == OP_TXB || i->op == OP_TXL)

View file

@ -75,9 +75,15 @@
/* size of the buffer: 64k. not all taken up, can be reduced if needed. */ /* size of the buffer: 64k. not all taken up, can be reduced if needed. */
#define NV50_CB_AUX_SIZE (1 << 16) #define NV50_CB_AUX_SIZE (1 << 16)
/* 8 user clip planes, at 4 32-bit floats each */ /* 8 user clip planes, at 4 32-bit floats each */
#define NV50_CB_AUX_UCP_OFFSET 0x0 #define NV50_CB_AUX_UCP_OFFSET 0x0000
/* 256 textures, each with 2 16-bit integers specifying the x/y MS shift */ #define NV50_CB_AUX_UCP_SIZE (8 * 4 * 4)
#define NV50_CB_AUX_MS_OFFSET 0x80 /* 256 textures, each with ms_x, ms_y u32 pairs */
#define NV50_CB_AUX_TEX_MS_OFFSET 0x0080
#define NV50_CB_AUX_TEX_MS_SIZE (256 * 2 * 4)
/* For each of the 4 MS levels, 8 sample offsets stored as (x, y) pairs of 32-bit integers */
#define NV50_CB_AUX_MS_OFFSET 0x880
#define NV50_CB_AUX_MS_SIZE (4 * 8 * 4 * 2)
/* next spot: 0x980 */
/* 4 32-bit floats for the vertex runout, put at the end */ /* 4 32-bit floats for the vertex runout, put at the end */
#define NV50_CB_AUX_RUNOUT_OFFSET (NV50_CB_AUX_SIZE - 0x10) #define NV50_CB_AUX_RUNOUT_OFFSET (NV50_CB_AUX_SIZE - 0x10)
@ -251,6 +257,7 @@ extern void nv50_init_surface_functions(struct nv50_context *);
/* nv50_tex.c */ /* nv50_tex.c */
void nv50_validate_textures(struct nv50_context *); void nv50_validate_textures(struct nv50_context *);
void nv50_validate_samplers(struct nv50_context *); void nv50_validate_samplers(struct nv50_context *);
void nv50_upload_ms_info(struct nouveau_pushbuf *);
struct pipe_sampler_view * struct pipe_sampler_view *
nv50_create_texture_view(struct pipe_context *, nv50_create_texture_view(struct pipe_context *,

View file

@ -329,9 +329,14 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
info->bin.source = (void *)prog->pipe.tokens; info->bin.source = (void *)prog->pipe.tokens;
info->io.ucpCBSlot = 15; info->io.ucpCBSlot = 15;
info->io.ucpBase = 0; info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
info->io.genUserClip = prog->vp.clpd_nr; info->io.genUserClip = prog->vp.clpd_nr;
info->io.resInfoCBSlot = 15;
info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;
info->assignSlots = nv50_program_assign_varying_slots; info->assignSlots = nv50_program_assign_varying_slots;
prog->vp.bfc[0] = 0xff; prog->vp.bfc[0] = 0xff;

View file

@ -183,8 +183,9 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
return 0; return 0;
case PIPE_CAP_TEXTURE_MULTISAMPLE:
return 1;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return 1; return 1;
case PIPE_CAP_QUERY_PIPELINE_STATISTICS: case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
@ -480,7 +481,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
/* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */ /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */
BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
PUSH_DATA (push, (NV50_CB_AUX_RUNOUT_OFFSET << 6) | NV50_CB_AUX); PUSH_DATA (push, (NV50_CB_AUX_RUNOUT_OFFSET << (8 - 2)) | NV50_CB_AUX);
BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4); BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4);
PUSH_DATAf(push, 0.0f); PUSH_DATAf(push, 0.0f);
PUSH_DATAf(push, 0.0f); PUSH_DATAf(push, 0.0f);
@ -490,6 +491,8 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET); PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET); PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
nv50_upload_ms_info(push);
/* max TIC (bits 4:8) & TSC bindings, per program type */ /* max TIC (bits 4:8) & TSC bindings, per program type */
for (i = 0; i < 3; ++i) { for (i = 0; i < 3; ++i) {
BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1); BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1);

View file

@ -143,7 +143,7 @@ nv50_create_texture_view(struct pipe_context *pipe,
tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT; tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT;
tic[3] = mt->level[0].pitch; tic[3] = mt->level[0].pitch;
tic[4] = mt->base.base.width0; tic[4] = mt->base.base.width0;
tic[5] = (1 << 16) | mt->base.base.height0; tic[5] = (1 << 16) | (mt->base.base.height0);
} }
tic[6] = tic[6] =
tic[7] = 0; tic[7] = 0;
@ -284,6 +284,24 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
PUSH_DATA (push, (i << 1) | 0); PUSH_DATA (push, (i << 1) | 0);
} }
if (nv50->num_textures[s]) {
BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
PUSH_DATA (push, (NV50_CB_AUX_TEX_MS_OFFSET << (8 - 2)) | NV50_CB_AUX);
BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nv50->num_textures[s] * 2);
for (i = 0; i < nv50->num_textures[s]; i++) {
struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);
struct nv50_miptree *res;
if (!tic) {
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
continue;
}
res = nv50_miptree(tic->pipe.texture);
PUSH_DATA (push, res->ms_x);
PUSH_DATA (push, res->ms_y);
}
}
nv50->state.num_textures[s] = nv50->num_textures[s]; nv50->state.num_textures[s] = nv50->num_textures[s];
return need_flush; return need_flush;
@ -354,3 +372,58 @@ void nv50_validate_samplers(struct nv50_context *nv50)
PUSH_DATA (nv50->base.pushbuf, 0); PUSH_DATA (nv50->base.pushbuf, 0);
} }
} }
/* Per-sample (x, y) offsets for the 4 possible MS levels (1, 2, 4, 8).
 *
 * Every level is padded out to 8 (x, y) pairs, i.e. 16 words, so that the
 * pair for a given (level, sample) always sits at word index
 * (level * 8 + sample) * 2. Slots past a level's real sample count are
 * filled with DUMMY.
 */
#define COMBINE(x, y) x, y
#define DUMMY 0, 0
static const uint32_t msaa_sample_xy_offsets[] = {
   /* MS1: 1 sample, 7 padding slots */
   COMBINE(0, 0), DUMMY,         DUMMY,         DUMMY,
   DUMMY,         DUMMY,         DUMMY,         DUMMY,
   /* MS2: 2 samples, 6 padding slots */
   COMBINE(0, 0), COMBINE(1, 0), DUMMY,         DUMMY,
   DUMMY,         DUMMY,         DUMMY,         DUMMY,
   /* MS4: 4 samples, 4 padding slots */
   COMBINE(0, 0), COMBINE(1, 0), COMBINE(0, 1), COMBINE(1, 1),
   DUMMY,         DUMMY,         DUMMY,         DUMMY,
   /* MS8: 8 samples, no padding */
   COMBINE(0, 0), COMBINE(1, 0), COMBINE(0, 1), COMBINE(1, 1),
   COMBINE(2, 0), COMBINE(3, 0), COMBINE(2, 1), COMBINE(3, 1),
};
/* Push the whole msaa_sample_xy_offsets table into the AUX constant buffer
 * at NV50_CB_AUX_MS_OFFSET, using the given pushbuf.
 */
void nv50_upload_ms_info(struct nouveau_pushbuf *push)
{
   const unsigned num_words = Elements(msaa_sample_xy_offsets);

   /* Select the destination: offset within NV50_CB_AUX.
    * NOTE(review): the (8 - 2) shift presumably places a 32-bit-word offset
    * into the field starting at bit 8 - confirm against the CB_ADDR layout.
    */
   BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
   PUSH_DATA (push, (NV50_CB_AUX_MS_OFFSET << (8 - 2)) | NV50_CB_AUX);

   /* Stream every table word through CB_DATA(0). */
   BEGIN_NI04(push, NV50_3D(CB_DATA(0)), num_words);
   PUSH_DATAp(push, msaa_sample_xy_offsets, num_words);
}