nvc0: add support for PIPE_CAP_SAMPLE_SHADING

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
Ilia Mirkin 2014-04-21 00:28:13 -04:00
parent f715a0a39a
commit af38ef907c
15 changed files with 131 additions and 14 deletions

View file

@ -136,6 +136,7 @@ enum operation
OP_DFDY,
OP_RDSV, // read system value
OP_WRSV, // write system value
OP_PIXLD, // get info about raster object or surfaces
OP_QUADOP,
OP_QUADON,
OP_QUADPOP,
@ -214,6 +215,12 @@ enum operation
#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | ((d == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | ((d == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | ((d == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_PIXLD_COUNT 0
#define NV50_IR_SUBOP_PIXLD_COVMASK 1
#define NV50_IR_SUBOP_PIXLD_COVERED 2
#define NV50_IR_SUBOP_PIXLD_OFFSET 3
#define NV50_IR_SUBOP_PIXLD_CENT_OFFSET 4
#define NV50_IR_SUBOP_PIXLD_SAMPLEID 5
#define NV50_IR_SUBOP_MADSP_SD 0xffff
// Yes, we could represent those with DataType.
// Or put the type into operation and have a couple 1000 values in that enum.

View file

@ -113,6 +113,8 @@ private:
void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
void emitPIXLD(const Instruction *);
void emitFlow(const Instruction *);
inline void defId(const ValueDef&, const int pos);
@ -1129,6 +1131,14 @@ CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask
emitPredicate(i);
}
void
CodeEmitterGK110::emitPIXLD(const Instruction *i)
{
   // emitForm_L runs first; the bits below are OR-ed on top of whatever it
   // left in code[1].
   emitForm_L(i, 0x7f4, 2, Modifier(0));

   // The instruction's subOp (one of the NV50_IR_SUBOP_PIXLD_* selectors) is
   // placed starting at bit 2 of the second word, together with the constant
   // 0x00070000 field.
   code[1] |= (i->subOp << 2) | 0x00070000;
}
void
CodeEmitterGK110::emitFlow(const Instruction *i)
{
@ -1684,6 +1694,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
case OP_TEXBAR:
emitTEXBAR(insn);
break;
case OP_PIXLD:
emitPIXLD(insn);
break;
case OP_BRA:
case OP_CALL:
case OP_PRERET:

View file

@ -135,6 +135,8 @@ private:
void emitVSHL(const Instruction *);
void emitVectorSubOp(const Instruction *);
void emitPIXLD(const Instruction *);
inline void defId(const ValueDef&, const int pos);
inline void defId(const Instruction *, int d, const int pos);
inline void srcId(const ValueRef&, const int pos);
@ -2141,6 +2143,15 @@ CodeEmitterNVC0::emitVSHL(const Instruction *i)
code[1] |= 1 << 16;
}
void
CodeEmitterNVC0::emitPIXLD(const Instruction *i)
{
   // Only the 8-byte encoding is handled here.
   assert(i->encSize == 8);

   // emitForm_A runs first; the extra bits are OR-ed in afterwards.
   emitForm_A(i, HEX64(10000000, 00000006));

   // subOp (one of the NV50_IR_SUBOP_PIXLD_* selectors) starts at bit 5 of
   // the first word; the second word receives the constant 0x00e00000.
   const uint32_t subOpBits = i->subOp << 5;
   code[1] |= 0x00e00000;
   code[0] |= subOpBits;
}
bool
CodeEmitterNVC0::emitInstruction(Instruction *insn)
{
@ -2390,6 +2401,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
case OP_VSHL:
emitVSHL(insn);
break;
case OP_PIXLD:
emitPIXLD(insn);
break;
case OP_PHI:
case OP_UNION:
case OP_CONSTRAINT:

View file

@ -1426,6 +1426,27 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
bld.mkLoad(TYPE_U32, i->getDef(0),
bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL);
break;
case SV_SAMPLE_INDEX:
// TODO: Properly pass source as an address in the PIX address space
// (which can be of the form [r0+offset]). But this is currently
// unnecessary.
ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
break;
case SV_SAMPLE_POS: {
Value *off = new_LValue(func, FILE_GPR);
ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3));
bld.mkLoad(TYPE_F32,
i->getDef(0),
bld.mkSymbol(
FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot,
TYPE_U32, prog->driver->io.sampleInfoBase +
4 * sym->reg.data.sv.index),
off);
break;
}
default:
if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));

View file

@ -166,6 +166,7 @@ const char *operationStr[OP_LAST + 1] =
"dfdy",
"rdsv",
"wrsv",
"pixld",
"quadop",
"quadon",
"quadpop",

View file

@ -49,7 +49,7 @@ const uint8_t Target::operationSrcNr[] =
3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP
0, // TEXBAR
1, 1, // DFDX, DFDY
1, 2, 2, 0, 0, // RDSV, WRSV, QUADOP, QUADON, QUADPOP
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
2, 3, 2, 3, // POPCNT, INSBF, EXTBF, PERMT
2, 2, // ATOM, BAR
2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
@ -112,9 +112,9 @@ const OpClass Target::operationClass[] =
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
// TEXBAR
OPCLASS_OTHER,
// DFDX, DFDY, RDSV, WRSV; QUADOP, QUADON, QUADPOP
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
// POPCNT, INSBF, EXTBF, PERMT
OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
// ATOM, BAR

View file

@ -282,6 +282,8 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
case SV_NTID: return kepler ? (0x00 + idx * 4) : ~0;
case SV_NCTAID: return kepler ? (0x0c + idx * 4) : ~0;
case SV_GRIDID: return kepler ? 0x18 : ~0;
case SV_SAMPLE_INDEX: return 0;
case SV_SAMPLE_POS: return 0;
default:
return 0xffffffff;
}

View file

@ -56,6 +56,7 @@
#define NVC0_NEW_TFB_TARGETS (1 << 21)
#define NVC0_NEW_IDXBUF (1 << 22)
#define NVC0_NEW_SURFACES (1 << 23)
#define NVC0_NEW_MIN_SAMPLES (1 << 24)
#define NVC0_NEW_CP_PROGRAM (1 << 0)
#define NVC0_NEW_CP_SURFACES (1 << 1)
@ -182,6 +183,7 @@ struct nvc0_context {
struct pipe_clip_state clip;
unsigned sample_mask;
unsigned min_samples;
boolean vbo_push_hint;

View file

@ -134,12 +134,17 @@ static int
nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
{
unsigned count = info->prop.fp.numColourResults * 4;
unsigned i, c;
unsigned i, c, ci;
for (i = 0; i < info->numOutputs; ++i)
if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
for (i = 0, ci = 0; i < info->numOutputs; ++i) {
if (info->out[i].sn == TGSI_SEMANTIC_COLOR) {
for (c = 0; c < 4; ++c)
info->out[i].slot[c] = info->out[i].si * 4 + c;
info->out[i].slot[c] = ci * 4 + c;
ci++;
}
}
assert(ci == info->prop.fp.numColourResults);
if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
info->out[info->io.sampleMask].slot[0] = count++;
@ -539,6 +544,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
info->io.genUserClip = prog->vp.num_ucps;
info->io.ucpBase = 256;
info->io.ucpCBSlot = 15;
info->io.sampleInterp = prog->fp.sample_interp;
if (prog->type == PIPE_SHADER_COMPUTE) {
if (chipset >= NVISA_GK104_CHIPSET) {
@ -551,10 +557,11 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
} else {
if (chipset >= NVISA_GK104_CHIPSET) {
info->io.resInfoCBSlot = 15;
info->io.texBindBase = 0x20;
info->io.suInfoBase = 0; /* TODO */
}
info->io.resInfoCBSlot = 15;
info->io.sampleInfoBase = 256 + 128;
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = 0; /* TODO */
}

View file

@ -45,6 +45,7 @@ struct nvc0_program {
struct {
uint8_t early_z;
uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
uint8_t sample_interp;
} fp;
struct {
uint32_t tess_mode; /* ~0 if defined by the other stage */

View file

@ -181,11 +181,11 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_SAMPLE_SHADING:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
return 1;
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
return 1;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return 4;

View file

@ -107,6 +107,8 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *fp = nvc0->fragprog;
fp->fp.sample_interp = nvc0->min_samples > 1;
if (!nvc0_program_validate(nvc0, fp))
return;
nvc0_program_update_context_state(nvc0, fp, 4);

View file

@ -858,6 +858,16 @@ nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
nvc0->dirty |= NVC0_NEW_SAMPLE_MASK;
}
/* Record a new min_samples value on the context and flag it dirty with
 * NVC0_NEW_MIN_SAMPLES. Does nothing when the value is unchanged.
 */
static void
nvc0_set_min_samples(struct pipe_context *pipe, unsigned min_samples)
{
   struct nvc0_context *ctx = nvc0_context(pipe);

   /* Already current: leave the dirty mask alone. */
   if (ctx->min_samples == min_samples)
      return;

   ctx->min_samples = min_samples;
   ctx->dirty |= NVC0_NEW_MIN_SAMPLES;
}
static void
nvc0_set_framebuffer_state(struct pipe_context *pipe,
@ -1215,6 +1225,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->set_stencil_ref = nvc0_set_stencil_ref;
pipe->set_clip_state = nvc0_set_clip_state;
pipe->set_sample_mask = nvc0_set_sample_mask;
pipe->set_min_samples = nvc0_set_min_samples;
pipe->set_constant_buffer = nvc0_set_constant_buffer;
pipe->set_framebuffer_state = nvc0_set_framebuffer_state;
pipe->set_polygon_stipple = nvc0_set_polygon_stipple;
@ -1237,4 +1248,5 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->set_shader_resources = nvc0_set_shader_resources;
nvc0->sample_mask = ~0;
nvc0->min_samples = 1;
}

View file

@ -72,7 +72,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
unsigned i;
unsigned i, ms;
unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
boolean serialize = FALSE;
@ -180,6 +180,20 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);
ms = 1 << ms_mode;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9));
PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
PUSH_DATA (push, 256 + 128);
for (i = 0; i < ms; i++) {
float xy[2];
nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
PUSH_DATAf(push, xy[0]);
PUSH_DATAf(push, xy[1]);
}
if (serialize)
IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
@ -452,8 +466,19 @@ nvc0_validate_sample_mask(struct nvc0_context *nvc0)
PUSH_DATA (push, mask[1]);
PUSH_DATA (push, mask[2]);
PUSH_DATA (push, mask[3]);
BEGIN_NVC0(push, NVC0_3D(SAMPLE_SHADING), 1);
PUSH_DATA (push, 0x01);
}
/* Emit the SAMPLE_SHADING method from the context's min_samples value.
 *
 * min_samples is passed through util_next_power_of_two(); when the result is
 * greater than 1 the NVC0_3D_SAMPLE_SHADING_ENABLE bit is OR-ed in as well.
 */
static void
nvc0_validate_min_samples(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *pb = nvc0->base.pushbuf;
   const int pot = util_next_power_of_two(nvc0->min_samples);
   const int value = (pot > 1) ? (pot | NVC0_3D_SAMPLE_SHADING_ENABLE) : pot;

   IMMED_NVC0(pb, NVC0_3D(SAMPLE_SHADING), value);
}
void
@ -560,7 +585,8 @@ static struct state_validate {
{ nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
{ nvc0_validate_surfaces, NVC0_NEW_SURFACES },
{ nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
{ nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }
{ nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG },
{ nvc0_validate_min_samples, NVC0_NEW_MIN_SAMPLES },
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))

View file

@ -516,6 +516,7 @@ struct nvc0_blitctx
unsigned num_samplers[5];
struct pipe_sampler_view *texture[2];
struct nv50_tsc_entry *sampler[2];
unsigned min_samples;
uint32_t dirty;
} saved;
struct nvc0_rasterizer_stateobj rast;
@ -746,6 +747,8 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx)
ctx->saved.gp = nvc0->gmtyprog;
ctx->saved.fp = nvc0->fragprog;
ctx->saved.min_samples = nvc0->min_samples;
nvc0->rast = &ctx->rast;
nvc0->vertprog = &blitter->vp;
@ -772,6 +775,8 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx)
nvc0->num_samplers[s] = 0;
nvc0->num_samplers[4] = 2;
nvc0->min_samples = 1;
ctx->saved.dirty = nvc0->dirty;
nvc0->textures_dirty[4] |= 3;
@ -781,7 +786,7 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx)
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0));
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1));
nvc0->dirty = NVC0_NEW_FRAMEBUFFER |
nvc0->dirty = NVC0_NEW_FRAMEBUFFER | NVC0_NEW_MIN_SAMPLES |
NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG |
NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG |
NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS;
@ -809,6 +814,8 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
nvc0->gmtyprog = blit->saved.gp;
nvc0->fragprog = blit->saved.fp;
nvc0->min_samples = blit->saved.min_samples;
pipe_sampler_view_reference(&nvc0->textures[4][0], NULL);
pipe_sampler_view_reference(&nvc0->textures[4][1], NULL);
@ -841,6 +848,8 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG |
NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG |
NVC0_NEW_TFB_TARGETS);
nvc0->base.pipe.set_min_samples(&nvc0->base.pipe, blit->saved.min_samples);
}
static void