i915g: split up hw state emission into small atoms

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This commit is contained in:
Daniel Vetter 2011-03-01 22:14:50 +01:00
parent 4c4ab5668c
commit 179cb58795
2 changed files with 308 additions and 274 deletions

View file

@ -40,12 +40,18 @@
struct i915_tracked_hw_state {
const char *name;
void (*validate)(struct i915_context *);
void (*validate)(struct i915_context *, unsigned *batch_space);
void (*emit)(struct i915_context *);
unsigned dirty, batch_space;
};
/**
 * Report how much batch space the flush atom needs.
 *
 * \param i915         context whose flush_dirty flags are inspected
 * \param batch_space  out: 1 when any flush_dirty bit is set, 0 otherwise
 */
static void
validate_flush(struct i915_context *i915, unsigned *batch_space)
{
   if (i915->flush_dirty)
      *batch_space = 1;
   else
      *batch_space = 0;
}
static void
emit_flush(struct i915_context *i915)
{
@ -61,32 +67,163 @@ emit_flush(struct i915_context *i915)
OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
}
/* Invariant hardware state.  The table is copied verbatim into the batch
 * by emit_invariant(), and its element count is what i915_validate_state()
 * reserves for it, so every entry here is one batch dword.
 */
uint32_t invariant_state[] = {
   _3DSTATE_AA_CMD |
      AA_LINE_ECAAR_WIDTH_ENABLE |
      AA_LINE_ECAAR_WIDTH_1_0 |
      AA_LINE_REGION_WIDTH_ENABLE |
      AA_LINE_REGION_WIDTH_1_0,

   _3DSTATE_DFLT_DIFFUSE_CMD,
   0,

   _3DSTATE_DFLT_SPEC_CMD,
   0,

   _3DSTATE_DFLT_Z_CMD,
   0,

   _3DSTATE_COORD_SET_BINDINGS |
      CSB_TCB(0, 0) |
      CSB_TCB(1, 1) |
      CSB_TCB(2, 2) |
      CSB_TCB(3, 3) |
      CSB_TCB(4, 4) |
      CSB_TCB(5, 5) |
      CSB_TCB(6, 6) |
      CSB_TCB(7, 7),

   _3DSTATE_RASTER_RULES_CMD |
      ENABLE_POINT_RASTER_RULE |
      OGL_POINT_RASTER_RULE |
      ENABLE_LINE_STRIP_PROVOKE_VRTX |
      ENABLE_TRI_FAN_PROVOKE_VRTX |
      LINE_STRIP_PROVOKE_VRTX(1) |
      TRI_FAN_PROVOKE_VRTX(2) |
      ENABLE_TEXKILL_3D_4D |
      TEXKILL_4D,

   _3DSTATE_DEPTH_SUBRECT_DISABLE,

   /* disable indirect state for now */
   _3DSTATE_LOAD_INDIRECT | 0,
   0
};
static void
validate_immediate(struct i915_context *i915)
emit_invariant(struct i915_context *i915)
{
i915_winsys_batchbuffer_write(i915->batch, invariant_state,
Elements(invariant_state)*sizeof(uint32_t));
}
/**
 * Validate the immediate-state atom.
 *
 * Queues the vertex buffer for validation when S0 is dirty and a vbo is
 * actually bound, and reports the batch space the atom needs.
 *
 * \param i915         context; immediate_dirty and vbo are read, and the
 *                     validation_buffers list may be appended to
 * \param batch_space  out: one header entry plus one entry per dirty
 *                     immediate in S0..S6
 */
static void
validate_immediate(struct i915_context *i915, unsigned *batch_space)
{
   /* Only S0..S6 are considered; any other dirty bits are masked off. */
   const unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
                           1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
                           1 << I915_IMMEDIATE_S4 | 1 << I915_IMMEDIATE_S5 |
                           1 << I915_IMMEDIATE_S6) &
                          i915->immediate_dirty;

   /* emit_immediate() only writes a relocation for S0 when a vbo is bound
    * (it emits a plain 0 otherwise), so a NULL vbo must not be placed in
    * the validation list.
    */
   if ((dirty & (1 << I915_IMMEDIATE_S0)) && i915->vbo)
      i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo;

   *batch_space = 1 + util_bitcount(dirty);
}
static void
validate_static(struct i915_context *i915)
emit_immediate(struct i915_context *i915)
{
if (i915->current.cbuf_bo)
/* remove unwanted bits and S7 */
unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
i915->immediate_dirty;
int i, num = util_bitcount(dirty);
assert(num && num <= I915_MAX_IMMEDIATE);
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
dirty << 4 | (num - 1));
if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {
if (i915->vbo)
OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,
i915->current.immediate[I915_IMMEDIATE_S0]);
else
OUT_BATCH(0);
}
for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
if (dirty & (1 << i))
OUT_BATCH(i915->current.immediate[i]);
}
}
/**
 * Report how much batch space the dynamic-state atom needs.
 *
 * \param i915         context whose dynamic_dirty mask is inspected
 * \param batch_space  out: number of dirty bits among the first
 *                     I915_MAX_DYNAMIC bits of dynamic_dirty
 */
static void
validate_dynamic(struct i915_context *i915, unsigned *batch_space)
{
   /* Ignore any bits at or above I915_MAX_DYNAMIC. */
   const unsigned pending = i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1);

   *batch_space = util_bitcount(pending);
}
/**
 * Emit every dirty dynamic-state entry, in ascending index order.
 *
 * One batch entry is written per set bit of dynamic_dirty below
 * I915_MAX_DYNAMIC, taken from current.dynamic[].
 */
static void
emit_dynamic(struct i915_context *i915)
{
   int slot;

   for (slot = 0; slot < I915_MAX_DYNAMIC; slot++) {
      if (!(i915->dynamic_dirty & (1 << slot)))
         continue;

      OUT_BATCH(i915->current.dynamic[slot]);
   }
}
/**
 * Validate the static-state atom.
 *
 * Appends the colour and depth buffers (when present) to the context's
 * validation list and reports the batch space required.
 *
 * \param i915         context; current.cbuf_bo and current.depth_bo are
 *                     read, validation_buffers may be appended to
 * \param batch_space  out: 2 + 5 base entries (including DRAW_RECT), plus
 *                     3 for each of the colour and depth buffers present
 */
static void
validate_static(struct i915_context *i915, unsigned *batch_space)
{
   unsigned needed = 2 + 5; /* including DRAW_RECT */

   if (i915->current.cbuf_bo) {
      i915->validation_buffers[i915->num_validation_buffers++]
         = i915->current.cbuf_bo;
      needed += 3;
   }

   if (i915->current.depth_bo) {
      i915->validation_buffers[i915->num_validation_buffers++]
         = i915->current.depth_bo;
      needed += 3;
   }

   *batch_space = needed;
}
static void
validate_map(struct i915_context *i915)
emit_static(struct i915_context *i915)
{
if (i915->current.cbuf_bo) {
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
OUT_BATCH(i915->current.cbuf_flags);
OUT_RELOC(i915->current.cbuf_bo,
I915_USAGE_RENDER,
0);
}
/* What happens if no zbuf??
*/
if (i915->current.depth_bo) {
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
OUT_BATCH(i915->current.depth_flags);
OUT_RELOC(i915->current.depth_bo,
I915_USAGE_RENDER,
0);
}
{
OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
OUT_BATCH(i915->current.dst_buf_vars);
}
}
static void
validate_map(struct i915_context *i915, unsigned *batch_space)
{
const uint enabled = i915->current.sampler_enable_flags;
uint unit;
struct i915_texture *tex;
*batch_space = i915->current.sampler_enable_nr ?
2 + 3*i915->current.sampler_enable_nr : 0;
for (unit = 0; unit < I915_TEX_UNITS; unit++) {
if (enabled & (1 << unit)) {
@ -96,26 +233,159 @@ validate_map(struct i915_context *i915)
}
}
const static struct i915_tracked_hw_state hw_atoms[] = {
{ "flush", NULL, emit_flush, I915_HW_FLUSH, 1 },
{ "immediate", validate_immediate, NULL, I915_HW_IMMEDIATE },
{ "static", validate_static, NULL, I915_HW_STATIC },
{ "map", validate_map, NULL, I915_HW_MAP }
};
/**
 * Emit the texture map state for all enabled sampler units.
 *
 * Nothing is emitted when no sampler is enabled.  Otherwise a
 * _3DSTATE_MAP_STATE header and the enable mask are written, followed by
 * three entries per enabled unit: a relocation to the texture's buffer and
 * the two texbuffer words (MS3, MS4).
 */
static void
emit_map(struct i915_context *i915)
{
   const uint num_enabled = i915->current.sampler_enable_nr;
   uint enable_mask;
   uint emitted = 0;
   uint u;

   if (!num_enabled)
      return;

   enable_mask = i915->current.sampler_enable_flags;

   OUT_BATCH(_3DSTATE_MAP_STATE | (3 * num_enabled));
   OUT_BATCH(enable_mask);

   for (u = 0; u < I915_TEX_UNITS; u++) {
      struct i915_texture *tex;
      struct i915_winsys_buffer *bo;

      if (!(enable_mask & (1 << u)))
         continue;

      tex = i915_texture(i915->fragment_sampler_views[u]->texture);
      bo = tex->buffer;
      assert(bo);

      emitted++;

      OUT_RELOC(bo, I915_USAGE_SAMPLER, 0);
      OUT_BATCH(i915->current.texbuffer[u][0]); /* MS3 */
      OUT_BATCH(i915->current.texbuffer[u][1]); /* MS4 */
   }

   /* The enable mask and the enabled-unit count must agree. */
   assert(emitted == num_enabled);
}
/**
 * Report how much batch space the sampler-state atom needs.
 *
 * \param i915         context whose current.sampler_enable_nr is read
 * \param batch_space  out: 0 when no sampler is enabled, otherwise
 *                     2 + 3 entries per enabled sampler
 */
static void
validate_sampler(struct i915_context *i915, unsigned *batch_space)
{
   if (i915->current.sampler_enable_nr)
      *batch_space = 2 + 3*i915->current.sampler_enable_nr;
   else
      *batch_space = 0;
}
/**
 * Emit the sampler state for all enabled sampler units.
 *
 * Nothing is emitted when no sampler is enabled.  Otherwise a
 * _3DSTATE_SAMPLER_STATE header and the enable mask are written, followed
 * by the three sampler words of each enabled unit in ascending unit order.
 */
static void
emit_sampler(struct i915_context *i915)
{
   int unit;

   if (!i915->current.sampler_enable_nr)
      return;

   OUT_BATCH( _3DSTATE_SAMPLER_STATE |
              (3 * i915->current.sampler_enable_nr) );
   OUT_BATCH( i915->current.sampler_enable_flags );

   for (unit = 0; unit < I915_TEX_UNITS; unit++) {
      if (!(i915->current.sampler_enable_flags & (1<<unit)))
         continue;

      OUT_BATCH( i915->current.sampler[unit][0] );
      OUT_BATCH( i915->current.sampler[unit][1] );
      OUT_BATCH( i915->current.sampler[unit][2] );
   }
}
/**
 * Report how much batch space the fragment-shader constants atom needs.
 *
 * \param i915         context whose fs->num_constants is read
 * \param batch_space  out: 0 when the shader has no constants, otherwise
 *                     2 + 4 entries per constant
 */
static void
validate_constants(struct i915_context *i915, unsigned *batch_space)
{
   if (i915->fs->num_constants)
      *batch_space = 2 + 4*i915->fs->num_constants;
   else
      *batch_space = 0;
}
/**
 * Emit the fragment shader constants.
 *
 * Collates the user-defined constants with the fragment shader's
 * immediates according to the constant_flags[] array.  Nothing is emitted
 * when the shader has no constants.  Otherwise a
 * _3DSTATE_PIXEL_SHADER_CONSTANTS header and a mask with the low
 * num_constants bits set are written, followed by four entries per
 * constant.
 */
static void
emit_constants(struct i915_context *i915)
{
   const uint nr = i915->fs->num_constants;
   uint i;

   if (!nr)
      return;

   OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );

   /* Mask with the low nr bits set.  The shift is done on an unsigned
    * value and the full-width case is handled explicitly: "(1 << nr) - 1"
    * on a signed int is undefined for nr >= 31.
    */
   OUT_BATCH(nr >= 32 ? ~0u : (1u << nr) - 1);

   for (i = 0; i < nr; i++) {
      const uint *c;

      if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
         /* grab user-defined constant */
         c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data;
         c += 4 * i;
      }
      else {
         /* emit program constant */
         c = (uint *) i915->fs->constants[i];
      }
#if 0 /* debug */
      {
         float *f = (float *) c;
         printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
                (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
                 ? "user" : "immediate"));
      }
#endif
      OUT_BATCH(*c++);
      OUT_BATCH(*c++);
      OUT_BATCH(*c++);
      OUT_BATCH(*c++);
   }
}
/**
 * Report how much batch space the fragment program atom needs.
 *
 * \param i915         context whose fs->program_len is read
 * \param batch_space  out: the fragment program length
 */
static void
validate_program(struct i915_context *i915, unsigned *batch_space)
{
   const unsigned program_entries = i915->fs->program_len;

   *batch_space = program_entries;
}
/**
 * Emit the fragment program, one batch entry per program word.
 */
static void
emit_program(struct i915_context *i915)
{
   uint idx = 0;

   /* we should always have, at least, a pass-through program */
   assert(i915->fs->program_len > 0);

   while (idx < i915->fs->program_len) {
      OUT_BATCH(i915->fs->program[idx]);
      idx++;
   }
}
/**
 * Emit the drawing rectangle packet.
 *
 * Five entries are written in this order: the _3DSTATE_DRAW_RECT_CMD
 * header, DRAW_RECT_DIS_DEPTH_OFS, current.draw_offset, current.draw_size,
 * and current.draw_offset a second time.
 */
static void
emit_draw_rect(struct i915_context *i915)
{
   const unsigned packet[] = {
      _3DSTATE_DRAW_RECT_CMD,
      DRAW_RECT_DIS_DEPTH_OFS,
      i915->current.draw_offset,
      i915->current.draw_size,
      i915->current.draw_offset
   };
   unsigned n;

   for (n = 0; n < Elements(packet); n++)
      OUT_BATCH(packet[n]);
}
static boolean
i915_validate_state(struct i915_context *i915, unsigned *batch_space)
{
int i;
unsigned tmp;
i915->num_validation_buffers = 0;
if (i915->hardware_dirty & I915_HW_INVARIANT)
*batch_space = Elements(invariant_state);
else
*batch_space = 0;
for (i = 0; i < Elements(hw_atoms); i++)
if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].validate) {
hw_atoms[i].validate(i915);
*batch_space += hw_atoms[i].batch_space;
}
#define VALIDATE_ATOM(atom, hw_dirty) \
if (i915->hardware_dirty & hw_dirty) { \
validate_##atom(i915, &tmp); \
*batch_space += tmp; }
VALIDATE_ATOM(flush, I915_HW_FLUSH);
VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE);
VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC);
VALIDATE_ATOM(static, I915_HW_STATIC);
VALIDATE_ATOM(map, I915_HW_MAP);
VALIDATE_ATOM(sampler, I915_HW_SAMPLER);
VALIDATE_ATOM(constants, I915_HW_CONSTANTS);
VALIDATE_ATOM(program, I915_HW_PROGRAM);
#undef VALIDATE_ATOM
if (i915->num_validation_buffers == 0)
return TRUE;
@ -127,16 +397,6 @@ i915_validate_state(struct i915_context *i915, unsigned *batch_space)
return TRUE;
}
static void
emit_state(struct i915_context *i915)
{
int i;
for (i = 0; i < Elements(hw_atoms); i++)
if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].emit)
hw_atoms[i].emit(i915);
}
/* Push the state into the sarea and/or texture memory.
*/
void
@ -182,247 +442,20 @@ i915_emit_hardware_state(struct i915_context *i915 )
save_ptr = (uintptr_t)i915->batch->ptr;
save_relocs = i915->batch->relocs;
emit_state(i915);
/* 14 dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_INVARIANT)
{
OUT_BATCH(_3DSTATE_AA_CMD |
AA_LINE_ECAAR_WIDTH_ENABLE |
AA_LINE_ECAAR_WIDTH_1_0 |
AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
OUT_BATCH(0);
OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
OUT_BATCH(0);
OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
OUT_BATCH(0);
OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
CSB_TCB(0, 0) |
CSB_TCB(1, 1) |
CSB_TCB(2, 2) |
CSB_TCB(3, 3) |
CSB_TCB(4, 4) |
CSB_TCB(5, 5) |
CSB_TCB(6, 6) |
CSB_TCB(7, 7));
OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
ENABLE_POINT_RASTER_RULE |
OGL_POINT_RASTER_RULE |
ENABLE_LINE_STRIP_PROVOKE_VRTX |
ENABLE_TRI_FAN_PROVOKE_VRTX |
LINE_STRIP_PROVOKE_VRTX(1) |
TRI_FAN_PROVOKE_VRTX(2) |
ENABLE_TEXKILL_3D_4D |
TEXKILL_4D);
OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
/* disable indirect state for now
*/
OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);
OUT_BATCH(0);
}
/* 7 dwords, 1 relocs */
if (i915->hardware_dirty & I915_HW_IMMEDIATE)
{
/* remove unwanted bits and S7 */
unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
i915->immediate_dirty;
int i, num = util_bitcount(dirty);
assert(num && num <= I915_MAX_IMMEDIATE);
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
dirty << 4 | (num - 1));
if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {
if (i915->vbo)
OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,
i915->current.immediate[I915_IMMEDIATE_S0]);
else
OUT_BATCH(0);
}
for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
if (dirty & (1 << i))
OUT_BATCH(i915->current.immediate[i]);
}
}
#if 01
/* I915_MAX_DYNAMIC dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_DYNAMIC)
{
int i;
for (i = 0; i < I915_MAX_DYNAMIC; i++) {
if (i915->dynamic_dirty & (1 << i))
OUT_BATCH(i915->current.dynamic[i]);
}
}
#endif
#if 01
/* 8 dwords, 2 relocs */
if (i915->hardware_dirty & I915_HW_STATIC)
{
if (i915->current.cbuf_bo) {
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
OUT_BATCH(i915->current.cbuf_flags);
OUT_RELOC(i915->current.cbuf_bo,
I915_USAGE_RENDER,
0);
}
/* What happens if no zbuf??
*/
if (i915->current.depth_bo) {
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
OUT_BATCH(i915->current.depth_flags);
OUT_RELOC(i915->current.depth_bo,
I915_USAGE_RENDER,
0);
}
{
OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
OUT_BATCH(i915->current.dst_buf_vars);
}
}
#endif
#if 01
/* texture images */
/* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */
if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER))
{
const uint nr = i915->current.sampler_enable_nr;
if (nr) {
const uint enabled = i915->current.sampler_enable_flags;
uint unit;
uint count = 0;
OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
OUT_BATCH(enabled);
for (unit = 0; unit < I915_TEX_UNITS; unit++) {
if (enabled & (1 << unit)) {
struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture);
struct i915_winsys_buffer *buf = texture->buffer;
assert(buf);
count++;
OUT_RELOC(buf, I915_USAGE_SAMPLER, 0);
OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
}
}
assert(count == nr);
}
}
#endif
#if 01
/* samplers */
/* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_SAMPLER)
{
if (i915->current.sampler_enable_nr) {
int i;
OUT_BATCH( _3DSTATE_SAMPLER_STATE |
(3 * i915->current.sampler_enable_nr) );
OUT_BATCH( i915->current.sampler_enable_flags );
for (i = 0; i < I915_TEX_UNITS; i++) {
if (i915->current.sampler_enable_flags & (1<<i)) {
OUT_BATCH( i915->current.sampler[i][0] );
OUT_BATCH( i915->current.sampler[i][1] );
OUT_BATCH( i915->current.sampler[i][2] );
}
}
}
}
#endif
#if 01
/* constants */
/* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_CONSTANTS)
{
/* Collate the user-defined constants with the fragment shader's
* immediates according to the constant_flags[] array.
*/
const uint nr = i915->fs->num_constants;
if (nr) {
uint i;
OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
OUT_BATCH((1 << nr) - 1);
for (i = 0; i < nr; i++) {
const uint *c;
if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
/* grab user-defined constant */
c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data;
c += 4 * i;
}
else {
/* emit program constant */
c = (uint *) i915->fs->constants[i];
}
#if 0 /* debug */
{
float *f = (float *) c;
printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
(i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
? "user" : "immediate"));
}
#endif
OUT_BATCH(*c++);
OUT_BATCH(*c++);
OUT_BATCH(*c++);
OUT_BATCH(*c++);
}
}
}
#endif
#if 01
/* Fragment program */
/* i915->current.program_len dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_PROGRAM)
{
uint i;
/* we should always have, at least, a pass-through program */
assert(i915->fs->program_len > 0);
for (i = 0; i < i915->fs->program_len; i++) {
OUT_BATCH(i915->fs->program[i]);
}
}
#endif
#if 01
/* drawing surface size */
/* 6 dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_STATIC)
{
/* XXX flush only required when the draw_offset changes! */
OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
OUT_BATCH(i915->current.draw_offset);
OUT_BATCH(i915->current.draw_size);
OUT_BATCH(i915->current.draw_offset);
}
#endif
#define EMIT_ATOM(atom, hw_dirty) \
if (i915->hardware_dirty & hw_dirty) \
emit_##atom(i915);
EMIT_ATOM(flush, I915_HW_FLUSH);
EMIT_ATOM(invariant, I915_HW_INVARIANT);
EMIT_ATOM(immediate, I915_HW_IMMEDIATE);
EMIT_ATOM(dynamic, I915_HW_DYNAMIC);
EMIT_ATOM(static, I915_HW_STATIC);
EMIT_ATOM(map, I915_HW_MAP);
EMIT_ATOM(sampler, I915_HW_SAMPLER);
EMIT_ATOM(constants, I915_HW_CONSTANTS);
EMIT_ATOM(program, I915_HW_PROGRAM);
EMIT_ATOM(draw_rect, I915_HW_STATIC);
#undef EMIT_ATOM
I915_DBG(DBG_EMIT, "%s: used %d dwords, %d relocs\n", __FUNCTION__,
((uintptr_t)i915->batch->ptr - save_ptr) / 4,
@ -431,4 +464,5 @@ i915_emit_hardware_state(struct i915_context *i915 )
i915->hardware_dirty = 0;
i915->immediate_dirty = 0;
i915->dynamic_dirty = 0;
i915->flush_dirty = 0;
}

View file

@ -164,7 +164,7 @@ static void update_framebuffer(struct i915_context *i915)
assert(ret);
if (i915->current.draw_offset != draw_offset) {
i915->current.draw_offset = draw_offset;
/* XXX: only emit flush on change and not always in emit */
i915_set_flush_dirty(i915, I915_PIPELINE_FLUSH);
}
i915->current.draw_size = (w - 1 + x) | ((h - 1 + y) << 16);