ilo: embed ilo_state_sol in ilo_shader

This commit is contained in:
Chia-I Wu 2015-05-29 15:25:13 +08:00
parent 960ca7d5e3
commit eaf2c73899
8 changed files with 153 additions and 150 deletions

View file

@ -37,6 +37,7 @@
#include "ilo_dev.h"
#include "ilo_state_3d.h"
#include "ilo_state_sampler.h"
#include "ilo_state_sol.h"
#include "ilo_builder.h"
static inline void
@ -1013,131 +1014,41 @@ gen7_disable_3DSTATE_GS(struct ilo_builder *builder)
static inline void
gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder,
int render_stream,
bool render_disable,
int vertex_attrib_count,
const int *buf_strides)
const struct ilo_state_sol *sol)
{
const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 5 : 3;
uint32_t *dw;
int buf_mask;
ILO_DEV_ASSERT(builder->dev, 7, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) | (cmd_len - 2);
dw[1] = render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT;
if (render_disable)
dw[1] |= GEN7_SO_DW1_RENDER_DISABLE;
if (buf_strides) {
buf_mask = ((bool) buf_strides[3]) << 3 |
((bool) buf_strides[2]) << 2 |
((bool) buf_strides[1]) << 1 |
((bool) buf_strides[0]);
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
dw[3] = buf_strides[1] << 16 | buf_strides[0];
dw[4] = buf_strides[3] << 16 | buf_strides[1];
}
} else {
buf_mask = 0;
}
if (buf_mask) {
int read_len;
dw[1] |= GEN7_SO_DW1_SO_ENABLE |
GEN7_SO_DW1_STATISTICS;
/* API_OPENGL */
if (true)
dw[1] |= GEN7_REORDER_TRAILING << GEN7_SO_DW1_REORDER_MODE__SHIFT;
if (ilo_dev_gen(builder->dev) < ILO_GEN(8))
dw[1] |= buf_mask << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT;
read_len = (vertex_attrib_count + 1) / 2;
if (!read_len)
read_len = 1;
dw[2] = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
(read_len - 1) << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
(read_len - 1) << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
(read_len - 1) << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
(read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
} else {
dw[2] = 0;
/* see sol_set_gen7_3DSTATE_STREAMOUT() */
dw[1] = sol->so[0];
dw[2] = sol->so[1];
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
dw[3] = sol->so[2];
dw[4] = sol->so[3];
}
}
static inline void
gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
const struct pipe_stream_output_info *so_info)
const struct ilo_state_sol *sol)
{
/*
* Note that "DWord Length" has 9 bits for this command and the type of
* cmd_len cannot be uint8_t.
*/
uint16_t cmd_len;
struct {
int buf_selects;
int decl_count;
uint16_t decls[128];
} streams[4];
unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
int hw_decl_count, i;
int cmd_decl_count;
uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 7, 8);
memset(streams, 0, sizeof(streams));
memset(buf_offsets, 0, sizeof(buf_offsets));
for (i = 0; i < so_info->num_outputs; i++) {
unsigned decl, st, buf, reg, mask;
st = so_info->output[i].stream;
buf = so_info->output[i].output_buffer;
/* pad with holes */
while (buf_offsets[buf] < so_info->output[i].dst_offset) {
int num_dwords;
num_dwords = so_info->output[i].dst_offset - buf_offsets[buf];
if (num_dwords > 4)
num_dwords = 4;
decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
GEN7_SO_DECL_HOLE_FLAG |
((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
assert(streams[st].decl_count < Elements(streams[st].decls));
streams[st].decls[streams[st].decl_count++] = decl;
buf_offsets[buf] += num_dwords;
}
assert(buf_offsets[buf] == so_info->output[i].dst_offset);
reg = so_info->output[i].register_index;
mask = ((1 << so_info->output[i].num_components) - 1) <<
so_info->output[i].start_component;
decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
assert(streams[st].decl_count < Elements(streams[st].decls));
streams[st].buf_selects |= 1 << buf;
streams[st].decls[streams[st].decl_count++] = decl;
buf_offsets[buf] += so_info->output[i].num_components;
}
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) {
hw_decl_count = MAX4(streams[0].decl_count, streams[1].decl_count,
streams[2].decl_count, streams[3].decl_count);
cmd_decl_count = sol->decl_count;
} else {
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 201:
@ -1146,28 +1057,22 @@ gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
* whenever this command is issued. The "Num Entries [n]" fields
* still contain the actual numbers of valid decls."
*/
hw_decl_count = 128;
cmd_decl_count = 128;
}
cmd_len = 3 + 2 * hw_decl_count;
cmd_len = 3 + 2 * cmd_decl_count;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2);
dw[1] = streams[3].buf_selects << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
streams[2].buf_selects << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
streams[1].buf_selects << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
streams[0].buf_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
dw[2] = streams[3].decl_count << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
streams[2].decl_count << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
streams[1].decl_count << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
streams[0].decl_count << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
dw += 3;
/* see sol_set_gen7_3DSTATE_SO_DECL_LIST() */
dw[1] = sol->so[4];
dw[2] = sol->so[5];
memcpy(&dw[3], sol->decl, sizeof(sol->decl[0]) * sol->decl_count);
for (i = 0; i < hw_decl_count; i++) {
dw[0] = streams[1].decls[i] << 16 | streams[0].decls[i];
dw[1] = streams[3].decls[i] << 16 | streams[2].decls[i];
dw += 2;
/*
 * Zero-fill the decl slots between the valid entries and the command
 * length.  Each entry occupies two dwords, so the padding starts at
 * dw[3 + 2 * decl_count].  The byte count is the entry size times the
 * number of padding entries; the subtraction must be parenthesized,
 * otherwise the multiplication binds first and the memset runs past the
 * end of the space reserved by ilo_builder_batch_pointer().
 */
if (sol->decl_count < cmd_decl_count) {
   memset(&dw[3 + 2 * sol->decl_count], 0,
         sizeof(sol->decl[0]) * (cmd_decl_count - sol->decl_count));
}
}

View file

@ -61,6 +61,8 @@ struct ilo_blitter {
struct ilo_ve_state ve;
struct pipe_draw_info draw;
struct ilo_state_sol sol;
struct ilo_state_viewport vp;
uint32_t vp_data[20];

View file

@ -64,6 +64,8 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter)
blitter->draw.mode = ILO_PRIM_RECTANGLES;
blitter->draw.count = 3;
ilo_state_sol_init_disabled(&blitter->sol, blitter->ilo->dev, false);
/**
* From the Haswell PRM, volume 7, page 615:
*

View file

@ -420,7 +420,7 @@ gen7_draw_sol(struct ilo_render *r,
const struct ilo_state_vector *vec,
struct ilo_render_draw_session *session)
{
const struct pipe_stream_output_info *so_info;
const struct ilo_state_sol *sol;
const struct ilo_shader_state *shader;
bool dirty_sh = false;
@ -433,13 +433,16 @@ gen7_draw_sol(struct ilo_render *r,
dirty_sh = DIRTY(VS);
}
so_info = ilo_shader_get_kernel_so_info(shader);
sol = ilo_shader_get_kernel_sol(shader);
/* 3DSTATE_SO_BUFFER */
if ((DIRTY(SO) || dirty_sh || r->batch_bo_changed) &&
vec->so.enabled) {
const struct pipe_stream_output_info *so_info;
int i;
so_info = ilo_shader_get_kernel_so_info(shader);
for (i = 0; i < vec->so.count; i++) {
const int stride = so_info->stride[i] * 4; /* in bytes */
@ -452,22 +455,30 @@ gen7_draw_sol(struct ilo_render *r,
/* 3DSTATE_SO_DECL_LIST */
if (dirty_sh && vec->so.enabled)
gen7_3DSTATE_SO_DECL_LIST(r->builder, so_info);
gen7_3DSTATE_SO_DECL_LIST(r->builder, sol);
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 196-197:
*
* "Anytime the SOL unit MMIO registers or non-pipeline state are
* written, the SOL unit needs to receive a pipeline state update with
* SOL unit dirty state for information programmed in MMIO/NP to get
* loaded into the SOL unit.
*
* The SOL unit incorrectly double buffers MMIO/NP registers and only
* moves them into the design for usage when control topology is
* received with the SOL unit dirty state.
*
* If the state does not change, need to resend the same state.
*
* Because of corruption, software must flush the whole fixed function
* pipeline when 3DSTATE_STREAMOUT changes state."
*
* The first and fourth paragraphs are gone on Gen7.5+.
*/
/* 3DSTATE_STREAMOUT */
if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) {
const int output_count = ilo_shader_get_kernel_param(shader,
ILO_KERNEL_OUTPUT_COUNT);
int buf_strides[4] = { 0, 0, 0, 0 };
int i;
for (i = 0; i < vec->so.count; i++)
buf_strides[i] = so_info->stride[i] * 4;
gen7_3DSTATE_STREAMOUT(r->builder, 0,
vec->rasterizer->state.rasterizer_discard,
output_count, buf_strides);
}
gen7_3DSTATE_STREAMOUT(r->builder, sol);
}
static void
@ -717,7 +728,7 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
gen7_disable_3DSTATE_GS(r->builder);
gen7_3DSTATE_STREAMOUT(r->builder, 0, false, 0x0, 0);
gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol);
gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);

View file

@ -557,39 +557,103 @@ ilo_shader_state_search_variant(struct ilo_shader_state *state,
}
static void
copy_so_info(struct ilo_shader *sh,
const struct pipe_stream_output_info *so_info)
init_sol(struct ilo_shader *kernel,
const struct ilo_dev *dev,
const struct pipe_stream_output_info *so_info,
bool rasterizer_discard)
{
unsigned i, attr;
struct ilo_state_sol_decl_info decls[4][PIPE_MAX_SO_OUTPUTS];
unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
struct ilo_state_sol_info info;
unsigned i;
if (!so_info->num_outputs)
if (!so_info->num_outputs) {
ilo_state_sol_init_disabled(&kernel->sol, dev, rasterizer_discard);
return;
}
sh->so_info = *so_info;
memset(&info, 0, sizeof(info));
info.data = kernel->sol_data;
info.data_size = sizeof(kernel->sol_data);
info.sol_enable = true;
info.stats_enable = true;
info.tristrip_reorder = GEN7_REORDER_TRAILING;
info.render_disable = rasterizer_discard;
info.render_stream = 0;
for (i = 0; i < 4; i++) {
info.buffer_strides[i] = so_info->stride[i] * 4;
info.streams[i].cv_vue_attr_count = kernel->out.count;
info.streams[i].decls = decls[i];
}
memset(decls, 0, sizeof(decls));
memset(buf_offsets, 0, sizeof(buf_offsets));
for (i = 0; i < so_info->num_outputs; i++) {
const unsigned stream = so_info->output[i].stream;
const unsigned buffer = so_info->output[i].output_buffer;
struct ilo_state_sol_decl_info *decl;
unsigned attr;
/* figure out which attribute is sourced */
for (attr = 0; attr < sh->out.count; attr++) {
const int reg_idx = sh->out.register_indices[attr];
for (attr = 0; attr < kernel->out.count; attr++) {
const int reg_idx = kernel->out.register_indices[attr];
if (reg_idx == so_info->output[i].register_index)
break;
}
if (attr < sh->out.count) {
sh->so_info.output[i].register_index = attr;
}
else {
if (attr >= kernel->out.count) {
assert(!"stream output an undefined register");
sh->so_info.output[i].register_index = 0;
attr = 0;
}
if (info.streams[stream].vue_read_count < attr + 1)
info.streams[stream].vue_read_count = attr + 1;
/* pad with holes first */
while (buf_offsets[buffer] < so_info->output[i].dst_offset) {
int num_dwords;
num_dwords = so_info->output[i].dst_offset - buf_offsets[buffer];
if (num_dwords > 4)
num_dwords = 4;
assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
decl = &decls[stream][info.streams[stream].decl_count];
decl->attr = 0;
decl->is_hole = true;
decl->component_base = 0;
decl->component_count = num_dwords;
decl->buffer = buffer;
info.streams[stream].decl_count++;
buf_offsets[buffer] += num_dwords;
}
assert(buf_offsets[buffer] == so_info->output[i].dst_offset);
assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
decl = &decls[stream][info.streams[stream].decl_count];
decl->attr = attr;
decl->is_hole = false;
/* PSIZE is at W channel */
if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
if (kernel->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
assert(so_info->output[i].start_component == 0);
assert(so_info->output[i].num_components == 1);
sh->so_info.output[i].start_component = 3;
decl->component_base = 3;
decl->component_count = 1;
} else {
decl->component_base = so_info->output[i].start_component;
decl->component_count = so_info->output[i].num_components;
}
decl->buffer = buffer;
info.streams[stream].decl_count++;
buf_offsets[buffer] += so_info->output[i].num_components;
}
ilo_state_sol_init(&kernel->sol, dev, &info);
}
/**
@ -599,17 +663,20 @@ static struct ilo_shader *
ilo_shader_state_add_variant(struct ilo_shader_state *state,
const struct ilo_shader_variant *variant)
{
bool rasterizer_discard = false;
struct ilo_shader *sh;
switch (state->info.type) {
case PIPE_SHADER_VERTEX:
sh = ilo_shader_compile_vs(state, variant);
rasterizer_discard = variant->u.vs.rasterizer_discard;
break;
case PIPE_SHADER_FRAGMENT:
sh = ilo_shader_compile_fs(state, variant);
break;
case PIPE_SHADER_GEOMETRY:
sh = ilo_shader_compile_gs(state, variant);
rasterizer_discard = variant->u.gs.rasterizer_discard;
break;
case PIPE_SHADER_COMPUTE:
sh = ilo_shader_compile_cs(state, variant);
@ -625,7 +692,8 @@ ilo_shader_state_add_variant(struct ilo_shader_state *state,
sh->variant = *variant;
copy_so_info(sh, &state->info.stream_output);
init_sol(sh, state->info.dev, &state->info.stream_output,
rasterizer_discard);
ilo_shader_state_add_shader(state, sh);
@ -1163,12 +1231,18 @@ ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader)
*/
const struct pipe_stream_output_info *
ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
{
/*
 * Hand back the stream-output info stored in the shader state's own info
 * (shader->info.stream_output).  This accessor does not dereference
 * shader->shader, so it does not depend on which kernel is selected.
 */
return &shader->info.stream_output;
}
const struct ilo_state_sol *
ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader)
{
const struct ilo_shader *kernel = shader->shader;
assert(kernel);
return &kernel->so_info;
return &kernel->sol;
}
/**

View file

@ -96,6 +96,7 @@ struct ilo_rasterizer_state;
struct ilo_shader_cache;
struct ilo_shader_state;
struct ilo_shader_cso;
struct ilo_state_sol;
struct ilo_state_vector;
struct ilo_shader_cache *
@ -168,6 +169,9 @@ ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader);
const struct pipe_stream_output_info *
ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader);
const struct ilo_state_sol *
ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader);
const struct ilo_kernel_routing *
ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader);

View file

@ -32,6 +32,7 @@
#include "core/ilo_state_cc.h"
#include "core/ilo_state_raster.h"
#include "core/ilo_state_sampler.h"
#include "core/ilo_state_sol.h"
#include "core/ilo_state_surface.h"
#include "core/ilo_state_viewport.h"
#include "core/ilo_state_zs.h"

View file

@ -28,6 +28,8 @@
#ifndef ILO_SHADER_INTERNAL_H
#define ILO_SHADER_INTERNAL_H
#include "core/ilo_state_sol.h"
#include "ilo_common.h"
#include "ilo_state.h"
#include "ilo_shader.h"
@ -111,7 +113,9 @@ struct ilo_shader {
bool stream_output;
int svbi_post_inc;
struct pipe_stream_output_info so_info;
uint32_t sol_data[PIPE_MAX_SO_OUTPUTS][2];
struct ilo_state_sol sol;
/* for VS stream output / rasterizer discard */
int gs_offsets[3];