ilo: move SBE setup code to ilo_shader.c

Add ilo_shader_select_kernel_routing() to construct 3DSTATE_SBE.  It is called
in ilo_finalize_states(), rather than in create_fs_state(), as it depends on
VS/GS and rasterizer states.

With this change, ilo_shader_internal.h is no longer needed for
ilo_gpe_gen6.c.
This commit is contained in:
Chia-I Wu 2013-06-24 14:13:33 +08:00
parent c4fa24ff08
commit 9b18df6e08
6 changed files with 237 additions and 155 deletions

View file

@ -30,7 +30,6 @@
#include "brw_defines.h"
#include "intel_reg.h"
#include "shader/ilo_shader_internal.h"
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_format.h"
@ -1814,178 +1813,52 @@ ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
void
ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
const struct ilo_rasterizer_state *rasterizer,
const struct ilo_shader_state *fs_state,
const struct ilo_shader_state *last_sh_state,
const struct ilo_shader_state *fs,
const struct ilo_shader_state *last_sh,
uint32_t *dw, int num_dwords)
{
const struct ilo_shader *fs = fs_state->shader;
const struct ilo_shader *last_sh = last_sh_state->shader;
uint32_t point_sprite_enable, const_interp_enable;
uint16_t attr_ctrl[PIPE_MAX_SHADER_INPUTS];
int vue_offset, vue_len;
int dst, max_src, i;
int output_count, vue_offset, vue_len;
const struct ilo_kernel_routing *routing;
ILO_GPE_VALID_GEN(dev, 6, 7);
assert(num_dwords == 13);
if (!fs) {
memset(dw, 0, sizeof(dw[0]) * num_dwords);
if (dev->gen >= ILO_GEN(7))
dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
else
dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
for (i = 1; i < num_dwords; i++)
dw[i] = 0;
return;
}
if (last_sh) {
/* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
assert(last_sh->out.semantic_names[0] == TGSI_SEMANTIC_PSIZE);
assert(last_sh->out.semantic_names[1] == TGSI_SEMANTIC_POSITION);
vue_offset = 2;
vue_len = last_sh->out.count - vue_offset;
}
else {
vue_offset = 0;
vue_len = fs->in.count;
}
output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
assert(output_count <= 32);
point_sprite_enable = 0;
const_interp_enable = 0;
max_src = (last_sh) ? 0 : fs->in.count - 1;
routing = ilo_shader_get_kernel_routing(fs);
for (dst = 0; dst < fs->in.count; dst++) {
const int semantic = fs->in.semantic_names[dst];
const int index = fs->in.semantic_indices[dst];
const int interp = fs->in.interp[dst];
int src;
uint16_t ctrl;
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 268:
*
* "This field (Point Sprite Texture Coordinate Enable) must be
* programmed to 0 when non-point primitives are rendered."
*
* TODO We do not check that yet.
*/
if (semantic == TGSI_SEMANTIC_GENERIC &&
(rasterizer->state.sprite_coord_enable & (1 << index)))
point_sprite_enable |= 1 << dst;
if (interp == TGSI_INTERPOLATE_CONSTANT ||
(interp == TGSI_INTERPOLATE_COLOR && rasterizer->state.flatshade))
const_interp_enable |= 1 << dst;
if (!last_sh) {
attr_ctrl[dst] = 0;
continue;
}
/* find the matching VS/GS OUT for FS IN[i] */
ctrl = 0;
for (src = 0; src < vue_len; src++) {
if (last_sh->out.semantic_names[vue_offset + src] != semantic ||
last_sh->out.semantic_indices[vue_offset + src] != index)
continue;
ctrl = src;
if (semantic == TGSI_SEMANTIC_COLOR &&
rasterizer->state.light_twoside &&
src < vue_len - 1) {
const int next = src + 1;
if (last_sh->out.semantic_names[vue_offset + next] ==
TGSI_SEMANTIC_BCOLOR &&
last_sh->out.semantic_indices[vue_offset + next] == index) {
ctrl |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
ATTRIBUTE_SWIZZLE_SHIFT;
src++;
}
}
break;
}
/* if there is no COLOR, try BCOLOR */
if (src >= vue_len && semantic == TGSI_SEMANTIC_COLOR) {
for (src = 0; src < vue_len; src++) {
if (last_sh->out.semantic_names[vue_offset + src] !=
TGSI_SEMANTIC_BCOLOR ||
last_sh->out.semantic_indices[vue_offset + src] != index)
continue;
ctrl = src;
break;
}
}
if (src < vue_len) {
attr_ctrl[dst] = ctrl;
if (max_src < src)
max_src = src;
}
else {
/*
* The previous shader stage does not output this attribute. The
* value is supposed to be undefined for fs, unless the attribute
* goes through point sprite replacement or the attribute is
* TGSI_SEMANTIC_POSITION. In all cases, we do not care which source
* attribute is picked.
*
* We should update the fs code and omit the output of
* TGSI_SEMANTIC_POSITION here.
*/
attr_ctrl[dst] = 0;
}
}
for (; dst < Elements(attr_ctrl); dst++)
attr_ctrl[dst] = 0;
/* only the first 16 attributes can be remapped */
for (dst = 16; dst < Elements(attr_ctrl); dst++)
assert(attr_ctrl[dst] == 0 || attr_ctrl[dst] == dst);
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 248:
*
* "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
* 0 indicating no Vertex URB data to be read.
*
* This field should be set to the minimum length required to read the
* maximum source attribute. The maximum source attribute is indicated
* by the maximum value of the enabled Attribute # Source Attribute if
* Attribute Swizzle Enable is set, Number of Output Attributes-1 if
* enable is not set.
*
* read_length = ceiling((max_source_attr+1)/2)
*
* [errata] Corruption/Hang possible if length programmed larger than
* recommended"
*/
vue_len = max_src + 1;
assert(fs->in.count <= 32);
vue_offset = routing->source_skip;
assert(vue_offset % 2 == 0);
vue_offset /= 2;
vue_len = (routing->source_len + 1) / 2;
if (!vue_len)
vue_len = 1;
if (dev->gen >= ILO_GEN(7)) {
dw[0] = fs->in.count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
(vue_len + 1) / 2 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
vue_offset / 2 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
if (last_sh)
dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
if (routing->swizzle_enable)
dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
}
else {
dw[0] = fs->in.count << GEN6_SF_NUM_OUTPUTS_SHIFT |
(vue_len + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
vue_offset / 2 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
if (last_sh)
dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
if (routing->swizzle_enable)
dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
}
@ -1998,11 +1871,20 @@ ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
break;
}
for (i = 0; i < 8; i++)
dw[1 + i] = attr_ctrl[2 * i + 1] << 16 | attr_ctrl[2 * i];
STATIC_ASSERT(Elements(routing->swizzles) >= 16);
memcpy(&dw[1], routing->swizzles, 2 * 16);
dw[9] = point_sprite_enable;
dw[10] = const_interp_enable;
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 268:
*
* "This field (Point Sprite Texture Coordinate Enable) must be
* programmed to 0 when non-point primitives are rendered."
*
* TODO We do not check that yet.
*/
dw[9] = routing->point_sprite_enable;
dw[10] = routing->const_interp_enable;
/* WrapShortest enables */
dw[11] = 0;

View file

@ -27,6 +27,7 @@
#include "tgsi/tgsi_parse.h"
#include "intel_winsys.h"
#include "brw_defines.h" /* for SBE setup */
#include "shader/ilo_shader_internal.h"
#include "ilo_state.h"
@ -848,6 +849,157 @@ ilo_shader_select_kernel(struct ilo_shader_state *shader,
return (shader->shader != cur);
}
/**
 * Find the source slot that feeds the attribute (\p semantic, \p index).
 *
 * The first slot whose semantic and index both match wins.  When the
 * requested semantic is TGSI_SEMANTIC_COLOR and no such slot exists, the
 * first TGSI_SEMANTIC_BCOLOR slot with the same index is used instead.
 *
 * \return the matching slot in [0, len), or -1 when nothing matches
 */
static int
route_attr(const int *semantics, const int *indices, int len,
           int semantic, int index)
{
   const bool want_color = (semantic == TGSI_SEMANTIC_COLOR);
   int bcolor_slot = -1;
   int slot;

   for (slot = 0; slot < len; slot++) {
      if (indices[slot] != index)
         continue;

      /* an exact match always takes priority over the fallback */
      if (semantics[slot] == semantic)
         return slot;

      /* remember the first BCOLOR in case COLOR is never found */
      if (want_color && bcolor_slot < 0 &&
          semantics[slot] == TGSI_SEMANTIC_BCOLOR)
         bcolor_slot = slot;
   }

   return bcolor_slot;
}
/**
* Select a routing for the given source shader and rasterizer state.
*
* \return true if a different routing is selected
*/
bool
ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
const struct ilo_shader_state *source,
const struct ilo_rasterizer_state *rasterizer)
{
const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable;
const bool light_twoside = rasterizer->state.light_twoside;
struct ilo_shader *kernel = shader->shader;
struct ilo_kernel_routing *routing = &kernel->routing;
const int *src_semantics, *src_indices;
int src_len, max_src_slot;
int dst_len, dst_slot;
/* we are constructing 3DSTATE_SBE here */
assert(shader->info.dev->gen >= ILO_GEN(6) &&
shader->info.dev->gen <= ILO_GEN(7));
assert(kernel);
if (source) {
assert(source->shader);
src_semantics = source->shader->out.semantic_names;
src_indices = source->shader->out.semantic_indices;
src_len = source->shader->out.count;
/* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
assert(src_semantics[0] == TGSI_SEMANTIC_PSIZE);
assert(src_semantics[1] == TGSI_SEMANTIC_POSITION);
routing->source_skip = 2;
routing->source_len = src_len - routing->source_skip;
src_semantics += routing->source_skip;
src_indices += routing->source_skip;
}
else {
src_semantics = kernel->in.semantic_names;
src_indices = kernel->in.semantic_indices;
src_len = kernel->in.count;
routing->source_skip = 0;
routing->source_len = src_len;
}
routing->const_interp_enable = kernel->in.const_interp_enable;
routing->point_sprite_enable = 0;
routing->swizzle_enable = false;
assert(kernel->in.count <= Elements(routing->swizzles));
dst_len = MIN2(kernel->in.count, Elements(routing->swizzles));
max_src_slot = -1;
for (dst_slot = 0; dst_slot < dst_len; dst_slot++) {
const int semantic = kernel->in.semantic_names[dst_slot];
const int index = kernel->in.semantic_indices[dst_slot];
int src_slot;
if (semantic == TGSI_SEMANTIC_GENERIC &&
(sprite_coord_enable & (1 << index)))
routing->point_sprite_enable |= 1 << dst_slot;
if (source) {
src_slot = route_attr(src_semantics, src_indices,
routing->source_len, semantic, index);
/*
* The source shader stage does not output this attribute. The value
* is supposed to be undefined, unless the attribute goes through
* point sprite replacement or the attribute is
* TGSI_SEMANTIC_POSITION. In all cases, we do not care which source
* attribute is picked.
*
* We should update the kernel code and omit the output of
* TGSI_SEMANTIC_POSITION here.
*/
if (src_slot < 0)
src_slot = 0;
}
else {
src_slot = dst_slot;
}
routing->swizzles[dst_slot] = src_slot;
/* use the following slot for two-sided lighting */
if (semantic == TGSI_SEMANTIC_COLOR && light_twoside &&
src_slot + 1 < routing->source_len &&
src_semantics[src_slot + 1] == TGSI_SEMANTIC_BCOLOR &&
src_indices[src_slot + 1] == index) {
routing->swizzles[dst_slot] |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
ATTRIBUTE_SWIZZLE_SHIFT;
src_slot++;
}
if (routing->swizzles[dst_slot] != dst_slot)
routing->swizzle_enable = true;
if (max_src_slot < src_slot)
max_src_slot = src_slot;
}
memset(&routing->swizzles[dst_slot], 0, sizeof(routing->swizzles) -
sizeof(routing->swizzles[0]) * dst_slot);
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 248:
*
* "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
* 0 indicating no Vertex URB data to be read.
*
* This field should be set to the minimum length required to read the
* maximum source attribute. The maximum source attribute is indicated
* by the maximum value of the enabled Attribute # Source Attribute if
* Attribute Swizzle Enable is set, Number of Output Attributes-1 if
* enable is not set.
*
* read_length = ceiling((max_source_attr+1)/2)
*
* [errata] Corruption/Hang possible if length programmed larger than
* recommended"
*/
routing->source_len = max_src_slot + 1;
return true;
}
/**
* Return the cache offset of the selected kernel. This must be called after
* ilo_shader_select_kernel() and ilo_shader_cache_upload().
@ -978,3 +1130,16 @@ ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
return &kernel->so_info;
}
/**
 * Return the routing info stored in the currently selected kernel of
 * \p shader.  A kernel must have been selected.
 */
const struct ilo_kernel_routing *
ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader)
{
   assert(shader->shader);

   return &shader->shader->routing;
}

View file

@ -58,8 +58,18 @@ enum ilo_kernel_param {
ILO_KERNEL_PARAM_COUNT,
};
/**
 * Attribute routing of a kernel, as consumed when filling 3DSTATE_SBE.  It
 * is set up by ilo_shader_select_kernel_routing().
 */
struct ilo_kernel_routing {
/* one bit per kernel input that uses constant interpolation */
uint32_t const_interp_enable;
/* one bit per kernel input that goes through point sprite replacement */
uint32_t point_sprite_enable;
/*
 * source_skip is the number of leading source attributes that are skipped
 * (2, for PSIZE and POSITION, when there is a source shader; 0 otherwise).
 * source_len is the highest source slot read past source_skip, plus one.
 */
unsigned source_skip, source_len;
/* true when any used entry of swizzles[] is not the identity mapping */
bool swizzle_enable;
/*
 * Per-input source slot, optionally OR'ed with the
 * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING control bits for two-sided lighting.
 * Entries beyond the kernel input count are zero.
 */
uint16_t swizzles[16];
};
struct intel_bo;
struct ilo_context;
struct ilo_rasterizer_state;
struct ilo_shader_cache;
struct ilo_shader_state;
struct ilo_shader_cso;
@ -114,6 +124,11 @@ ilo_shader_select_kernel(struct ilo_shader_state *shader,
const struct ilo_context *ilo,
uint32_t dirty);
/*
 * Select a routing for the selected kernel of \p shader, given the source
 * shader (may be NULL) and the rasterizer state.  Returns true if a
 * different routing is selected.
 */
bool
ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
const struct ilo_shader_state *source,
const struct ilo_rasterizer_state *rasterizer);
uint32_t
ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader);
@ -127,4 +142,7 @@ ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader);
const struct pipe_stream_output_info *
ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader);
/* Return the routing info of the selected kernel of \p shader. */
const struct ilo_kernel_routing *
ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader);
#endif /* ILO_SHADER_H */

View file

@ -72,6 +72,14 @@ finalize_shader_states(struct ilo_context *ilo)
/* mark the state dirty if a new kernel is selected */
ilo->dirty |= state;
}
/* need to setup SBE for FS */
if (type == PIPE_SHADER_FRAGMENT && ilo->dirty &
(state | ILO_DIRTY_GS | ILO_DIRTY_VS | ILO_DIRTY_RASTERIZER)) {
if (ilo_shader_select_kernel_routing(shader,
(ilo->gs) ? ilo->gs : ilo->vs, ilo->rasterizer))
ilo->dirty |= state;
}
}
}

View file

@ -1574,6 +1574,9 @@ fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
}
switch (tgsi->inputs[i].interp) {
case TGSI_INTERPOLATE_CONSTANT:
sh->in.const_interp_enable |= 1 << i;
break;
case TGSI_INTERPOLATE_LINEAR:
sh->in.has_linear_interp = true;
@ -1587,8 +1590,10 @@ fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
}
break;
case TGSI_INTERPOLATE_COLOR:
if (flatshade)
if (flatshade) {
sh->in.const_interp_enable |= 1 << i;
break;
}
/* fall through */
case TGSI_INTERPOLATE_PERSPECTIVE:
if (tgsi->inputs[i].centroid) {

View file

@ -30,6 +30,7 @@
#include "ilo_common.h"
#include "ilo_context.h"
#include "ilo_shader.h"
/* XXX The interface needs to be reworked */
@ -88,6 +89,7 @@ struct ilo_shader {
bool has_pos;
bool has_linear_interp;
int barycentric_interpolation_mode;
uint32_t const_interp_enable;
bool discard_adj;
} in;
@ -114,6 +116,8 @@ struct ilo_shader {
void *kernel;
int kernel_size;
struct ilo_kernel_routing routing;
/* what does the push constant buffer consist of? */
struct {
int clip_state_size;