ilo: let shaders determine sampler counts

When a shader needs N samplers, we should upload N samplers and not depend on
how many are bound.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
This commit is contained in:
Chia-I Wu 2014-10-04 10:51:20 +08:00
parent 0c4bc1e292
commit ca824e6940
13 changed files with 98 additions and 87 deletions

View file

@ -69,12 +69,12 @@ ilo_blitter_pipe_begin(struct ilo_blitter *blitter,
case ILO_BLITTER_PIPE_BLIT: case ILO_BLITTER_PIPE_BLIT:
case ILO_BLITTER_PIPE_COPY: case ILO_BLITTER_PIPE_COPY:
/* /*
* we are about to call util_blitter_blit() or * We are about to call util_blitter_blit() or
* util_blitter_copy_texture() * util_blitter_copy_texture(). Note that util_blitter uses at most two
* textures.
*/ */
util_blitter_save_fragment_sampler_states(b, util_blitter_save_fragment_sampler_states(b,
vec->sampler[PIPE_SHADER_FRAGMENT].count, 2, (void **) vec->sampler[PIPE_SHADER_FRAGMENT].cso);
(void **) vec->sampler[PIPE_SHADER_FRAGMENT].cso);
util_blitter_save_fragment_sampler_views(b, util_blitter_save_fragment_sampler_views(b,
vec->view[PIPE_SHADER_FRAGMENT].count, vec->view[PIPE_SHADER_FRAGMENT].count,

View file

@ -312,6 +312,23 @@ ilo_builder_dynamic_write(struct ilo_builder *builder,
return offset; return offset;
} }
/**
 * Set aside room at the top of the batch writer (for prefetches).
 *
 * \p len is multiplied by four to obtain the size that is compared against,
 * and then recorded in, the writer's \c stolen field (presumably a dword
 * count converted to bytes).  Only the shortfall between that size and what
 * \c stolen already records is reserved, so calling this again with the same
 * or a smaller \p len reserves nothing further.
 */
static inline void
ilo_builder_dynamic_pad_top(struct ilo_builder *builder, unsigned len)
{
   struct ilo_builder_writer *batch =
      &builder->writers[ILO_BUILDER_WRITER_BATCH];
   const unsigned wanted = len * 4;

   /* the top already has at least this much set aside */
   if (batch->stolen >= wanted)
      return;

   /* reserve just the missing part, then remember the new total */
   ilo_builder_writer_reserve_top(builder, ILO_BUILDER_WRITER_BATCH,
         1, wanted - batch->stolen);
   batch->stolen = wanted;
}
static inline unsigned static inline unsigned
ilo_builder_dynamic_used(const struct ilo_builder *builder) ilo_builder_dynamic_used(const struct ilo_builder *builder)
{ {

View file

@ -273,7 +273,6 @@ gen7_3DSTATE_SBE(struct ilo_builder *builder,
static inline void static inline void
gen6_3DSTATE_WM(struct ilo_builder *builder, gen6_3DSTATE_WM(struct ilo_builder *builder,
const struct ilo_shader_state *fs, const struct ilo_shader_state *fs,
int num_samplers,
const struct ilo_rasterizer_state *rasterizer, const struct ilo_rasterizer_state *rasterizer,
bool dual_blend, bool cc_may_kill, bool dual_blend, bool cc_may_kill,
uint32_t hiz_op) uint32_t hiz_op)
@ -311,8 +310,6 @@ gen6_3DSTATE_WM(struct ilo_builder *builder,
dw5 = fs_cso->payload[2]; dw5 = fs_cso->payload[2];
dw6 = fs_cso->payload[3]; dw6 = fs_cso->payload[3];
dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
/* /*
* From the Sandy Bridge PRM, volume 2 part 1, page 248: * From the Sandy Bridge PRM, volume 2 part 1, page 248:
* *
@ -399,7 +396,7 @@ gen7_3DSTATE_WM(struct ilo_builder *builder,
static inline void static inline void
gen7_3DSTATE_PS(struct ilo_builder *builder, gen7_3DSTATE_PS(struct ilo_builder *builder,
const struct ilo_shader_state *fs, const struct ilo_shader_state *fs,
int num_samplers, bool dual_blend) bool dual_blend)
{ {
const uint8_t cmd_len = 8; const uint8_t cmd_len = 8;
const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
@ -446,8 +443,6 @@ gen7_3DSTATE_PS(struct ilo_builder *builder,
dw4 = cso->payload[1]; dw4 = cso->payload[1];
dw5 = cso->payload[2]; dw5 = cso->payload[2];
dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
if (dual_blend) if (dual_blend)
dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;

View file

@ -544,8 +544,7 @@ gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
static inline void static inline void
gen6_3DSTATE_VS(struct ilo_builder *builder, gen6_3DSTATE_VS(struct ilo_builder *builder,
const struct ilo_shader_state *vs, const struct ilo_shader_state *vs)
int num_samplers)
{ {
const uint8_t cmd_len = 6; const uint8_t cmd_len = 6;
const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
@ -571,8 +570,6 @@ gen6_3DSTATE_VS(struct ilo_builder *builder,
dw4 = cso->payload[1]; dw4 = cso->payload[1];
dw5 = cso->payload[2]; dw5 = cso->payload[2];
dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
ilo_builder_batch_pointer(builder, cmd_len, &dw); ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0; dw[0] = dw0;
dw[1] = ilo_shader_get_kernel_offset(vs); dw[1] = ilo_shader_get_kernel_offset(vs);
@ -584,8 +581,7 @@ gen6_3DSTATE_VS(struct ilo_builder *builder,
static inline void static inline void
gen7_3DSTATE_HS(struct ilo_builder *builder, gen7_3DSTATE_HS(struct ilo_builder *builder,
const struct ilo_shader_state *hs, const struct ilo_shader_state *hs)
int num_samplers)
{ {
const uint8_t cmd_len = 7; const uint8_t cmd_len = 7;
uint32_t *dw; uint32_t *dw;
@ -623,8 +619,7 @@ gen7_3DSTATE_TE(struct ilo_builder *builder)
static inline void static inline void
gen7_3DSTATE_DS(struct ilo_builder *builder, gen7_3DSTATE_DS(struct ilo_builder *builder,
const struct ilo_shader_state *ds, const struct ilo_shader_state *ds)
int num_samplers)
{ {
const uint8_t cmd_len = 6; const uint8_t cmd_len = 6;
uint32_t *dw; uint32_t *dw;
@ -736,8 +731,7 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
static inline void static inline void
gen7_3DSTATE_GS(struct ilo_builder *builder, gen7_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_shader_state *gs, const struct ilo_shader_state *gs)
int num_samplers)
{ {
const uint8_t cmd_len = 7; const uint8_t cmd_len = 7;
const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
@ -763,8 +757,6 @@ gen7_3DSTATE_GS(struct ilo_builder *builder,
dw4 = cso->payload[1]; dw4 = cso->payload[1];
dw5 = cso->payload[2]; dw5 = cso->payload[2];
dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
ilo_builder_batch_pointer(builder, cmd_len, &dw); ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0; dw[0] = dw0;
@ -1423,6 +1415,17 @@ gen6_SAMPLER_STATE(struct ilo_builder *builder,
if (!num_samplers) if (!num_samplers)
return 0; return 0;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 132:
*
* "(Sampler Count of 3DSTATE_VS) Specifies how many samplers (in
* multiples of 4) the vertex shader 0 kernel uses. Used only for
* prefetching the associated sampler state entries."
*
* It also applies to other shader stages.
*/
ilo_builder_dynamic_pad_top(builder, 4 * (4 - (num_samplers % 4)));
state_offset = ilo_builder_dynamic_pointer(builder, state_offset = ilo_builder_dynamic_pointer(builder,
ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw); ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw);

View file

@ -134,9 +134,8 @@ gen6_emit_draw_dynamic_samplers(struct ilo_render *r,
vec->sampler[shader_type].cso; vec->sampler[shader_type].cso;
const struct pipe_sampler_view * const *views = const struct pipe_sampler_view * const *views =
(const struct pipe_sampler_view **) vec->view[shader_type].states; (const struct pipe_sampler_view **) vec->view[shader_type].states;
const int num_samplers = vec->sampler[shader_type].count;
const int num_views = vec->view[shader_type].count;
uint32_t *sampler_state, *border_color_state; uint32_t *sampler_state, *border_color_state;
int sampler_count;
bool emit_border_color = false; bool emit_border_color = false;
bool skip = false; bool skip = false;
@ -145,26 +144,32 @@ gen6_emit_draw_dynamic_samplers(struct ilo_render *r,
/* SAMPLER_BORDER_COLOR_STATE and SAMPLER_STATE */ /* SAMPLER_BORDER_COLOR_STATE and SAMPLER_STATE */
switch (shader_type) { switch (shader_type) {
case PIPE_SHADER_VERTEX: case PIPE_SHADER_VERTEX:
if (DIRTY(SAMPLER_VS) || DIRTY(VIEW_VS)) { if (DIRTY(VS) || DIRTY(SAMPLER_VS) || DIRTY(VIEW_VS)) {
sampler_state = &r->state.vs.SAMPLER_STATE; sampler_state = &r->state.vs.SAMPLER_STATE;
border_color_state = r->state.vs.SAMPLER_BORDER_COLOR_STATE; border_color_state = r->state.vs.SAMPLER_BORDER_COLOR_STATE;
if (DIRTY(SAMPLER_VS)) if (DIRTY(VS) || DIRTY(SAMPLER_VS))
emit_border_color = true; emit_border_color = true;
sampler_count = (vec->vs) ? ilo_shader_get_kernel_param(vec->vs,
ILO_KERNEL_SAMPLER_COUNT) : 0;
session->sampler_vs_changed = true; session->sampler_vs_changed = true;
} else { } else {
skip = true; skip = true;
} }
break; break;
case PIPE_SHADER_FRAGMENT: case PIPE_SHADER_FRAGMENT:
if (DIRTY(SAMPLER_FS) || DIRTY(VIEW_FS)) { if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(VIEW_FS)) {
sampler_state = &r->state.wm.SAMPLER_STATE; sampler_state = &r->state.wm.SAMPLER_STATE;
border_color_state = r->state.wm.SAMPLER_BORDER_COLOR_STATE; border_color_state = r->state.wm.SAMPLER_BORDER_COLOR_STATE;
if (DIRTY(SAMPLER_FS)) if (DIRTY(VS) || DIRTY(SAMPLER_FS))
emit_border_color = true; emit_border_color = true;
sampler_count = (vec->fs) ? ilo_shader_get_kernel_param(vec->fs,
ILO_KERNEL_SAMPLER_COUNT) : 0;
session->sampler_fs_changed = true; session->sampler_fs_changed = true;
} else { } else {
skip = true; skip = true;
@ -178,20 +183,20 @@ gen6_emit_draw_dynamic_samplers(struct ilo_render *r,
if (skip) if (skip)
return; return;
assert(sampler_count <= Elements(vec->view[shader_type].states) &&
sampler_count <= Elements(vec->sampler[shader_type].cso));
if (emit_border_color) { if (emit_border_color) {
int i; int i;
for (i = 0; i < num_samplers; i++) { for (i = 0; i < sampler_count; i++) {
border_color_state[i] = (samplers[i]) ? border_color_state[i] = (samplers[i]) ?
gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, samplers[i]) : 0; gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, samplers[i]) : 0;
} }
} }
/* should we take the minimum of num_samplers and num_views? */
*sampler_state = gen6_SAMPLER_STATE(r->builder, *sampler_state = gen6_SAMPLER_STATE(r->builder,
samplers, views, samplers, views, border_color_state, sampler_count);
border_color_state,
MIN2(num_samplers, num_views));
} }
static void static void
@ -322,14 +327,13 @@ ilo_render_get_draw_dynamic_states_len(const struct ilo_render *render,
for (sh_type = 0; sh_type < PIPE_SHADER_TYPES; sh_type++) { for (sh_type = 0; sh_type < PIPE_SHADER_TYPES; sh_type++) {
const int alignment = 32 / 4; const int alignment = 32 / 4;
int num_samplers, pcb_len; int num_samplers = 0, pcb_len = 0;
num_samplers = vec->sampler[sh_type].count;
pcb_len = 0;
switch (sh_type) { switch (sh_type) {
case PIPE_SHADER_VERTEX: case PIPE_SHADER_VERTEX:
if (vec->vs) { if (vec->vs) {
num_samplers = ilo_shader_get_kernel_param(vec->vs,
ILO_KERNEL_SAMPLER_COUNT);
pcb_len = ilo_shader_get_kernel_param(vec->vs, pcb_len = ilo_shader_get_kernel_param(vec->vs,
ILO_KERNEL_PCB_CBUF0_SIZE); ILO_KERNEL_PCB_CBUF0_SIZE);
pcb_len += ilo_shader_get_kernel_param(vec->vs, pcb_len += ilo_shader_get_kernel_param(vec->vs,
@ -340,6 +344,8 @@ ilo_render_get_draw_dynamic_states_len(const struct ilo_render *render,
break; break;
case PIPE_SHADER_FRAGMENT: case PIPE_SHADER_FRAGMENT:
if (vec->fs) { if (vec->fs) {
num_samplers = ilo_shader_get_kernel_param(vec->fs,
ILO_KERNEL_SAMPLER_COUNT);
pcb_len = ilo_shader_get_kernel_param(vec->fs, pcb_len = ilo_shader_get_kernel_param(vec->fs,
ILO_KERNEL_PCB_CBUF0_SIZE); ILO_KERNEL_PCB_CBUF0_SIZE);
} }
@ -350,6 +356,9 @@ ilo_render_get_draw_dynamic_states_len(const struct ilo_render *render,
/* SAMPLER_STATE array and SAMPLER_BORDER_COLORs */ /* SAMPLER_STATE array and SAMPLER_BORDER_COLORs */
if (num_samplers) { if (num_samplers) {
/* prefetches are done in multiples of 4 */
num_samplers = align(num_samplers, 4);
len += align(GEN6_SAMPLER_STATE__SIZE * num_samplers, alignment) + len += align(GEN6_SAMPLER_STATE__SIZE * num_samplers, alignment) +
align(GEN6_SAMPLER_BORDER_COLOR__SIZE, alignment) * num_samplers; align(GEN6_SAMPLER_BORDER_COLOR__SIZE, alignment) * num_samplers;
} }

View file

@ -496,8 +496,7 @@ gen6_draw_vs(struct ilo_render *r,
const struct ilo_state_vector *vec, const struct ilo_state_vector *vec,
struct ilo_render_draw_session *session) struct ilo_render_draw_session *session)
{ {
const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS) || const bool emit_3dstate_vs = (DIRTY(VS) || r->instruction_bo_changed);
r->instruction_bo_changed);
const bool emit_3dstate_constant_vs = session->pcb_vs_changed; const bool emit_3dstate_constant_vs = session->pcb_vs_changed;
/* /*
@ -516,11 +515,8 @@ gen6_draw_vs(struct ilo_render *r,
} }
/* 3DSTATE_VS */ /* 3DSTATE_VS */
if (emit_3dstate_vs) { if (emit_3dstate_vs)
const int num_samplers = vec->sampler[PIPE_SHADER_VERTEX].count; gen6_3DSTATE_VS(r->builder, vec->vs);
gen6_3DSTATE_VS(r->builder, vec->vs, num_samplers);
}
if (emit_3dstate_constant_vs && ilo_dev_gen(r->dev) == ILO_GEN(6)) if (emit_3dstate_constant_vs && ilo_dev_gen(r->dev) == ILO_GEN(6))
gen6_wa_post_3dstate_constant_vs(r); gen6_wa_post_3dstate_constant_vs(r);
@ -692,9 +688,8 @@ gen6_draw_wm(struct ilo_render *r,
} }
/* 3DSTATE_WM */ /* 3DSTATE_WM */
if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) || DIRTY(DSA) || if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) ||
DIRTY(RASTERIZER) || r->instruction_bo_changed) { DIRTY(RASTERIZER) || r->instruction_bo_changed) {
const int num_samplers = vec->sampler[PIPE_SHADER_FRAGMENT].count;
const bool dual_blend = vec->blend->dual_blend; const bool dual_blend = vec->blend->dual_blend;
const bool cc_may_kill = (vec->dsa->dw_alpha || const bool cc_may_kill = (vec->dsa->dw_alpha ||
vec->blend->alpha_to_coverage); vec->blend->alpha_to_coverage);
@ -702,7 +697,7 @@ gen6_draw_wm(struct ilo_render *r,
if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed) if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed)
gen6_wa_pre_3dstate_wm_max_threads(r); gen6_wa_pre_3dstate_wm_max_threads(r);
gen6_3DSTATE_WM(r->builder, vec->fs, num_samplers, gen6_3DSTATE_WM(r->builder, vec->fs,
vec->rasterizer, dual_blend, cc_may_kill, 0); vec->rasterizer, dual_blend, cc_may_kill, 0);
} }
} }
@ -849,7 +844,7 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r,
const struct ilo_blitter *blitter) const struct ilo_blitter *blitter)
{ {
gen6_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0); gen6_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
gen6_3DSTATE_VS(r->builder, NULL, 0); gen6_3DSTATE_VS(r->builder, NULL);
gen6_wa_post_3dstate_constant_vs(r); gen6_wa_post_3dstate_constant_vs(r);
@ -884,7 +879,7 @@ gen6_rectlist_wm(struct ilo_render *r,
gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen6_wa_pre_3dstate_wm_max_threads(r); gen6_wa_pre_3dstate_wm_max_threads(r);
gen6_3DSTATE_WM(r->builder, NULL, 0, NULL, false, false, hiz_op); gen6_3DSTATE_WM(r->builder, NULL, NULL, false, false, hiz_op);
} }
static void static void

View file

@ -338,8 +338,7 @@ gen7_draw_vs(struct ilo_render *r,
session->sampler_vs_changed; session->sampler_vs_changed;
/* see gen6_draw_vs() */ /* see gen6_draw_vs() */
const bool emit_3dstate_constant_vs = session->pcb_vs_changed; const bool emit_3dstate_constant_vs = session->pcb_vs_changed;
const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS) || const bool emit_3dstate_vs = (DIRTY(VS) || r->instruction_bo_changed);
r->instruction_bo_changed);
/* emit depth stall before any of the VS commands */ /* emit depth stall before any of the VS commands */
if (emit_3dstate_binding_table || emit_3dstate_sampler_state || if (emit_3dstate_binding_table || emit_3dstate_sampler_state ||
@ -367,11 +366,8 @@ gen7_draw_vs(struct ilo_render *r,
} }
/* 3DSTATE_VS */ /* 3DSTATE_VS */
if (emit_3dstate_vs) { if (emit_3dstate_vs)
const int num_samplers = vec->sampler[PIPE_SHADER_VERTEX].count; gen6_3DSTATE_VS(r->builder, vec->vs);
gen6_3DSTATE_VS(r->builder, vec->vs, num_samplers);
}
} }
static void static void
@ -382,7 +378,7 @@ gen7_draw_hs(struct ilo_render *r,
/* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */ /* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */
if (r->hw_ctx_changed) { if (r->hw_ctx_changed) {
gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0); gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
gen7_3DSTATE_HS(r->builder, NULL, 0); gen7_3DSTATE_HS(r->builder, NULL);
} }
/* 3DSTATE_BINDING_TABLE_POINTERS_HS */ /* 3DSTATE_BINDING_TABLE_POINTERS_HS */
@ -408,7 +404,7 @@ gen7_draw_ds(struct ilo_render *r,
/* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */ /* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */
if (r->hw_ctx_changed) { if (r->hw_ctx_changed) {
gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0); gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
gen7_3DSTATE_DS(r->builder, NULL, 0); gen7_3DSTATE_DS(r->builder, NULL);
} }
/* 3DSTATE_BINDING_TABLE_POINTERS_DS */ /* 3DSTATE_BINDING_TABLE_POINTERS_DS */
@ -425,7 +421,7 @@ gen7_draw_gs(struct ilo_render *r,
/* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */ /* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */
if (r->hw_ctx_changed) { if (r->hw_ctx_changed) {
gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0); gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
gen7_3DSTATE_GS(r->builder, NULL, 0); gen7_3DSTATE_GS(r->builder, NULL);
} }
/* 3DSTATE_BINDING_TABLE_POINTERS_GS */ /* 3DSTATE_BINDING_TABLE_POINTERS_GS */
@ -541,9 +537,7 @@ gen7_draw_wm(struct ilo_render *r,
} }
/* 3DSTATE_PS */ /* 3DSTATE_PS */
if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) || if (DIRTY(FS) || DIRTY(BLEND) || r->instruction_bo_changed) {
r->instruction_bo_changed) {
const int num_samplers = vec->sampler[PIPE_SHADER_FRAGMENT].count;
const bool dual_blend = vec->blend->dual_blend; const bool dual_blend = vec->blend->dual_blend;
if ((ilo_dev_gen(r->dev) == ILO_GEN(7) || if ((ilo_dev_gen(r->dev) == ILO_GEN(7) ||
@ -551,7 +545,7 @@ gen7_draw_wm(struct ilo_render *r,
r->hw_ctx_changed) r->hw_ctx_changed)
gen7_wa_pre_3dstate_ps_max_threads(r); gen7_wa_pre_3dstate_ps_max_threads(r);
gen7_3DSTATE_PS(r->builder, vec->fs, num_samplers, dual_blend); gen7_3DSTATE_PS(r->builder, vec->fs, dual_blend);
} }
/* 3DSTATE_SCISSOR_STATE_POINTERS */ /* 3DSTATE_SCISSOR_STATE_POINTERS */
@ -562,8 +556,7 @@ gen7_draw_wm(struct ilo_render *r,
/* XXX what is the best way to know if this workaround is needed? */ /* XXX what is the best way to know if this workaround is needed? */
{ {
const bool emit_3dstate_ps = const bool emit_3dstate_ps = (DIRTY(FS) || DIRTY(BLEND));
(DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND));
const bool emit_3dstate_depth_buffer = const bool emit_3dstate_depth_buffer =
(DIRTY(FB) || DIRTY(DSA) || r->state_bo_changed); (DIRTY(FB) || DIRTY(DSA) || r->state_bo_changed);
@ -729,18 +722,18 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r,
const struct ilo_blitter *blitter) const struct ilo_blitter *blitter)
{ {
gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0); gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
gen6_3DSTATE_VS(r->builder, NULL, 0); gen6_3DSTATE_VS(r->builder, NULL);
gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0); gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
gen7_3DSTATE_HS(r->builder, NULL, 0); gen7_3DSTATE_HS(r->builder, NULL);
gen7_3DSTATE_TE(r->builder); gen7_3DSTATE_TE(r->builder);
gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0); gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
gen7_3DSTATE_DS(r->builder, NULL, 0); gen7_3DSTATE_DS(r->builder, NULL);
gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0); gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
gen7_3DSTATE_GS(r->builder, NULL, 0); gen7_3DSTATE_GS(r->builder, NULL);
gen7_3DSTATE_STREAMOUT(r->builder, 0x0, 0, false); gen7_3DSTATE_STREAMOUT(r->builder, 0x0, 0, false);
@ -778,7 +771,7 @@ gen7_rectlist_wm(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen7_wa_pre_3dstate_ps_max_threads(r); gen7_wa_pre_3dstate_ps_max_threads(r);
gen7_3DSTATE_PS(r->builder, NULL, 0, false); gen7_3DSTATE_PS(r->builder, NULL, false);
} }
static void static void

View file

@ -1000,6 +1000,9 @@ ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
case ILO_KERNEL_OUTPUT_COUNT: case ILO_KERNEL_OUTPUT_COUNT:
val = kernel->out.count; val = kernel->out.count;
break; break;
case ILO_KERNEL_SAMPLER_COUNT:
val = shader->info.num_samplers;
break;
case ILO_KERNEL_URB_DATA_START_REG: case ILO_KERNEL_URB_DATA_START_REG:
val = kernel->in.start_grf; val = kernel->in.start_grf;
break; break;

View file

@ -33,6 +33,7 @@
enum ilo_kernel_param { enum ilo_kernel_param {
ILO_KERNEL_INPUT_COUNT, ILO_KERNEL_INPUT_COUNT,
ILO_KERNEL_OUTPUT_COUNT, ILO_KERNEL_OUTPUT_COUNT,
ILO_KERNEL_SAMPLER_COUNT,
ILO_KERNEL_URB_DATA_START_REG, ILO_KERNEL_URB_DATA_START_REG,
ILO_KERNEL_SKIP_CBUF0_UPLOAD, ILO_KERNEL_SKIP_CBUF0_UPLOAD,
ILO_KERNEL_PCB_CBUF0_SIZE, ILO_KERNEL_PCB_CBUF0_SIZE,

View file

@ -338,18 +338,6 @@ ilo_bind_sampler_states(struct pipe_context *pipe, unsigned shader,
dst->cso[start + i] = NULL; dst->cso[start + i] = NULL;
} }
if (dst->count <= start + count) {
if (samplers)
count += start;
else
count = start;
while (count > 0 && !dst->cso[count - 1])
count--;
dst->count = count;
}
if (changed) { if (changed) {
switch (shader) { switch (shader) {
case PIPE_SHADER_VERTEX: case PIPE_SHADER_VERTEX:

View file

@ -292,7 +292,6 @@ struct ilo_sampler_cso {
struct ilo_sampler_state { struct ilo_sampler_state {
const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS]; const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS];
unsigned count;
}; };
struct ilo_view_surface { struct ilo_view_surface {

View file

@ -411,13 +411,14 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
const struct ilo_shader_state *vs, const struct ilo_shader_state *vs,
struct ilo_shader_cso *cso) struct ilo_shader_cso *cso)
{ {
int start_grf, vue_read_len, max_threads; int start_grf, vue_read_len, sampler_count, max_threads;
uint32_t dw2, dw4, dw5; uint32_t dw2, dw4, dw5;
ILO_DEV_ASSERT(dev, 6, 7.5); ILO_DEV_ASSERT(dev, 6, 7.5);
start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG); start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT); vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT);
/* /*
* From the Sandy Bridge PRM, volume 2 part 1, page 135: * From the Sandy Bridge PRM, volume 2 part 1, page 135:
@ -464,6 +465,7 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
} }
dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT | dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT |
vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT | vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
@ -933,13 +935,14 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
const struct ilo_shader_state *fs, const struct ilo_shader_state *fs,
struct ilo_shader_cso *cso) struct ilo_shader_cso *cso)
{ {
int start_grf, input_count, interps, max_threads; int start_grf, input_count, sampler_count, interps, max_threads;
uint32_t dw2, dw4, dw5, dw6; uint32_t dw2, dw4, dw5, dw6;
ILO_DEV_ASSERT(dev, 6, 6); ILO_DEV_ASSERT(dev, 6, 6);
start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
interps = ilo_shader_get_kernel_param(fs, interps = ilo_shader_get_kernel_param(fs,
ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS); ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
@ -947,6 +950,7 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
max_threads = (dev->gt == 2) ? 80 : 40; max_threads = (dev->gt == 2) ? 80 : 40;
dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT | dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT | 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |

View file

@ -39,13 +39,14 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
const struct ilo_shader_state *gs, const struct ilo_shader_state *gs,
struct ilo_shader_cso *cso) struct ilo_shader_cso *cso)
{ {
int start_grf, vue_read_len, max_threads; int start_grf, vue_read_len, sampler_count, max_threads;
uint32_t dw2, dw4, dw5; uint32_t dw2, dw4, dw5;
ILO_DEV_ASSERT(dev, 7, 7.5); ILO_DEV_ASSERT(dev, 7, 7.5);
start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG); start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT);
/* in pairs */ /* in pairs */
vue_read_len = (vue_read_len + 1) / 2; vue_read_len = (vue_read_len + 1) / 2;
@ -63,6 +64,7 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
} }
dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT | dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES | GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
@ -131,15 +133,17 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev,
const struct ilo_shader_state *fs, const struct ilo_shader_state *fs,
struct ilo_shader_cso *cso) struct ilo_shader_cso *cso)
{ {
int start_grf, max_threads; int start_grf, sampler_count, max_threads;
uint32_t dw2, dw4, dw5; uint32_t dw2, dw4, dw5;
uint32_t wm_interps, wm_dw1; uint32_t wm_interps, wm_dw1;
ILO_DEV_ASSERT(dev, 7, 7.5); ILO_DEV_ASSERT(dev, 7, 7.5);
start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
dw4 = GEN7_PS_DW4_POSOFFSET_NONE; dw4 = GEN7_PS_DW4_POSOFFSET_NONE;