ilo: embed ilo_state_urb in ilo_state_vector

This commit is contained in:
Chia-I Wu 2015-05-28 13:43:56 +08:00
parent eaf2c73899
commit 9bfa987fb0
9 changed files with 189 additions and 354 deletions

View file

@ -38,290 +38,153 @@
#include "ilo_state_3d.h"
#include "ilo_state_sampler.h"
#include "ilo_state_sol.h"
#include "ilo_state_urb.h"
#include "ilo_builder.h"
static inline void
gen6_3DSTATE_URB(struct ilo_builder *builder,
int vs_total_size, int gs_total_size,
int vs_entry_size, int gs_entry_size)
const struct ilo_state_urb *urb)
{
const uint8_t cmd_len = 3;
const int row_size = 128; /* 1024 bits */
int vs_alloc_size, gs_alloc_size;
int vs_num_entries, gs_num_entries;
uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 6, 6);
/* in 1024-bit URB rows */
vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
/* the valid range is [1, 5] */
if (!vs_alloc_size)
vs_alloc_size = 1;
if (!gs_alloc_size)
gs_alloc_size = 1;
assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
/* the valid range is [24, 256] in multiples of 4 */
vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
if (vs_num_entries > 256)
vs_num_entries = 256;
assert(vs_num_entries >= 24);
/* the valid range is [0, 256] in multiples of 4 */
gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
if (gs_num_entries > 256)
gs_num_entries = 256;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2);
dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
(gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
}
static inline void
gen7_3dstate_push_constant_alloc(struct ilo_builder *builder,
int subop, int offset, int size)
{
const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
GEN6_RENDER_SUBTYPE_3D |
subop;
const uint8_t cmd_len = 2;
const int slice_count = ((ilo_dev_gen(builder->dev) == ILO_GEN(7.5) &&
builder->dev->gt == 3) ||
ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 2 : 1;
uint32_t *dw;
int end;
ILO_DEV_ASSERT(builder->dev, 7, 8);
/* VS, HS, DS, GS, and PS variants */
assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS &&
subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS);
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 68:
*
* "(A table that says the maximum size of each constant buffer is
* 16KB")
*
* From the Ivy Bridge PRM, volume 2 part 1, page 115:
*
* "The sum of the Constant Buffer Offset and the Constant Buffer Size
* may not exceed the maximum value of the Constant Buffer Size."
*
* Thus, the valid range of buffer end is [0KB, 16KB].
*/
end = (offset + size) / 1024;
if (end > 16 * slice_count) {
assert(!"invalid constant buffer end");
end = 16 * slice_count;
}
/* the valid range of buffer offset is [0KB, 15KB] */
offset = (offset + 1023) / 1024;
if (offset > 15 * slice_count) {
assert(!"invalid constant buffer offset");
offset = 15 * slice_count;
}
if (offset > end) {
assert(!size);
offset = end;
}
/* the valid range of buffer size is [0KB, 15KB] */
size = end - offset;
if (size > 15 * slice_count) {
assert(!"invalid constant buffer size");
size = 15 * slice_count;
}
assert(offset % slice_count == 0 && size % slice_count == 0);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = cmd | (cmd_len - 2);
dw[1] = offset << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT |
size;
/* see urb_set_gen6_3DSTATE_URB() */
dw[1] = urb->urb[0];
dw[2] = urb->urb[1];
}
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
int offset, int size)
const struct ilo_state_urb *urb)
{
gen7_3dstate_push_constant_alloc(builder,
GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size);
const uint8_t cmd_len = 2;
uint32_t *dw;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_VS) |
(cmd_len - 2);
/* see urb_set_gen7_3dstate_push_constant_alloc() */
dw[1] = urb->pcb[0];
}
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder,
int offset, int size)
const struct ilo_state_urb *urb)
{
gen7_3dstate_push_constant_alloc(builder,
GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size);
const uint8_t cmd_len = 2;
uint32_t *dw;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_HS) |
(cmd_len - 2);
/* see urb_set_gen7_3dstate_push_constant_alloc() */
dw[1] = urb->pcb[1];
}
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder,
int offset, int size)
const struct ilo_state_urb *urb)
{
gen7_3dstate_push_constant_alloc(builder,
GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size);
const uint8_t cmd_len = 2;
uint32_t *dw;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_DS) |
(cmd_len - 2);
/* see urb_set_gen7_3dstate_push_constant_alloc() */
dw[1] = urb->pcb[2];
}
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder,
int offset, int size)
const struct ilo_state_urb *urb)
{
gen7_3dstate_push_constant_alloc(builder,
GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size);
const uint8_t cmd_len = 2;
uint32_t *dw;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_GS) |
(cmd_len - 2);
/* see urb_set_gen7_3dstate_push_constant_alloc() */
dw[1] = urb->pcb[3];
}
static inline void
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
int offset, int size)
const struct ilo_state_urb *urb)
{
gen7_3dstate_push_constant_alloc(builder,
GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size);
}
static inline void
gen7_3dstate_urb(struct ilo_builder *builder,
int subop, int offset, int size,
int entry_size)
{
const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
GEN6_RENDER_SUBTYPE_3D |
subop;
const uint8_t cmd_len = 2;
const int row_size = 64; /* 512 bits */
int alloc_size, num_entries, min_entries, max_entries;
uint32_t *dw;
ILO_DEV_ASSERT(builder->dev, 7, 8);
/* VS, HS, DS, and GS variants */
assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS &&
subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS);
/* in multiples of 8KB */
assert(offset % 8192 == 0);
offset /= 8192;
/* in multiple of 512-bit rows */
alloc_size = (entry_size + row_size - 1) / row_size;
if (!alloc_size)
alloc_size = 1;
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 34:
*
* "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
* cause performance to decrease due to banking in the URB. Element
* sizes of 16 to 20 should be programmed with six 512-bit URB rows."
*/
if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5)
alloc_size = 6;
/* in multiples of 8 */
num_entries = (size / row_size / alloc_size) & ~7;
switch (subop) {
case GEN7_RENDER_OPCODE_3DSTATE_URB_VS:
switch (ilo_dev_gen(builder->dev)) {
case ILO_GEN(8):
max_entries = 2560;
min_entries = 64;
break;
case ILO_GEN(7.5):
max_entries = (builder->dev->gt >= 2) ? 1664 : 640;
min_entries = (builder->dev->gt >= 2) ? 64 : 32;
break;
case ILO_GEN(7):
default:
max_entries = (builder->dev->gt == 2) ? 704 : 512;
min_entries = 32;
break;
}
assert(num_entries >= min_entries);
if (num_entries > max_entries)
num_entries = max_entries;
break;
case GEN7_RENDER_OPCODE_3DSTATE_URB_HS:
max_entries = (builder->dev->gt == 2) ? 64 : 32;
if (num_entries > max_entries)
num_entries = max_entries;
break;
case GEN7_RENDER_OPCODE_3DSTATE_URB_DS:
if (num_entries)
assert(num_entries >= 138);
break;
case GEN7_RENDER_OPCODE_3DSTATE_URB_GS:
switch (ilo_dev_gen(builder->dev)) {
case ILO_GEN(8):
max_entries = 960;
break;
case ILO_GEN(7.5):
max_entries = (builder->dev->gt >= 2) ? 640 : 256;
break;
case ILO_GEN(7):
default:
max_entries = (builder->dev->gt == 2) ? 320 : 192;
break;
}
if (num_entries > max_entries)
num_entries = max_entries;
break;
default:
break;
}
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = cmd | (cmd_len - 2);
dw[1] = offset << GEN7_URB_DW1_OFFSET__SHIFT |
(alloc_size - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT |
num_entries;
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_PS) |
(cmd_len - 2);
/* see urb_set_gen7_3dstate_push_constant_alloc() */
dw[1] = urb->pcb[4];
}
static inline void
gen7_3DSTATE_URB_VS(struct ilo_builder *builder,
int offset, int size, int entry_size)
const struct ilo_state_urb *urb)
{
gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS,
offset, size, entry_size);
const uint8_t cmd_len = 2;
uint32_t *dw;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_VS) | (cmd_len - 2);
/*
 * urb->urb[0] is written as-is into DW1 of 3DSTATE_URB_VS.
 *
 * NOTE(review): this comment used to cite
 * urb_set_gen7_3dstate_push_constant_alloc(), which looks copy-pasted from
 * the PUSH_CONSTANT_ALLOC helpers (those read urb->pcb[], not urb->urb[]).
 * Confirm the name of the setter that fills urb->urb[] in ilo_state_urb.
 */
dw[1] = urb->urb[0];
}
static inline void
gen7_3DSTATE_URB_HS(struct ilo_builder *builder,
int offset, int size, int entry_size)
const struct ilo_state_urb *urb)
{
gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS,
offset, size, entry_size);
const uint8_t cmd_len = 2;
uint32_t *dw;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_HS) | (cmd_len - 2);
/*
 * urb->urb[1] is written as-is into DW1 of 3DSTATE_URB_HS.
 *
 * NOTE(review): this comment used to cite
 * urb_set_gen7_3dstate_push_constant_alloc(), which looks copy-pasted from
 * the PUSH_CONSTANT_ALLOC helpers (those read urb->pcb[], not urb->urb[]).
 * Confirm the name of the setter that fills urb->urb[] in ilo_state_urb.
 */
dw[1] = urb->urb[1];
}
static inline void
gen7_3DSTATE_URB_DS(struct ilo_builder *builder,
int offset, int size, int entry_size)
const struct ilo_state_urb *urb)
{
gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS,
offset, size, entry_size);
const uint8_t cmd_len = 2;
uint32_t *dw;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_DS) | (cmd_len - 2);
/*
 * urb->urb[2] is written as-is into DW1 of 3DSTATE_URB_DS.
 *
 * NOTE(review): this comment used to cite
 * urb_set_gen7_3dstate_push_constant_alloc(), which looks copy-pasted from
 * the PUSH_CONSTANT_ALLOC helpers (those read urb->pcb[], not urb->urb[]).
 * Confirm the name of the setter that fills urb->urb[] in ilo_state_urb.
 */
dw[1] = urb->urb[2];
}
static inline void
gen7_3DSTATE_URB_GS(struct ilo_builder *builder,
int offset, int size, int entry_size)
const struct ilo_state_urb *urb)
{
gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS,
offset, size, entry_size);
const uint8_t cmd_len = 2;
uint32_t *dw;
ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_GS) | (cmd_len - 2);
/*
 * urb->urb[3] is written as-is into DW1 of 3DSTATE_URB_GS.
 *
 * NOTE(review): this comment used to cite
 * urb_set_gen7_3dstate_push_constant_alloc(), which looks copy-pasted from
 * the PUSH_CONSTANT_ALLOC helpers (those read urb->pcb[], not urb->urb[]).
 * Confirm the name of the setter that fills urb->urb[] in ilo_state_urb.
 */
dw[1] = urb->urb[3];
}
static inline void

View file

@ -70,6 +70,8 @@ struct ilo_blitter {
uint32_t depth_clear_value;
struct ilo_state_urb urb;
struct {
struct ilo_surface_cso dst;
unsigned width, height;

View file

@ -78,6 +78,9 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter)
ilo_state_viewport_init_for_rectlist(&blitter->vp, blitter->ilo->dev,
blitter->vp_data, sizeof(blitter->vp_data));
ilo_state_urb_init_for_rectlist(&blitter->urb, blitter->ilo->dev,
blitter->ve.count + blitter->ve.prepend_nosrc_cso);
blitter->initialized = true;
return true;

View file

@ -448,6 +448,8 @@ draw_session_prepare(struct ilo_render *render,
session->prim_changed = true;
session->primitive_restart_changed = true;
ilo_state_urb_full_delta(&vec->urb, render->dev, &session->urb_delta);
ilo_state_raster_full_delta(&vec->rasterizer->rs, render->dev,
&session->rs_delta);
@ -462,6 +464,9 @@ draw_session_prepare(struct ilo_render *render,
session->primitive_restart_changed =
(render->state.primitive_restart != vec->draw->primitive_restart);
ilo_state_urb_get_delta(&vec->urb, render->dev,
&render->state.urb, &session->urb_delta);
if (vec->dirty & ILO_DIRTY_RASTERIZER) {
ilo_state_raster_get_delta(&vec->rasterizer->rs, render->dev,
&render->state.rs, &session->rs_delta);
@ -493,6 +498,7 @@ draw_session_end(struct ilo_render *render,
render->state.reduced_prim = session->reduced_prim;
render->state.primitive_restart = vec->draw->primitive_restart;
render->state.urb = vec->urb;
render->state.rs = vec->rasterizer->rs;
render->state.cc = vec->blend->cc;
}

View file

@ -90,6 +90,7 @@ struct ilo_render {
int reduced_prim;
int so_max_vertices;
struct ilo_state_urb urb;
struct ilo_state_raster rs;
struct ilo_state_cc cc;
@ -148,6 +149,7 @@ struct ilo_render_draw_session {
bool prim_changed;
bool primitive_restart_changed;
struct ilo_state_urb_delta urb_delta;
struct ilo_state_raster_delta rs_delta;
struct ilo_state_viewport_delta vp_delta;
struct ilo_state_cc_delta cc_delta;

View file

@ -329,64 +329,19 @@ gen6_draw_common_urb(struct ilo_render *r,
const struct ilo_state_vector *vec,
struct ilo_render_draw_session *session)
{
const bool gs_active = (vec->gs || (vec->vs &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)));
/* 3DSTATE_URB */
if (DIRTY(VE) || DIRTY(VS) || DIRTY(GS)) {
const bool gs_active = (vec->gs || (vec->vs &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)));
int vs_entry_size, gs_entry_size;
int vs_total_size, gs_total_size;
vs_entry_size = (vec->vs) ?
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
/*
* As indicated by 2e712e41db0c0676e9f30fc73172c0e8de8d84d4, VF and VS
* share VUE handles. The VUE allocation size must be large enough to
* store either VF outputs (number of VERTEX_ELEMENTs) and VS outputs.
*
* I am not sure if the PRM explicitly states that VF and VS share VUE
* handles. But here is a citation that implies so:
*
* From the Sandy Bridge PRM, volume 2 part 1, page 44:
*
* "Once a FF stage that spawn threads has sufficient input to
* initiate a thread, it must guarantee that it is safe to request
* the thread initiation. For all these FF stages, this check is
* based on :
*
* - The availability of output URB entries:
* - VS: As the input URB entries are overwritten with the
* VS-generated output data, output URB availability isn't a
* factor."
*/
if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
gs_entry_size = (vec->gs) ?
ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT) :
(gs_active) ? vs_entry_size : 0;
/* in bytes */
vs_entry_size *= sizeof(float) * 4;
gs_entry_size *= sizeof(float) * 4;
vs_total_size = r->dev->urb_size;
if (gs_active) {
vs_total_size /= 2;
gs_total_size = vs_total_size;
}
else {
gs_total_size = 0;
}
gen6_3DSTATE_URB(r->builder, vs_total_size, gs_total_size,
vs_entry_size, gs_entry_size);
if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS |
ILO_STATE_URB_3DSTATE_URB_GS)) {
gen6_3DSTATE_URB(r->builder, &vec->urb);
if (r->state.gs.active && !gs_active)
gen6_wa_post_3dstate_urb_no_gs(r);
r->state.gs.active = gs_active;
}
r->state.gs.active = gs_active;
}
static void
@ -920,9 +875,7 @@ ilo_render_emit_rectlist_commands_gen6(struct ilo_render *r,
gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve);
gen6_3DSTATE_URB(r->builder, r->dev->urb_size, 0,
(blitter->ve.count + blitter->ve.prepend_nosrc_cso) * 4 * sizeof(float),
0);
gen6_3DSTATE_URB(r->builder, &blitter->urb);
if (r->state.gs.active) {
gen6_wa_post_3dstate_urb_no_gs(r);

View file

@ -200,40 +200,17 @@ gen7_draw_common_urb(struct ilo_render *r,
struct ilo_render_draw_session *session)
{
/* 3DSTATE_URB_{VS,GS,HS,DS} */
if (DIRTY(VE) || DIRTY(VS)) {
/* the first 16KB are reserved for VS and PS PCBs */
const int offset =
(ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
(ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
32768 : 16384;
int vs_entry_size, vs_total_size;
vs_entry_size = (vec->vs) ?
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 35:
*
* "Programming Restriction: As the VS URB entry serves as both the
* per-vertex input and output of the VS shader, the VS URB
* Allocation Size must be sized to the maximum of the vertex input
* and output structures."
*/
if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
vs_entry_size *= sizeof(float) * 4;
vs_total_size = r->dev->urb_size - offset;
if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS |
ILO_STATE_URB_3DSTATE_URB_HS |
ILO_STATE_URB_3DSTATE_URB_DS |
ILO_STATE_URB_3DSTATE_URB_GS)) {
if (ilo_dev_gen(r->dev) == ILO_GEN(7))
gen7_wa_pre_vs(r);
gen7_3DSTATE_URB_VS(r->builder,
offset, vs_total_size, vs_entry_size);
gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
gen7_3DSTATE_URB_VS(r->builder, &vec->urb);
gen7_3DSTATE_URB_GS(r->builder, &vec->urb);
gen7_3DSTATE_URB_HS(r->builder, &vec->urb);
gen7_3DSTATE_URB_DS(r->builder, &vec->urb);
}
}
@ -243,22 +220,15 @@ gen7_draw_common_pcb_alloc(struct ilo_render *r,
struct ilo_render_draw_session *session)
{
/* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */
if (r->hw_ctx_changed) {
/*
* Push constant buffers are only allowed to take up at most the first
* 16KB of the URB. Split the space evenly for VS and FS.
*/
const int max_size =
(ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
(ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
32768 : 16384;
const int size = max_size / 2;
int offset = 0;
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
offset += size;
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
if (session->urb_delta.dirty &
(ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS |
ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS |
ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS |
ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS |
ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS)) {
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &vec->urb);
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(r->builder, &vec->urb);
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &vec->urb);
if (ilo_dev_gen(r->dev) == ILO_GEN(7))
gen7_wa_post_3dstate_push_constant_alloc_ps(r);
@ -671,21 +641,8 @@ static void
gen7_rectlist_pcb_alloc(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
/*
* Push constant buffers are only allowed to take up at most the first
* 16KB of the URB. Split the space evenly for VS and FS.
*/
const int max_size =
(ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
(ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
32768 : 16384;
const int size = max_size / 2;
int offset = 0;
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
offset += size;
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &blitter->urb);
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &blitter->urb);
if (ilo_dev_gen(r->dev) == ILO_GEN(7))
gen7_wa_post_3dstate_push_constant_alloc_ps(r);
@ -695,19 +652,10 @@ static void
gen7_rectlist_urb(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
/* the first 16KB are reserved for VS and PS PCBs */
const int offset =
(ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
(ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
32768 : 16384;
gen7_3DSTATE_URB_VS(r->builder, offset, r->dev->urb_size - offset,
(blitter->ve.count + blitter->ve.prepend_nosrc_cso) *
4 * sizeof(float));
gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
gen7_3DSTATE_URB_VS(r->builder, &blitter->urb);
gen7_3DSTATE_URB_GS(r->builder, &blitter->urb);
gen7_3DSTATE_URB_HS(r->builder, &blitter->urb);
gen7_3DSTATE_URB_DS(r->builder, &blitter->urb);
}
static void

View file

@ -477,6 +477,55 @@ finalize_vertex_elements(struct ilo_context *ilo)
}
}
static void
finalize_urb(struct ilo_context *ilo)
{
const uint16_t attr_size = sizeof(uint32_t) * 4;
const struct ilo_dev *dev = ilo->dev;
struct ilo_state_vector *vec = &ilo->state_vector;
struct ilo_state_urb_info info;
if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS |
ILO_DIRTY_GS | ILO_DIRTY_FS)))
return;
memset(&info, 0, sizeof(info));
info.ve_entry_size = attr_size *
(vec->ve->count + vec->ve->prepend_nosrc_cso);
if (vec->vs) {
info.vs_const_data = (bool)
(ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_PCB_CBUF0_SIZE) +
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_PCB_UCP_SIZE));
info.vs_entry_size = attr_size *
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT);
}
if (vec->gs) {
info.gs_const_data = (bool)
ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_PCB_CBUF0_SIZE);
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 189:
*
* "All outputs of a GS thread will be stored in the single GS
* thread output URB entry."
*
* TODO
*/
info.gs_entry_size = attr_size *
ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT);
}
if (vec->fs) {
info.ps_const_data = (bool)
ilo_shader_get_kernel_param(vec->fs, ILO_KERNEL_PCB_CBUF0_SIZE);
}
ilo_state_urb_set_info(&vec->urb, dev, &info);
}
static void
finalize_viewport(struct ilo_context *ilo)
{
@ -680,6 +729,7 @@ ilo_finalize_3d_states(struct ilo_context *ilo,
finalize_index_buffer(ilo);
finalize_vertex_elements(ilo);
finalize_urb(ilo);
finalize_rasterizer(ilo);
finalize_viewport(ilo);
finalize_blend(ilo);
@ -2065,6 +2115,8 @@ void
ilo_state_vector_init(const struct ilo_dev *dev,
struct ilo_state_vector *vec)
{
struct ilo_state_urb_info urb_info;
vec->sample_mask = ~0u;
ilo_state_viewport_init_data_only(&vec->viewport.vp, dev,
@ -2079,6 +2131,9 @@ ilo_state_vector_init(const struct ilo_dev *dev,
ilo_state_sampler_init_disabled(&vec->disabled_sampler, dev);
memset(&urb_info, 0, sizeof(urb_info));
ilo_state_urb_init(&vec->urb, dev, &urb_info);
util_dynarray_init(&vec->global_binding.bindings);
vec->dirty = ILO_DIRTY_ALL;

View file

@ -34,6 +34,7 @@
#include "core/ilo_state_sampler.h"
#include "core/ilo_state_sol.h"
#include "core/ilo_state_surface.h"
#include "core/ilo_state_urb.h"
#include "core/ilo_state_viewport.h"
#include "core/ilo_state_zs.h"
#include "pipe/p_state.h"
@ -275,6 +276,8 @@ struct ilo_state_vector {
struct ilo_fb_state fb;
struct ilo_state_urb urb;
/* shader resources */
struct ilo_sampler_state sampler[PIPE_SHADER_TYPES];
struct ilo_view_state view[PIPE_SHADER_TYPES];