mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
ilo: move device limits to ilo_dev_info or to GPEs
Device limits are properties of the hardware rather than of a rendering context, so it seems a bit weird to keep them in the context.
This commit is contained in:
parent
bef98f9c3a
commit
ce188bb252
10 changed files with 127 additions and 96 deletions
|
|
@ -299,7 +299,7 @@ gen6_pipeline_common_urb(struct ilo_3d_pipeline *p,
|
|||
/* in bytes */
|
||||
vs_entry_size *= sizeof(float) * 4;
|
||||
gs_entry_size *= sizeof(float) * 4;
|
||||
vs_total_size = ilo->urb.size * 1024;
|
||||
vs_total_size = ilo->dev->urb_size;
|
||||
|
||||
if (gs_active) {
|
||||
vs_total_size /= 2;
|
||||
|
|
@ -480,8 +480,7 @@ gen6_pipeline_vs(struct ilo_3d_pipeline *p,
|
|||
const struct ilo_shader *vs = (ilo->vs)? ilo->vs->shader : NULL;
|
||||
const int num_samplers = ilo->samplers[PIPE_SHADER_VERTEX].num_samplers;
|
||||
|
||||
p->gen6_3DSTATE_VS(p->dev,
|
||||
vs, ilo->max_vs_threads, num_samplers, p->cp);
|
||||
p->gen6_3DSTATE_VS(p->dev, vs, num_samplers, p->cp);
|
||||
}
|
||||
|
||||
if (emit_3dstate_constant_vs && p->dev->gen == ILO_GEN(6))
|
||||
|
|
@ -506,8 +505,7 @@ gen6_pipeline_gs(struct ilo_3d_pipeline *p,
|
|||
if (gs)
|
||||
assert(!gs->pcb.clip_state_size);
|
||||
|
||||
p->gen6_3DSTATE_GS(p->dev,
|
||||
gs, ilo->max_gs_threads, vs,
|
||||
p->gen6_3DSTATE_GS(p->dev, gs, vs,
|
||||
(vs) ? vs->cache_offset + vs->gs_offsets[num_vertices - 1] : 0,
|
||||
p->cp);
|
||||
}
|
||||
|
|
@ -666,8 +664,7 @@ gen6_pipeline_wm(struct ilo_3d_pipeline *p,
|
|||
if (p->dev->gen == ILO_GEN(6) && session->hw_ctx_changed)
|
||||
gen6_wa_pipe_control_wm_max_threads_stall(p);
|
||||
|
||||
p->gen6_3DSTATE_WM(p->dev,
|
||||
fs, ilo->max_wm_threads, num_samplers,
|
||||
p->gen6_3DSTATE_WM(p->dev, fs, num_samplers,
|
||||
ilo->rasterizer, dual_blend, cc_may_kill, p->cp);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -206,7 +206,7 @@ gen7_pipeline_common_urb(struct ilo_3d_pipeline *p,
|
|||
vs_entry_size = ilo->vertex_elements->num_elements;
|
||||
|
||||
vs_entry_size *= sizeof(float) * 4;
|
||||
vs_total_size = ilo->urb.size * 1024 - offset;
|
||||
vs_total_size = ilo->dev->urb_size - offset;
|
||||
|
||||
gen7_wa_pipe_control_vs_depth_stall(p);
|
||||
|
||||
|
|
@ -361,7 +361,7 @@ gen7_pipeline_gs(struct ilo_3d_pipeline *p,
|
|||
/* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */
|
||||
if (session->hw_ctx_changed) {
|
||||
p->gen6_3DSTATE_CONSTANT_GS(p->dev, 0, 0, 0, p->cp);
|
||||
p->gen7_3DSTATE_GS(p->dev, NULL, 0, 0, p->cp);
|
||||
p->gen7_3DSTATE_GS(p->dev, NULL, 0, p->cp);
|
||||
}
|
||||
|
||||
/* 3DSTATE_BINDING_TABLE_POINTERS_GS */
|
||||
|
|
@ -466,9 +466,7 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
|
|||
if (fs)
|
||||
assert(!fs->pcb.clip_state_size);
|
||||
|
||||
p->gen7_3DSTATE_PS(p->dev,
|
||||
fs, ilo->max_wm_threads, num_samplers,
|
||||
dual_blend, p->cp);
|
||||
p->gen7_3DSTATE_PS(p->dev, fs, num_samplers, dual_blend, p->cp);
|
||||
}
|
||||
|
||||
/* 3DSTATE_SCISSOR_STATE_POINTERS */
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ struct ilo_dev_info {
|
|||
|
||||
int gen;
|
||||
int gt;
|
||||
int urb_size;
|
||||
};
|
||||
|
||||
extern int ilo_debug;
|
||||
|
|
|
|||
|
|
@ -137,44 +137,6 @@ ilo_context_create(struct pipe_screen *screen, void *priv)
|
|||
ilo->winsys = is->winsys;
|
||||
ilo->dev = &is->dev;
|
||||
|
||||
/* stolen from classic i965 */
|
||||
/* WM maximum threads is number of EUs times number of threads per EU. */
|
||||
if (ilo->dev->gen >= ILO_GEN(7)) {
|
||||
if (ilo->dev->gt == 1) {
|
||||
ilo->max_wm_threads = 48;
|
||||
ilo->max_vs_threads = 36;
|
||||
ilo->max_gs_threads = 36;
|
||||
ilo->urb.size = 128;
|
||||
ilo->urb.max_vs_entries = 512;
|
||||
ilo->urb.max_gs_entries = 192;
|
||||
} else if (ilo->dev->gt == 2) {
|
||||
ilo->max_wm_threads = 172;
|
||||
ilo->max_vs_threads = 128;
|
||||
ilo->max_gs_threads = 128;
|
||||
ilo->urb.size = 256;
|
||||
ilo->urb.max_vs_entries = 704;
|
||||
ilo->urb.max_gs_entries = 320;
|
||||
} else {
|
||||
assert(!"Unknown gen7 device.");
|
||||
}
|
||||
} else if (ilo->dev->gen == ILO_GEN(6)) {
|
||||
if (ilo->dev->gt == 2) {
|
||||
ilo->max_wm_threads = 80;
|
||||
ilo->max_vs_threads = 60;
|
||||
ilo->max_gs_threads = 60;
|
||||
ilo->urb.size = 64; /* volume 5c.5 section 5.1 */
|
||||
ilo->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
|
||||
ilo->urb.max_gs_entries = 256;
|
||||
} else {
|
||||
ilo->max_wm_threads = 40;
|
||||
ilo->max_vs_threads = 24;
|
||||
ilo->max_gs_threads = 21; /* conservative; 24 if rendering disabled */
|
||||
ilo->urb.size = 32; /* volume 5c.5 section 5.1 */
|
||||
ilo->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
|
||||
ilo->urb.max_gs_entries = 256;
|
||||
}
|
||||
}
|
||||
|
||||
ilo->cp = ilo_cp_create(ilo->winsys, is->dev.has_llc);
|
||||
ilo->shader_cache = ilo_shader_cache_create(ilo->winsys);
|
||||
if (ilo->cp)
|
||||
|
|
|
|||
|
|
@ -73,15 +73,6 @@ struct ilo_context {
|
|||
struct intel_winsys *winsys;
|
||||
struct ilo_dev_info *dev;
|
||||
|
||||
int max_vs_threads;
|
||||
int max_gs_threads;
|
||||
int max_wm_threads;
|
||||
struct {
|
||||
int size;
|
||||
int max_vs_entries;
|
||||
int max_gs_entries;
|
||||
} urb;
|
||||
|
||||
struct ilo_cp *cp;
|
||||
struct intel_bo *last_cp_bo;
|
||||
|
||||
|
|
|
|||
|
|
@ -1021,13 +1021,13 @@ gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
|
|||
static void
|
||||
gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *vs,
|
||||
int max_threads, int num_samplers,
|
||||
int num_samplers,
|
||||
struct ilo_cp *cp)
|
||||
{
|
||||
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
|
||||
const uint8_t cmd_len = 6;
|
||||
uint32_t dw2, dw4, dw5;
|
||||
int vue_read_len;
|
||||
int vue_read_len, max_threads;
|
||||
|
||||
ILO_GPE_VALID_GEN(dev, 6, 7);
|
||||
|
||||
|
|
@ -1057,6 +1057,36 @@ gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
|
|||
if (!vue_read_len)
|
||||
vue_read_len = 1;
|
||||
|
||||
switch (dev->gen) {
|
||||
case ILO_GEN(6):
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 1 part 1, page 22:
|
||||
*
|
||||
* "Device # of EUs #Threads/EU
|
||||
* SNB GT2 12 5
|
||||
* SNB GT1 6 4"
|
||||
*/
|
||||
max_threads = (dev->gt == 2) ? 60 : 24;
|
||||
break;
|
||||
case ILO_GEN(7):
|
||||
/*
|
||||
* From the Ivy Bridge PRM, volume 1 part 1, page 18:
|
||||
*
|
||||
* "Device # of EUs #Threads/EU
|
||||
* Ivy Bridge (GT2) 16 8
|
||||
* Ivy Bridge (GT1) 6 6"
|
||||
*/
|
||||
max_threads = (dev->gt == 2) ? 128 : 36;
|
||||
break;
|
||||
case ILO_GEN(7.5):
|
||||
/* see brwCreateContext() */
|
||||
max_threads = (dev->gt == 2) ? 280 : 70;
|
||||
break;
|
||||
default:
|
||||
max_threads = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
dw2 = ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
|
||||
if (false)
|
||||
dw2 |= GEN6_VS_FLOATING_POINT_MODE_ALT;
|
||||
|
|
@ -1086,7 +1116,7 @@ gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
|
|||
static void
|
||||
gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *gs,
|
||||
int max_threads, const struct ilo_shader *vs,
|
||||
const struct ilo_shader *vs,
|
||||
uint32_t vs_offset,
|
||||
struct ilo_cp *cp)
|
||||
{
|
||||
|
|
@ -1105,7 +1135,7 @@ gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
|
|||
dw6 = 0;
|
||||
}
|
||||
else {
|
||||
int vue_read_len;
|
||||
int max_threads, vue_read_len;
|
||||
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 2 part 1, page 154:
|
||||
|
|
@ -1124,6 +1154,15 @@ gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
|
|||
*
|
||||
* As such, we always enable rendering, and limit the number of threads.
|
||||
*/
|
||||
if (dev->gt == 2) {
|
||||
/* maximum is 60, but limited to 28 */
|
||||
max_threads = 28;
|
||||
}
|
||||
else {
|
||||
/* maximum is 24, but limited to 21 (see brwCreateContext()) */
|
||||
max_threads = 21;
|
||||
}
|
||||
|
||||
if (max_threads > 28)
|
||||
max_threads = 28;
|
||||
|
||||
|
|
@ -1798,7 +1837,7 @@ gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
|
|||
static void
|
||||
gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *fs,
|
||||
int max_threads, int num_samplers,
|
||||
int num_samplers,
|
||||
const struct pipe_rasterizer_state *rasterizer,
|
||||
bool dual_blend, bool cc_may_kill,
|
||||
struct ilo_cp *cp)
|
||||
|
|
@ -1807,9 +1846,13 @@ gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
|
|||
const uint8_t cmd_len = 9;
|
||||
const int num_samples = 1;
|
||||
uint32_t dw2, dw4, dw5, dw6;
|
||||
int max_threads;
|
||||
|
||||
ILO_GPE_VALID_GEN(dev, 6, 6);
|
||||
|
||||
/* see brwCreateContext() */
|
||||
max_threads = (dev->gt == 2) ? 80 : 40;
|
||||
|
||||
if (!fs) {
|
||||
ilo_cp_begin(cp, cmd_len);
|
||||
ilo_cp_write(cp, cmd | (cmd_len - 2));
|
||||
|
|
|
|||
|
|
@ -236,13 +236,13 @@ typedef void
|
|||
typedef void
|
||||
(*ilo_gpe_gen6_3DSTATE_VS)(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *vs,
|
||||
int max_threads, int num_samplers,
|
||||
int num_samplers,
|
||||
struct ilo_cp *cp);
|
||||
|
||||
typedef void
|
||||
(*ilo_gpe_gen6_3DSTATE_GS)(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *gs,
|
||||
int max_threads, const struct ilo_shader *vs,
|
||||
const struct ilo_shader *vs,
|
||||
uint32_t vs_offset,
|
||||
struct ilo_cp *cp);
|
||||
|
||||
|
|
@ -264,7 +264,7 @@ typedef void
|
|||
typedef void
|
||||
(*ilo_gpe_gen6_3DSTATE_WM)(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *fs,
|
||||
int max_threads, int num_samplers,
|
||||
int num_samplers,
|
||||
const struct pipe_rasterizer_state *rasterizer,
|
||||
bool dual_blend, bool cc_may_kill,
|
||||
struct ilo_cp *cp);
|
||||
|
|
|
|||
|
|
@ -95,15 +95,25 @@ gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
|
|||
static void
|
||||
gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *gs,
|
||||
int max_threads, int num_samplers,
|
||||
int num_samplers,
|
||||
struct ilo_cp *cp)
|
||||
{
|
||||
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
|
||||
const uint8_t cmd_len = 7;
|
||||
uint32_t dw2, dw4, dw5;
|
||||
int max_threads;
|
||||
|
||||
ILO_GPE_VALID_GEN(dev, 7, 7);
|
||||
|
||||
switch (dev->gen) {
|
||||
case ILO_GEN(7):
|
||||
max_threads = (dev->gt == 2) ? 128 : 36;
|
||||
break;
|
||||
default:
|
||||
max_threads = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!gs) {
|
||||
ilo_cp_begin(cp, cmd_len);
|
||||
ilo_cp_write(cp, cmd | (cmd_len - 2));
|
||||
|
|
@ -597,27 +607,18 @@ gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
|
|||
static void
|
||||
gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *fs,
|
||||
int max_threads, int num_samplers,
|
||||
bool dual_blend,
|
||||
int num_samplers, bool dual_blend,
|
||||
struct ilo_cp *cp)
|
||||
{
|
||||
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
|
||||
const uint8_t cmd_len = 8;
|
||||
uint32_t dw2, dw4, dw5;
|
||||
int max_threads;
|
||||
|
||||
ILO_GPE_VALID_GEN(dev, 7, 7);
|
||||
|
||||
/*
|
||||
* From the Ivy Bridge PRM, volume 2 part 1, page 286:
|
||||
*
|
||||
* "This field (Maximum Number of Threads) must have an odd value so
|
||||
* that the max number of PS threads is even."
|
||||
*/
|
||||
max_threads &= ~1;
|
||||
|
||||
/* the valid range is [4, 48] */
|
||||
if (max_threads < 4)
|
||||
max_threads = 4;
|
||||
/* see brwCreateContext() */
|
||||
max_threads = (dev->gt == 2) ? 172 : 48;
|
||||
|
||||
if (!fs) {
|
||||
ilo_cp_begin(cp, cmd_len);
|
||||
|
|
@ -793,7 +794,7 @@ gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
|
|||
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
|
||||
const uint8_t cmd_len = 2;
|
||||
const int row_size = 64; /* 512 bits */
|
||||
int alloc_size, num_entries;
|
||||
int alloc_size, num_entries, min_entries, max_entries;
|
||||
|
||||
ILO_GPE_VALID_GEN(dev, 7, 7);
|
||||
|
||||
|
|
@ -824,16 +825,27 @@ gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
|
|||
|
||||
switch (subop) {
|
||||
case 0x30: /* 3DSTATE_URB_VS */
|
||||
assert(num_entries >= 32);
|
||||
if (dev->gt == 2 && num_entries > 704)
|
||||
num_entries = 704;
|
||||
else if (dev->gt == 1 && num_entries > 512)
|
||||
num_entries = 512;
|
||||
min_entries = 32;
|
||||
max_entries = (dev->gt == 2) ? 704 : 512;
|
||||
|
||||
assert(num_entries >= min_entries);
|
||||
if (num_entries > max_entries)
|
||||
num_entries = max_entries;
|
||||
break;
|
||||
case 0x31: /* 3DSTATE_URB_HS */
|
||||
max_entries = (dev->gt == 2) ? 64 : 32;
|
||||
if (num_entries > max_entries)
|
||||
num_entries = max_entries;
|
||||
break;
|
||||
case 0x32: /* 3DSTATE_URB_DS */
|
||||
if (num_entries)
|
||||
assert(num_entries >= 138);
|
||||
break;
|
||||
case 0x33: /* 3DSTATE_URB_GS */
|
||||
max_entries = (dev->gt == 2) ? 320 : 192;
|
||||
if (num_entries > max_entries)
|
||||
num_entries = max_entries;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ typedef ilo_gpe_gen6_3DSTATE_VS ilo_gpe_gen7_3DSTATE_VS;
|
|||
typedef void
|
||||
(*ilo_gpe_gen7_3DSTATE_GS)(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *gs,
|
||||
int max_threads, int num_samplers,
|
||||
int num_samplers,
|
||||
struct ilo_cp *cp);
|
||||
|
||||
typedef ilo_gpe_gen6_3DSTATE_CLIP ilo_gpe_gen7_3DSTATE_CLIP;
|
||||
|
|
@ -239,8 +239,7 @@ typedef void
|
|||
typedef void
|
||||
(*ilo_gpe_gen7_3DSTATE_PS)(const struct ilo_dev_info *dev,
|
||||
const struct ilo_shader *fs,
|
||||
int max_threads, int num_samplers,
|
||||
bool dual_blend,
|
||||
int num_samplers, bool dual_blend,
|
||||
struct ilo_cp *cp);
|
||||
|
||||
typedef void
|
||||
|
|
|
|||
|
|
@ -626,29 +626,57 @@ init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info)
|
|||
dev->has_gen7_sol_reset = info->has_gen7_sol_reset;
|
||||
dev->has_llc = info->has_llc;
|
||||
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 4 part 2, page 18:
|
||||
*
|
||||
* "[DevSNB]: The GT1 product's URB provides 32KB of storage, arranged
|
||||
* as 1024 256-bit rows. The GT2 product's URB provides 64KB of
|
||||
* storage, arranged as 2048 256-bit rows. A row corresponds in size
|
||||
* to an EU GRF register. Read/write access to the URB is generally
|
||||
* supported on a row-granular basis."
|
||||
*
|
||||
* From the Ivy Bridge PRM, volume 4 part 2, page 17:
|
||||
*
|
||||
* "URB Size URB Rows URB Rows when SLM Enabled
|
||||
* 128k 4096 2048
|
||||
* 256k 8192 4096"
|
||||
*/
|
||||
|
||||
if (IS_HASWELL(info->devid)) {
|
||||
dev->gen = ILO_GEN(7.5);
|
||||
|
||||
if (IS_HSW_GT2(info->devid))
|
||||
if (IS_HSW_GT2(info->devid)) {
|
||||
dev->gt = 2;
|
||||
else
|
||||
dev->urb_size = 256 * 1024;
|
||||
}
|
||||
else {
|
||||
dev->gt = 1;
|
||||
dev->urb_size = 128 * 1024;
|
||||
}
|
||||
}
|
||||
else if (IS_GEN7(info->devid)) {
|
||||
dev->gen = ILO_GEN(7);
|
||||
|
||||
if (IS_IVB_GT2(info->devid))
|
||||
if (IS_IVB_GT2(info->devid)) {
|
||||
dev->gt = 2;
|
||||
else
|
||||
dev->urb_size = 256 * 1024;
|
||||
}
|
||||
else {
|
||||
dev->gt = 1;
|
||||
dev->urb_size = 128 * 1024;
|
||||
}
|
||||
}
|
||||
else if (IS_GEN6(info->devid)) {
|
||||
dev->gen = ILO_GEN(6);
|
||||
|
||||
if (IS_SNB_GT2(info->devid))
|
||||
if (IS_SNB_GT2(info->devid)) {
|
||||
dev->gt = 2;
|
||||
else
|
||||
dev->urb_size = 64 * 1024;
|
||||
}
|
||||
else {
|
||||
dev->gt = 1;
|
||||
dev->urb_size = 32 * 1024;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ilo_err("unknown GPU generation\n");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue