mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 13:00:09 +01:00
ilo: improve media command helpers
They were written for Gen6 but mostly untested. Make them work for Gen7+. Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
This commit is contained in:
parent
a2054af85c
commit
9dd596c99f
1 changed files with 146 additions and 76 deletions
|
|
@ -32,134 +32,169 @@
|
|||
#include "intel_winsys.h"
|
||||
|
||||
#include "ilo_common.h"
|
||||
#include "ilo_shader.h"
|
||||
#include "ilo_builder.h"
|
||||
|
||||
struct gen6_idrt_data {
|
||||
const struct ilo_shader_state *cs;
|
||||
|
||||
uint32_t sampler_offset;
|
||||
uint32_t binding_table_offset;
|
||||
|
||||
unsigned curbe_size;
|
||||
unsigned thread_group_size;
|
||||
};
|
||||
|
||||
static inline void
|
||||
gen6_MEDIA_VFE_STATE(struct ilo_builder *builder,
|
||||
int max_threads, int num_urb_entries,
|
||||
int urb_entry_size)
|
||||
unsigned curbe_alloc, bool use_slm)
|
||||
{
|
||||
const uint8_t cmd_len = 8;
|
||||
uint32_t dw2, dw4, *dw;
|
||||
const unsigned idrt_alloc =
|
||||
((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 64 : 32) * 32;
|
||||
int max_threads;
|
||||
uint32_t *dw;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 6, 6);
|
||||
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
|
||||
|
||||
dw2 = (max_threads - 1) << 16 |
|
||||
num_urb_entries << 8 |
|
||||
1 << 7 | /* Reset Gateway Timer */
|
||||
1 << 6; /* Bypass Gateway Control */
|
||||
max_threads = builder->dev->thread_count;
|
||||
|
||||
dw4 = urb_entry_size << 16 | /* URB Entry Allocation Size */
|
||||
480; /* CURBE Allocation Size */
|
||||
curbe_alloc = align(curbe_alloc, 32);
|
||||
assert(idrt_alloc + curbe_alloc <= builder->dev->urb_size / (use_slm + 1));
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_VFE_STATE) | (cmd_len - 2);
|
||||
dw[1] = 0; /* scratch */
|
||||
dw[2] = dw2;
|
||||
dw[3] = 0; /* MBZ */
|
||||
dw[4] = dw4;
|
||||
dw[5] = 0; /* scoreboard */
|
||||
|
||||
dw[2] = (max_threads - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
|
||||
0 << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
|
||||
GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
|
||||
GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL;
|
||||
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
|
||||
dw[2] |= GEN7_VFE_DW2_GPGPU_MODE;
|
||||
|
||||
dw[3] = 0;
|
||||
|
||||
dw[4] = 0 << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT |
|
||||
(curbe_alloc / 32);
|
||||
|
||||
dw[5] = 0;
|
||||
dw[6] = 0;
|
||||
dw[7] = 0;
|
||||
}
|
||||
|
||||
static inline void
|
||||
gen6_MEDIA_CURBE_LOAD(struct ilo_builder *builder,
|
||||
uint32_t buf, int size)
|
||||
uint32_t offset, unsigned size)
|
||||
{
|
||||
const uint8_t cmd_len = 4;
|
||||
uint32_t *dw;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 6, 6);
|
||||
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
|
||||
|
||||
assert(buf % 32 == 0);
|
||||
/* gen6_push_constant_buffer() allocates buffers in 256-bit units */
|
||||
size = align(size, 32);
|
||||
assert(offset % 32 == 0 && size % 32 == 0);
|
||||
/* GPU hangs if size is zero */
|
||||
assert(size);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_CURBE_LOAD) | (cmd_len - 2);
|
||||
dw[1] = 0; /* MBZ */
|
||||
dw[1] = 0;
|
||||
dw[2] = size;
|
||||
dw[3] = buf;
|
||||
dw[3] = offset;
|
||||
}
|
||||
|
||||
static inline void
|
||||
gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD(struct ilo_builder *builder,
|
||||
uint32_t offset, int num_ids)
|
||||
uint32_t offset, unsigned size)
|
||||
{
|
||||
const uint8_t cmd_len = 4;
|
||||
const unsigned idrt_alloc =
|
||||
((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 64 : 32) * 32;
|
||||
uint32_t *dw;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 6, 6);
|
||||
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
|
||||
|
||||
assert(offset % 32 == 0);
|
||||
assert(offset % 32 == 0 && size % 32 == 0);
|
||||
assert(size && size <= idrt_alloc);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_INTERFACE_DESCRIPTOR_LOAD) |
|
||||
(cmd_len - 2);
|
||||
dw[1] = 0; /* MBZ */
|
||||
/* every ID has 8 DWords */
|
||||
dw[2] = num_ids * 8 * 4;
|
||||
dw[1] = 0;
|
||||
dw[2] = size;
|
||||
dw[3] = offset;
|
||||
}
|
||||
|
||||
static inline void
|
||||
gen6_MEDIA_GATEWAY_STATE(struct ilo_builder *builder,
|
||||
int id, int byte, int thread_count)
|
||||
gen6_MEDIA_STATE_FLUSH(struct ilo_builder *builder)
|
||||
{
|
||||
const uint8_t cmd_len = 2;
|
||||
uint32_t *dw;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 6, 6);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_GATEWAY_STATE) | (cmd_len - 2);
|
||||
dw[1] = id << 16 |
|
||||
byte << 8 |
|
||||
thread_count;
|
||||
}
|
||||
|
||||
static inline void
|
||||
gen6_MEDIA_STATE_FLUSH(struct ilo_builder *builder,
|
||||
int thread_count_water_mark,
|
||||
int barrier_mask)
|
||||
{
|
||||
const uint8_t cmd_len = 2;
|
||||
uint32_t *dw;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 6, 6);
|
||||
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_STATE_FLUSH) | (cmd_len - 2);
|
||||
dw[1] = thread_count_water_mark << 16 |
|
||||
barrier_mask;
|
||||
dw[1] = 0;
|
||||
}
|
||||
|
||||
static inline void
|
||||
gen6_MEDIA_OBJECT_WALKER(struct ilo_builder *builder)
|
||||
gen7_GPGPU_WALKER(struct ilo_builder *builder,
|
||||
const unsigned thread_group_offset[3],
|
||||
const unsigned thread_group_dim[3],
|
||||
unsigned thread_group_size,
|
||||
unsigned simd_size)
|
||||
{
|
||||
assert(!"MEDIA_OBJECT_WALKER unsupported");
|
||||
}
|
||||
const uint8_t cmd_len = 11;
|
||||
uint32_t right_execmask, bottom_execmask;
|
||||
unsigned thread_count;
|
||||
uint32_t *dw;
|
||||
|
||||
static inline void
|
||||
gen7_GPGPU_WALKER(struct ilo_builder *builder)
|
||||
{
|
||||
assert(!"GPGPU_WALKER unsupported");
|
||||
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
|
||||
|
||||
assert(simd_size == 16 || simd_size == 8);
|
||||
|
||||
thread_count = (thread_group_size + simd_size - 1) / simd_size;
|
||||
assert(thread_count <= 64);
|
||||
|
||||
right_execmask = thread_group_size % simd_size;
|
||||
if (right_execmask)
|
||||
right_execmask = (1 << right_execmask) - 1;
|
||||
else
|
||||
right_execmask = (1 << simd_size) - 1;
|
||||
|
||||
bottom_execmask = 0xffffffff;
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN7_RENDER_CMD(MEDIA, GPGPU_WALKER) | (cmd_len - 2);
|
||||
dw[1] = 0; /* always first IDRT */
|
||||
|
||||
dw[2] = (thread_count - 1) << GEN7_GPGPU_DW2_THREAD_MAX_X__SHIFT;
|
||||
if (simd_size == 16)
|
||||
dw[2] |= GEN7_GPGPU_DW2_SIMD_SIZE_SIMD16;
|
||||
else
|
||||
dw[2] |= GEN7_GPGPU_DW2_SIMD_SIZE_SIMD8;
|
||||
|
||||
dw[3] = thread_group_offset[0];
|
||||
dw[4] = thread_group_dim[0];
|
||||
dw[5] = thread_group_offset[1];
|
||||
dw[6] = thread_group_dim[1];
|
||||
dw[7] = thread_group_offset[2];
|
||||
dw[8] = thread_group_dim[2];
|
||||
|
||||
dw[9] = right_execmask;
|
||||
dw[10] = bottom_execmask;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder,
|
||||
const struct ilo_shader_state **cs,
|
||||
uint32_t *sampler_state,
|
||||
int *num_samplers,
|
||||
uint32_t *binding_table_state,
|
||||
int *num_surfaces,
|
||||
int num_ids)
|
||||
const struct gen6_idrt_data *data,
|
||||
int idrt_count)
|
||||
{
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 2 part 2, page 34:
|
||||
|
|
@ -175,25 +210,60 @@ gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder,
|
|||
* aligned address of the Interface Descriptor data."
|
||||
*/
|
||||
const int state_align = 32;
|
||||
const int state_len = (32 / 4) * num_ids;
|
||||
const int state_len = (32 / 4) * idrt_count;
|
||||
uint32_t state_offset, *dw;
|
||||
int i;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 6, 6);
|
||||
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
|
||||
|
||||
state_offset = ilo_builder_state_pointer(builder,
|
||||
state_offset = ilo_builder_dynamic_pointer(builder,
|
||||
ILO_BUILDER_ITEM_BLOB, state_align, state_len, &dw);
|
||||
|
||||
for (i = 0; i < num_ids; i++) {
|
||||
dw[0] = ilo_shader_get_kernel_offset(cs[i]);
|
||||
dw[1] = 1 << 18; /* SPF */
|
||||
dw[2] = sampler_state[i] |
|
||||
(num_samplers[i] + 3) / 4 << 2;
|
||||
dw[3] = binding_table_state[i] |
|
||||
num_surfaces[i];
|
||||
dw[4] = 0 << 16 | /* CURBE Read Length */
|
||||
0; /* CURBE Read Offset */
|
||||
dw[5] = 0; /* Barrier ID */
|
||||
for (i = 0; i < idrt_count; i++) {
|
||||
const struct gen6_idrt_data *idrt = &data[i];
|
||||
const struct ilo_shader_state *cs = idrt->cs;
|
||||
unsigned sampler_count, bt_size, slm_size;
|
||||
|
||||
sampler_count =
|
||||
ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT);
|
||||
assert(sampler_count <= 16);
|
||||
sampler_count = (sampler_count + 3) / 4;
|
||||
|
||||
bt_size =
|
||||
ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT);
|
||||
if (bt_size > 31)
|
||||
bt_size = 31;
|
||||
|
||||
slm_size = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE);
|
||||
|
||||
assert(idrt->curbe_size / 32 <= 63);
|
||||
|
||||
dw[0] = ilo_shader_get_kernel_offset(idrt->cs);
|
||||
dw[1] = 0;
|
||||
dw[2] = idrt->sampler_offset |
|
||||
sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT;
|
||||
dw[3] = idrt->binding_table_offset |
|
||||
bt_size << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT;
|
||||
|
||||
dw[4] = (idrt->curbe_size / 32) << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT |
|
||||
0 << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT;
|
||||
|
||||
if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
|
||||
dw[5] = GEN7_IDRT_DW5_ROUNDING_MODE_RTNE;
|
||||
|
||||
if (slm_size) {
|
||||
assert(slm_size <= 64 * 1024);
|
||||
slm_size = util_next_power_of_two((slm_size + 4095) / 4096);
|
||||
|
||||
dw[5] |= GEN7_IDRT_DW5_BARRIER_ENABLE |
|
||||
slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT |
|
||||
idrt->thread_group_size <<
|
||||
GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT;
|
||||
}
|
||||
} else {
|
||||
dw[5] = 0;
|
||||
}
|
||||
|
||||
dw[6] = 0;
|
||||
dw[7] = 0;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue