iris: better dirty checking

This commit is contained in:
Kenneth Graunke 2018-10-19 01:29:05 -07:00
parent 06a993dac2
commit f8796079da
2 changed files with 69 additions and 54 deletions

View file

@ -115,6 +115,10 @@ dirty_flag_for_cache(enum iris_program_cache_id cache_id)
if (cache_id == IRIS_CACHE_VS)
return IRIS_DIRTY_VS | IRIS_DIRTY_VF_SGVS;
/* For compute, prog_data->threads needs to be uploaded as constants. */
if (cache_id == IRIS_CACHE_CS)
return IRIS_DIRTY_CS | IRIS_DIRTY_CONSTANTS_CS;
return IRIS_DIRTY_VS << cache_id | IRIS_DIRTY_BINDINGS_VS << cache_id;
}

View file

@ -4284,43 +4284,46 @@ iris_upload_compute_state(struct iris_context *ice,
if (ice->state.need_border_colors)
iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false);
/* The MEDIA_VFE_STATE documentation for Gen8+ says:
*
* "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
* the only bits that are changed are scoreboard related: Scoreboard
* Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For
* these scoreboard related states, a MEDIA_STATE_FLUSH is sufficient."
*/
iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL);
if (dirty & IRIS_DIRTY_CS) {
/* The MEDIA_VFE_STATE documentation for Gen8+ says:
*
* "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
* the only bits that are changed are scoreboard related: Scoreboard
* Enable, Scoreboard Type, Scoreboard Mask, Scoreboard Delta. For
* these scoreboard related states, a MEDIA_STATE_FLUSH is
* sufficient."
*/
iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL);
iris_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) {
if (prog_data->total_scratch) {
/* Per Thread Scratch Space is in the range [0, 11] where
* 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
*/
// XXX: vfe.ScratchSpaceBasePointer
//vfe.PerThreadScratchSpace =
//ffs(stage_state->per_thread_scratch) - 11;
}
iris_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) {
if (prog_data->total_scratch) {
/* Per Thread Scratch Space is in the range [0, 11] where
* 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
*/
// XXX: vfe.ScratchSpaceBasePointer
//vfe.PerThreadScratchSpace =
//ffs(stage_state->per_thread_scratch) - 11;
}
vfe.MaximumNumberofThreads =
devinfo->max_cs_threads * screen->subslice_total - 1;
vfe.MaximumNumberofThreads =
devinfo->max_cs_threads * screen->subslice_total - 1;
#if GEN_GEN < 11
vfe.ResetGatewayTimer =
Resettingrelativetimerandlatchingtheglobaltimestamp;
vfe.ResetGatewayTimer =
Resettingrelativetimerandlatchingtheglobaltimestamp;
#endif
vfe.NumberofURBEntries = 2;
vfe.URBEntryAllocationSize = 2;
vfe.NumberofURBEntries = 2;
vfe.URBEntryAllocationSize = 2;
// XXX: Use Indirect Payload Storage?
vfe.CURBEAllocationSize =
ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
cs_prog_data->push.cross_thread.regs, 2);
// XXX: Use Indirect Payload Storage?
vfe.CURBEAllocationSize =
ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
cs_prog_data->push.cross_thread.regs, 2);
}
}
// XXX: hack iris_set_constant_buffers to upload compute shader constants
// XXX: differently...?
// XXX: hack iris_set_constant_buffers to upload these thread counts
// XXX: along with regular uniforms for compute shaders, somehow.
uint32_t curbe_data_offset = 0;
// TODO: Move subgroup-id into uniforms ubo so we can push uniforms
@ -4335,36 +4338,44 @@ iris_upload_compute_state(struct iris_context *ice,
assert(curbe_data_map);
memset(curbe_data_map, 0x5a, ALIGN(cs_prog_data->push.total.size, 64));
iris_fill_cs_push_const_buffer(cs_prog_data, curbe_data_map);
iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
curbe.CURBETotalDataLength =
ALIGN(cs_prog_data->push.total.size, 64);
curbe.CURBEDataStartAddress = curbe_data_offset;
if (dirty & IRIS_DIRTY_CONSTANTS_CS) {
iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
curbe.CURBETotalDataLength =
ALIGN(cs_prog_data->push.total.size, 64);
curbe.CURBEDataStartAddress = curbe_data_offset;
}
}
struct pipe_resource *desc_res = NULL;
uint32_t desc[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
if (dirty & (IRIS_DIRTY_SAMPLER_STATES_CS |
IRIS_DIRTY_BINDINGS_CS |
IRIS_DIRTY_CONSTANTS_CS |
IRIS_DIRTY_CS)) {
struct pipe_resource *desc_res = NULL;
uint32_t desc[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
idd.SamplerStatePointer = shs->sampler_table.offset;
idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE];
idd.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;
idd.CrossThreadConstantDataReadLength =
cs_prog_data->push.cross_thread.regs;
iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
idd.SamplerStatePointer = shs->sampler_table.offset;
idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE];
idd.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;
idd.CrossThreadConstantDataReadLength =
cs_prog_data->push.cross_thread.regs;
}
for (int i = 0; i < GENX(INTERFACE_DESCRIPTOR_DATA_length); i++)
desc[i] |= ((uint32_t *) shader->derived_data)[i];
iris_emit_cmd(batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
load.InterfaceDescriptorTotalLength =
GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
load.InterfaceDescriptorDataStartAddress =
emit_state(batch, ice->state.dynamic_uploader,
&desc_res, desc, sizeof(desc), 32);
}
pipe_resource_reference(&desc_res, NULL);
}
for (int i = 0; i < GENX(INTERFACE_DESCRIPTOR_DATA_length); i++)
desc[i] |= ((uint32_t *) shader->derived_data)[i];
iris_emit_cmd(batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
load.InterfaceDescriptorTotalLength =
GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
load.InterfaceDescriptorDataStartAddress =
emit_state(batch, ice->state.dynamic_uploader,
&desc_res, desc, sizeof(desc), 32);
}
pipe_resource_reference(&desc_res, NULL);
uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2];
uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
uint32_t right_mask;