mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 04:58:05 +02:00
anv: expose RT state flushing
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31384>
This commit is contained in:
parent
0ef0b358da
commit
22ab95ae10
2 changed files with 105 additions and 82 deletions
|
|
@ -234,6 +234,9 @@ void genX(cmd_buffer_flush_gfx)(struct anv_cmd_buffer *cmd_buffer);
|
|||
|
||||
void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void genX(cmd_buffer_flush_rt_state)(struct anv_cmd_buffer *cmd_buffer,
|
||||
unsigned scratch_size);
|
||||
|
||||
void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
|
||||
bool enable);
|
||||
|
||||
|
|
|
|||
|
|
@ -1207,6 +1207,7 @@ cmd_buffer_emit_rt_dispatch_globals_indirect(struct anv_cmd_buffer *cmd_buffer,
|
|||
return rtdg_state;
|
||||
}
|
||||
|
||||
#if GFX_VER >= 30
|
||||
static uint8_t
|
||||
get_stack_id_reduction_cap(uint32_t stack_ids)
|
||||
{
|
||||
|
|
@ -1216,7 +1217,6 @@ get_stack_id_reduction_cap(uint32_t stack_ids)
|
|||
* This value must always be smaller than value given by
|
||||
* CFE_STATE.Stack_ID_Control.
|
||||
*/
|
||||
#if GFX_VER >= 30
|
||||
switch (stack_ids) {
|
||||
case 2048: return REDUCTION_CAP_1024;
|
||||
case 1024: return REDUCTION_CAP_512;
|
||||
|
|
@ -1224,35 +1224,21 @@ get_stack_id_reduction_cap(uint32_t stack_ids)
|
|||
case 256: return REDUCTION_CAP_128;
|
||||
default: UNREACHABLE("Invalid stack_ids value");
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct trace_params *params)
|
||||
cmd_buffer_flush_rt_state(struct anv_cmd_buffer *cmd_buffer,
|
||||
unsigned scratch_size)
|
||||
{
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_RT_NO_TRACE))
|
||||
return;
|
||||
|
||||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
/* If we have a known degenerate launch size, just bail */
|
||||
if (!params->is_launch_size_indirect &&
|
||||
(params->launch_size[0] == 0 ||
|
||||
params->launch_size[1] == 0 ||
|
||||
params->launch_size[2] == 0))
|
||||
return;
|
||||
|
||||
trace_intel_begin_rays(&cmd_buffer->trace);
|
||||
|
||||
cmd_buffer->state.compute.trace_rays_active = true;
|
||||
|
||||
genX(cmd_buffer_config_l3)(cmd_buffer, device->l3_config);
|
||||
|
||||
genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ANV_COLOR_AUX_OP_CLASS_NONE);
|
||||
|
|
@ -1269,6 +1255,70 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
|
|||
genX(cmd_buffer_flush_push_descriptors)(cmd_buffer,
|
||||
&cmd_buffer->state.rt.base);
|
||||
|
||||
#if GFX_VERx10 == 125
|
||||
/* Wa_14014427904 - We need additional invalidate/flush when
|
||||
* emitting NP state commands with ATS-M in compute mode.
|
||||
*/
|
||||
if (intel_device_info_is_atsm(device->info) &&
|
||||
cmd_buffer->queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) {
|
||||
genx_batch_emit_pipe_control(&cmd_buffer->batch,
|
||||
cmd_buffer->device->info,
|
||||
cmd_buffer->state.current_pipeline,
|
||||
ANV_PIPE_CS_STALL_BIT |
|
||||
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT |
|
||||
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
|
||||
ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
|
||||
ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT |
|
||||
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT);
|
||||
}
|
||||
#endif
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BTD), btd) {
|
||||
uint32_t dispatch_timeout_counter =
|
||||
cmd_buffer->device->physical->instance->dispatch_timeout_counter;
|
||||
uint32_t clamped_timeout_counter =
|
||||
genX(anv_get_btd_dispatch_timeout_counter)(dispatch_timeout_counter);
|
||||
#if GFX_VERx10 >= 200
|
||||
btd.DispatchTimeoutCounter = clamped_timeout_counter;
|
||||
#else
|
||||
btd.DispatchTimeoutCounter = clamped_timeout_counter & 0x3;
|
||||
btd.DispatchTimeoutCounterExtend = (clamped_timeout_counter >> 2) & 0x3;
|
||||
#endif
|
||||
|
||||
/* BSpec 43851: "This field must be programmed to 6h i.e. memory backed
|
||||
* buffer must be 128KB."
|
||||
*/
|
||||
btd.PerDSSMemoryBackedBufferSize = 6;
|
||||
btd.MemoryBackedBufferBasePointer = (struct anv_address) { .bo = device->btd_fifo_bo };
|
||||
if (scratch_size > 0) {
|
||||
btd.ScratchSpaceBuffer = anv_shader_get_scratch_surf(&cmd_buffer->batch,
|
||||
cmd_buffer->device,
|
||||
MESA_SHADER_COMPUTE,
|
||||
scratch_size,
|
||||
false);
|
||||
}
|
||||
#if INTEL_NEEDS_WA_14017794102 || INTEL_NEEDS_WA_14023061436
|
||||
btd.BTDMidthreadpreemption = false;
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 20
|
||||
/* TODO: We can tune this value specific to apps. */
|
||||
btd.ControlsthemaximumnumberofoutstandingRayQueriesperSS =
|
||||
RAYS_QUERIES_OUTSTANDING_1024;
|
||||
#endif
|
||||
#if GFX_VER >= 30
|
||||
btd.RTMemStructures64bModeEnable = true;
|
||||
btd.DynamicstackmanagementmechanismMISSPENALTY = MISS_PENALTY_16;
|
||||
btd.DynamicstackmanagementmechanismHITREWARD = HIT_REWARD_1;
|
||||
btd.DynamicstackmanagementmechanismSCALINGFACTOR = SCALING_FACTOR_4;
|
||||
btd.DynamicstackmanagementmechanismREDUCTIONCAP =
|
||||
get_stack_id_reduction_cap(cmd_buffer->device->physical->instance->stack_ids);
|
||||
#endif
|
||||
}
|
||||
|
||||
genX(cmd_buffer_ensure_cfe_state)(cmd_buffer, scratch_size);
|
||||
|
||||
/* Add these to the reloc list as they're internal buffers that don't
|
||||
* actually have relocs to pick them up manually.
|
||||
*
|
||||
|
|
@ -1278,6 +1328,37 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
|
|||
rt->scratch.bo);
|
||||
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
|
||||
cmd_buffer->device->btd_fifo_bo);
|
||||
}
|
||||
|
||||
void
|
||||
genX(cmd_buffer_flush_rt_state)(struct anv_cmd_buffer *cmd_buffer,
|
||||
unsigned scratch_size)
|
||||
{
|
||||
cmd_buffer_flush_rt_state(cmd_buffer, scratch_size);
|
||||
}
|
||||
|
||||
static void
|
||||
cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct trace_params *params)
|
||||
{
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_RT_NO_TRACE))
|
||||
return;
|
||||
|
||||
if (anv_batch_has_error(&cmd_buffer->batch))
|
||||
return;
|
||||
|
||||
/* If we have a known degenerate launch size, just bail */
|
||||
if (!params->is_launch_size_indirect &&
|
||||
(params->launch_size[0] == 0 ||
|
||||
params->launch_size[1] == 0 ||
|
||||
params->launch_size[2] == 0))
|
||||
return;
|
||||
|
||||
trace_intel_begin_rays(&cmd_buffer->trace);
|
||||
|
||||
cmd_buffer->state.compute.trace_rays_active = true;
|
||||
|
||||
/* Allocate and set up our RT_DISPATCH_GLOBALS */
|
||||
struct anv_state rtdg_state =
|
||||
|
|
@ -1364,70 +1445,6 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
}
|
||||
|
||||
#if GFX_VERx10 == 125
|
||||
/* Wa_14014427904 - We need additional invalidate/flush when
|
||||
* emitting NP state commands with ATS-M in compute mode.
|
||||
*/
|
||||
if (intel_device_info_is_atsm(device->info) &&
|
||||
cmd_buffer->queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) {
|
||||
genx_batch_emit_pipe_control(&cmd_buffer->batch,
|
||||
cmd_buffer->device->info,
|
||||
cmd_buffer->state.current_pipeline,
|
||||
ANV_PIPE_CS_STALL_BIT |
|
||||
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT |
|
||||
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
|
||||
ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
|
||||
ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT |
|
||||
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT);
|
||||
}
|
||||
#endif
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BTD), btd) {
|
||||
uint32_t dispatch_timeout_counter =
|
||||
cmd_buffer->device->physical->instance->dispatch_timeout_counter;
|
||||
uint32_t clamped_timeout_counter =
|
||||
genX(anv_get_btd_dispatch_timeout_counter)(dispatch_timeout_counter);
|
||||
#if GFX_VERx10 >= 200
|
||||
btd.DispatchTimeoutCounter = clamped_timeout_counter;
|
||||
#else
|
||||
btd.DispatchTimeoutCounter = clamped_timeout_counter & 0x3;
|
||||
btd.DispatchTimeoutCounterExtend = (clamped_timeout_counter >> 2) & 0x3;
|
||||
#endif
|
||||
|
||||
/* BSpec 43851: "This field must be programmed to 6h i.e. memory backed
|
||||
* buffer must be 128KB."
|
||||
*/
|
||||
btd.PerDSSMemoryBackedBufferSize = 6;
|
||||
btd.MemoryBackedBufferBasePointer = (struct anv_address) { .bo = device->btd_fifo_bo };
|
||||
if (rt->scratch_size > 0) {
|
||||
btd.ScratchSpaceBuffer = anv_shader_get_scratch_surf(&cmd_buffer->batch,
|
||||
cmd_buffer->device,
|
||||
MESA_SHADER_COMPUTE,
|
||||
rt->scratch_size,
|
||||
false);;
|
||||
}
|
||||
#if INTEL_NEEDS_WA_14017794102 || INTEL_NEEDS_WA_14023061436
|
||||
btd.BTDMidthreadpreemption = false;
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 20
|
||||
/* TODO: We can tune this value specific to apps. */
|
||||
btd.ControlsthemaximumnumberofoutstandingRayQueriesperSS =
|
||||
RAYS_QUERIES_OUTSTANDING_1024;
|
||||
#endif
|
||||
#if GFX_VER >= 30
|
||||
btd.RTMemStructures64bModeEnable = true;
|
||||
btd.DynamicstackmanagementmechanismMISSPENALTY = MISS_PENALTY_16;
|
||||
btd.DynamicstackmanagementmechanismHITREWARD = HIT_REWARD_1;
|
||||
btd.DynamicstackmanagementmechanismSCALINGFACTOR = SCALING_FACTOR_4;
|
||||
btd.DynamicstackmanagementmechanismREDUCTIONCAP =
|
||||
get_stack_id_reduction_cap(cmd_buffer->device->physical->instance->stack_ids);
|
||||
#endif
|
||||
}
|
||||
|
||||
genX(cmd_buffer_ensure_cfe_state)(cmd_buffer, rt->scratch_size);
|
||||
|
||||
const struct brw_cs_prog_data *cs_prog_data =
|
||||
brw_cs_prog_data_const(device->rt_trampoline->prog_data);
|
||||
struct intel_cs_dispatch_info dispatch =
|
||||
|
|
@ -1538,6 +1555,7 @@ genX(CmdTraceRaysKHR)(
|
|||
},
|
||||
};
|
||||
|
||||
cmd_buffer_flush_rt_state(cmd_buffer, cmd_buffer->state.rt.scratch_size);
|
||||
cmd_buffer_trace_rays(cmd_buffer, ¶ms);
|
||||
}
|
||||
|
||||
|
|
@ -1561,6 +1579,7 @@ genX(CmdTraceRaysIndirectKHR)(
|
|||
.launch_size_addr = indirectDeviceAddress,
|
||||
};
|
||||
|
||||
cmd_buffer_flush_rt_state(cmd_buffer, cmd_buffer->state.rt.scratch_size);
|
||||
cmd_buffer_trace_rays(cmd_buffer, ¶ms);
|
||||
}
|
||||
|
||||
|
|
@ -1578,6 +1597,7 @@ genX(CmdTraceRaysIndirect2KHR)(
|
|||
offsetof(VkTraceRaysIndirectCommand2KHR, width),
|
||||
};
|
||||
|
||||
cmd_buffer_flush_rt_state(cmd_buffer, cmd_buffer->state.rt.scratch_size);
|
||||
cmd_buffer_trace_rays(cmd_buffer, ¶ms);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue