mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 02:50:16 +01:00
radv: remove DFSM
DFSM has never been enabled by default because it was slower. RadeonSI is also dropping support for this because they discovered that it's actually not efficient in practice.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10968>
This commit is contained in:
parent
f0f0a21f13
commit
69ae02151d
6 changed files with 5 additions and 60 deletions
|
|
@ -633,8 +633,6 @@ RADV driver environment variables
|
|||
enable wave32 for compute shaders (GFX10+)
|
||||
``dccmsaa``
|
||||
enable DCC for MSAA images
|
||||
``dfsm``
|
||||
enable DFSM
|
||||
``gewave32``
|
||||
enable wave32 for vertex/tess/geometry shaders (GFX10+)
|
||||
``localbos``
|
||||
|
|
|
|||
|
|
@ -950,12 +950,6 @@ radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
|
|||
radeon_emit(cs, centroid_priority);
|
||||
radeon_emit(cs, centroid_priority >> 32);
|
||||
|
||||
/* GFX9: Flush DFSM when the AA mode changes. */
|
||||
if (cmd_buffer->device->dfsm_allowed) {
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
|
||||
}
|
||||
|
||||
cmd_buffer->state.context_roll_without_scissor_emitted = true;
|
||||
}
|
||||
|
||||
|
|
@ -1001,8 +995,7 @@ radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeli
|
|||
|
||||
if (old_pipeline &&
|
||||
old_pipeline->graphics.binning.pa_sc_binner_cntl_0 ==
|
||||
pipeline->graphics.binning.pa_sc_binner_cntl_0 &&
|
||||
old_pipeline->graphics.binning.db_dfsm_control == pipeline->graphics.binning.db_dfsm_control)
|
||||
pipeline->graphics.binning.pa_sc_binner_cntl_0)
|
||||
return;
|
||||
|
||||
bool binning_flush = false;
|
||||
|
|
@ -1019,14 +1012,6 @@ radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeli
|
|||
pipeline->graphics.binning.pa_sc_binner_cntl_0 |
|
||||
S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush));
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028038_DB_DFSM_CONTROL,
|
||||
pipeline->graphics.binning.db_dfsm_control);
|
||||
} else {
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028060_DB_DFSM_CONTROL,
|
||||
pipeline->graphics.binning.db_dfsm_control);
|
||||
}
|
||||
|
||||
cmd_buffer->state.context_roll_without_scissor_emitted = true;
|
||||
}
|
||||
|
||||
|
|
@ -2481,11 +2466,6 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
|
||||
}
|
||||
|
||||
if (cmd_buffer->device->dfsm_allowed) {
|
||||
radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||
radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
|
||||
}
|
||||
|
||||
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -71,9 +71,8 @@ enum {
|
|||
RADV_PERFTEST_CS_WAVE_32 = 1u << 3,
|
||||
RADV_PERFTEST_PS_WAVE_32 = 1u << 4,
|
||||
RADV_PERFTEST_GE_WAVE_32 = 1u << 5,
|
||||
RADV_PERFTEST_DFSM = 1u << 6,
|
||||
RADV_PERFTEST_NO_SAM = 1u << 7,
|
||||
RADV_PERFTEST_SAM = 1u << 8,
|
||||
RADV_PERFTEST_NO_SAM = 1u << 6,
|
||||
RADV_PERFTEST_SAM = 1u << 7,
|
||||
};
|
||||
|
||||
bool radv_init_trace(struct radv_device *device);
|
||||
|
|
|
|||
|
|
@ -814,7 +814,7 @@ static const struct debug_control radv_perftest_options[] = {
|
|||
{"localbos", RADV_PERFTEST_LOCAL_BOS}, {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
|
||||
{"bolist", RADV_PERFTEST_BO_LIST},
|
||||
{"cswave32", RADV_PERFTEST_CS_WAVE_32}, {"pswave32", RADV_PERFTEST_PS_WAVE_32},
|
||||
{"gewave32", RADV_PERFTEST_GE_WAVE_32}, {"dfsm", RADV_PERFTEST_DFSM},
|
||||
{"gewave32", RADV_PERFTEST_GE_WAVE_32},
|
||||
{"nosam", RADV_PERFTEST_NO_SAM}, {"sam", RADV_PERFTEST_SAM},
|
||||
{NULL, 0}};
|
||||
|
||||
|
|
@ -2975,10 +2975,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
|
|||
device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
!(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
|
||||
|
||||
/* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
|
||||
device->dfsm_allowed =
|
||||
device->pbb_allowed && (device->instance->perftest_flags & RADV_PERFTEST_DFSM);
|
||||
|
||||
/* The maximum number of scratch waves. Scratch space isn't divided
|
||||
* evenly between CUs. The number is only a function of the number of CUs.
|
||||
* We can decrease the constant to decrease the scratch buffer size.
|
||||
|
|
|
|||
|
|
@ -4080,7 +4080,6 @@ radv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline,
|
|||
{
|
||||
uint32_t pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
|
||||
S_028C44_DISABLE_START_OF_PRIM(1);
|
||||
uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
|
||||
RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
|
||||
|
|
@ -4112,7 +4111,6 @@ radv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline,
|
|||
}
|
||||
|
||||
pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
|
||||
pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
|
||||
}
|
||||
|
||||
struct radv_binning_settings
|
||||
|
|
@ -4162,17 +4160,6 @@ radv_pipeline_init_binning_state(struct radv_pipeline *pipeline,
|
|||
struct radv_binning_settings settings =
|
||||
radv_get_binning_settings(pipeline->device->physical_device);
|
||||
|
||||
bool disable_start_of_prim = true;
|
||||
uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
|
||||
|
||||
const struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
|
||||
|
||||
if (pipeline->device->dfsm_allowed && ps && !ps->info.ps.can_discard &&
|
||||
!ps->info.ps.writes_memory && blend->cb_target_enabled_4bit) {
|
||||
db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_AUTO);
|
||||
disable_start_of_prim = (blend->blend_enable_4bit & blend->cb_target_enabled_4bit) != 0;
|
||||
}
|
||||
|
||||
const uint32_t pa_sc_binner_cntl_0 =
|
||||
S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
|
||||
S_028C44_BIN_SIZE_X(bin_size.width == 16) | S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
|
||||
|
|
@ -4180,11 +4167,10 @@ radv_pipeline_init_binning_state(struct radv_pipeline *pipeline,
|
|||
S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
|
||||
S_028C44_CONTEXT_STATES_PER_BIN(settings.context_states_per_bin - 1) |
|
||||
S_028C44_PERSISTENT_STATES_PER_BIN(settings.persistent_states_per_bin - 1) |
|
||||
S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
|
||||
S_028C44_DISABLE_START_OF_PRIM(1) |
|
||||
S_028C44_FPOVS_PER_BATCH(settings.fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1);
|
||||
|
||||
pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
|
||||
pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
|
||||
} else
|
||||
radv_pipeline_init_disabled_binning_state(pipeline, pCreateInfo);
|
||||
}
|
||||
|
|
@ -4331,12 +4317,6 @@ radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs,
|
|||
radeon_set_context_reg(
|
||||
ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
|
||||
S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
|
||||
|
||||
/* GFX9: Flush DFSM when the AA mode changes. */
|
||||
if (pipeline->device->dfsm_allowed) {
|
||||
radeon_emit(ctx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||
radeon_emit(ctx_cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -5038,12 +5018,6 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, struct rade
|
|||
ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
|
||||
ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil,
|
||||
ps->info.ps.writes_sample_mask));
|
||||
|
||||
if (pipeline->device->dfsm_allowed) {
|
||||
/* optimise this? */
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -756,7 +756,6 @@ struct radv_device {
|
|||
struct radeon_cmdbuf *empty_cs[RADV_MAX_QUEUE_FAMILIES];
|
||||
|
||||
bool pbb_allowed;
|
||||
bool dfsm_allowed;
|
||||
uint32_t tess_offchip_block_dw_size;
|
||||
uint32_t scratch_waves;
|
||||
uint32_t dispatch_initiator;
|
||||
|
|
@ -1686,7 +1685,6 @@ struct radv_ia_multi_vgt_param_helpers {
|
|||
|
||||
struct radv_binning_state {
|
||||
uint32_t pa_sc_binner_cntl_0;
|
||||
uint32_t db_dfsm_control;
|
||||
};
|
||||
|
||||
#define SI_GS_PER_ES 128
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue