radv: remove DFSM

DFSM has never been enabled by default because it was slower.
RadeonSI is also dropping support for this because they discovered
that it's actually not efficient in practice.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10968>
This commit is contained in:
Samuel Pitoiset 2021-05-25 08:26:23 +02:00 committed by Marge Bot
parent f0f0a21f13
commit 69ae02151d
6 changed files with 5 additions and 60 deletions

View file

@ -633,8 +633,6 @@ RADV driver environment variables
enable wave32 for compute shaders (GFX10+)
``dccmsaa``
enable DCC for MSAA images
``dfsm``
enable DFSM
``gewave32``
enable wave32 for vertex/tess/geometry shaders (GFX10+)
``localbos``

View file

@ -950,12 +950,6 @@ radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
radeon_emit(cs, centroid_priority);
radeon_emit(cs, centroid_priority >> 32);
/* GFX9: Flush DFSM when the AA mode changes. */
if (cmd_buffer->device->dfsm_allowed) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
}
cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
@ -1001,8 +995,7 @@ radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeli
if (old_pipeline &&
old_pipeline->graphics.binning.pa_sc_binner_cntl_0 ==
pipeline->graphics.binning.pa_sc_binner_cntl_0 &&
old_pipeline->graphics.binning.db_dfsm_control == pipeline->graphics.binning.db_dfsm_control)
pipeline->graphics.binning.pa_sc_binner_cntl_0)
return;
bool binning_flush = false;
@ -1019,14 +1012,6 @@ radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeli
pipeline->graphics.binning.pa_sc_binner_cntl_0 |
S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush));
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
radeon_set_context_reg(cmd_buffer->cs, R_028038_DB_DFSM_CONTROL,
pipeline->graphics.binning.db_dfsm_control);
} else {
radeon_set_context_reg(cmd_buffer->cs, R_028060_DB_DFSM_CONTROL,
pipeline->graphics.binning.db_dfsm_control);
}
cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
@ -2481,11 +2466,6 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
}
if (cmd_buffer->device->dfsm_allowed) {
radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;
}

View file

@ -71,9 +71,8 @@ enum {
RADV_PERFTEST_CS_WAVE_32 = 1u << 3,
RADV_PERFTEST_PS_WAVE_32 = 1u << 4,
RADV_PERFTEST_GE_WAVE_32 = 1u << 5,
RADV_PERFTEST_DFSM = 1u << 6,
RADV_PERFTEST_NO_SAM = 1u << 7,
RADV_PERFTEST_SAM = 1u << 8,
RADV_PERFTEST_NO_SAM = 1u << 6,
RADV_PERFTEST_SAM = 1u << 7,
};
bool radv_init_trace(struct radv_device *device);

View file

@ -814,7 +814,7 @@ static const struct debug_control radv_perftest_options[] = {
{"localbos", RADV_PERFTEST_LOCAL_BOS}, {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
{"bolist", RADV_PERFTEST_BO_LIST},
{"cswave32", RADV_PERFTEST_CS_WAVE_32}, {"pswave32", RADV_PERFTEST_PS_WAVE_32},
{"gewave32", RADV_PERFTEST_GE_WAVE_32}, {"dfsm", RADV_PERFTEST_DFSM},
{"gewave32", RADV_PERFTEST_GE_WAVE_32},
{"nosam", RADV_PERFTEST_NO_SAM}, {"sam", RADV_PERFTEST_SAM},
{NULL, 0}};
@ -2975,10 +2975,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
!(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
/* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
device->dfsm_allowed =
device->pbb_allowed && (device->instance->perftest_flags & RADV_PERFTEST_DFSM);
/* The maximum number of scratch waves. Scratch space isn't divided
* evenly between CUs. The number is only a function of the number of CUs.
* We can decrease the constant to decrease the scratch buffer size.

View file

@ -4080,7 +4080,6 @@ radv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline,
{
uint32_t pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
S_028C44_DISABLE_START_OF_PRIM(1);
uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
@ -4112,7 +4111,6 @@ radv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline,
}
pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
}
struct radv_binning_settings
@ -4162,17 +4160,6 @@ radv_pipeline_init_binning_state(struct radv_pipeline *pipeline,
struct radv_binning_settings settings =
radv_get_binning_settings(pipeline->device->physical_device);
bool disable_start_of_prim = true;
uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
const struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
if (pipeline->device->dfsm_allowed && ps && !ps->info.ps.can_discard &&
!ps->info.ps.writes_memory && blend->cb_target_enabled_4bit) {
db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_AUTO);
disable_start_of_prim = (blend->blend_enable_4bit & blend->cb_target_enabled_4bit) != 0;
}
const uint32_t pa_sc_binner_cntl_0 =
S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
S_028C44_BIN_SIZE_X(bin_size.width == 16) | S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
@ -4180,11 +4167,10 @@ radv_pipeline_init_binning_state(struct radv_pipeline *pipeline,
S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
S_028C44_CONTEXT_STATES_PER_BIN(settings.context_states_per_bin - 1) |
S_028C44_PERSISTENT_STATES_PER_BIN(settings.persistent_states_per_bin - 1) |
S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
S_028C44_DISABLE_START_OF_PRIM(1) |
S_028C44_FPOVS_PER_BATCH(settings.fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1);
pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
} else
radv_pipeline_init_disabled_binning_state(pipeline, pCreateInfo);
}
@ -4331,12 +4317,6 @@ radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs,
radeon_set_context_reg(
ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
/* GFX9: Flush DFSM when the AA mode changes. */
if (pipeline->device->dfsm_allowed) {
radeon_emit(ctx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(ctx_cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
}
}
static void
@ -5038,12 +5018,6 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, struct rade
ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil,
ps->info.ps.writes_sample_mask));
if (pipeline->device->dfsm_allowed) {
/* optimise this? */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
}
}
static void

View file

@ -756,7 +756,6 @@ struct radv_device {
struct radeon_cmdbuf *empty_cs[RADV_MAX_QUEUE_FAMILIES];
bool pbb_allowed;
bool dfsm_allowed;
uint32_t tess_offchip_block_dw_size;
uint32_t scratch_waves;
uint32_t dispatch_initiator;
@ -1686,7 +1685,6 @@ struct radv_ia_multi_vgt_param_helpers {
struct radv_binning_state {
uint32_t pa_sc_binner_cntl_0;
uint32_t db_dfsm_control;
};
#define SI_GS_PER_ES 128