diff --git a/src/intel/compiler/brw_compile_cs.cpp b/src/intel/compiler/brw_compile_cs.cpp index 4b1fe6ae974..dd86044f5b8 100644 --- a/src/intel/compiler/brw_compile_cs.cpp +++ b/src/intel/compiler/brw_compile_cs.cpp @@ -135,6 +135,7 @@ const unsigned * brw_compile_cs(const struct brw_compiler *compiler, struct brw_compile_cs_params *params) { + const struct intel_device_info *devinfo = compiler->devinfo; struct nir_shader *nir = params->base.nir; const struct brw_cs_prog_key *key = params->key; struct brw_cs_prog_data *prog_data = params->prog_data; @@ -166,7 +167,9 @@ brw_compile_cs(const struct brw_compiler *compiler, std::unique_ptr v[3]; - for (unsigned simd = 0; simd < 3; simd++) { + for (unsigned i = 0; i < 3; i++) { + const unsigned simd = devinfo->ver >= 30 ? 2 - i : i; + if (!brw_simd_should_compile(simd_state, simd)) continue; @@ -192,16 +195,25 @@ brw_compile_cs(const struct brw_compiler *compiler, params->base.stats != NULL, debug_enabled); - const int first = brw_simd_first_compiled(simd_state); - if (first >= 0) - v[simd]->import_uniforms(v[first].get()); + const bool allow_spilling = simd == 0 || + (!simd_state.compiled[simd - 1] && !brw_simd_should_compile(simd_state, simd - 1)) || + nir->info.workgroup_size_variable; - const bool allow_spilling = first < 0 || nir->info.workgroup_size_variable; + if (devinfo->ver < 30 || nir->info.workgroup_size_variable) { + const int first = brw_simd_first_compiled(simd_state); + if (first >= 0) + v[simd]->import_uniforms(v[first].get()); + assert(allow_spilling == (first < 0 || nir->info.workgroup_size_variable)); + } if (run_cs(*v[simd], allow_spilling)) { cs_fill_push_const_info(compiler->devinfo, prog_data); brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers); + + if (devinfo->ver >= 30 && !v[simd]->spilled_any_registers && + !nir->info.workgroup_size_variable) + break; } else { simd_state.error[simd] = ralloc_strdup(params->base.mem_ctx, v[simd]->fail_msg); if (simd > 0) { diff --git a/src/intel/compiler/brw_compile_mesh.cpp b/src/intel/compiler/brw_compile_mesh.cpp index 846ccf4cd24..8316e090357 100644 --- a/src/intel/compiler/brw_compile_mesh.cpp +++ b/src/intel/compiler/brw_compile_mesh.cpp @@ -398,6 +398,7 @@ const unsigned * brw_compile_task(const struct brw_compiler *compiler, struct brw_compile_task_params *params) { + const struct intel_device_info *devinfo = compiler->devinfo; struct nir_shader *nir = params->base.nir; const struct brw_task_prog_key *key = params->key; struct brw_task_prog_data *prog_data = params->prog_data; @@ -441,7 +442,9 @@ brw_compile_task(const struct brw_compiler *compiler, std::unique_ptr v[3]; - for (unsigned simd = 0; simd < 3; simd++) { + for (unsigned i = 0; i < 3; i++) { + const unsigned simd = devinfo->ver >= 30 ? 2 - i : i; + if (!brw_simd_should_compile(simd_state, simd)) continue; @@ -467,11 +470,16 @@ brw_compile_task(const struct brw_compiler *compiler, v[simd]->import_uniforms(v[first].get()); } - const bool allow_spilling = !brw_simd_any_compiled(simd_state); - if (run_task_mesh(*v[simd], allow_spilling)) + const bool allow_spilling = simd == 0 || + (!simd_state.compiled[simd - 1] && !brw_simd_should_compile(simd_state, simd - 1)); + if (run_task_mesh(*v[simd], allow_spilling)) { brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers); - else + + if (devinfo->ver >= 30 && !v[simd]->spilled_any_registers) + break; + } else { simd_state.error[simd] = ralloc_strdup(params->base.mem_ctx, v[simd]->fail_msg); + } } int selected_simd = brw_simd_select(simd_state); @@ -1688,6 +1696,7 @@ const unsigned * brw_compile_mesh(const struct brw_compiler *compiler, struct brw_compile_mesh_params *params) { + const struct intel_device_info *devinfo = compiler->devinfo; struct nir_shader *nir = params->base.nir; const struct brw_mesh_prog_key *key = params->key; struct brw_mesh_prog_data *prog_data = params->prog_data; @@ -1742,7 +1751,9 @@ brw_compile_mesh(const struct brw_compiler *compiler, std::unique_ptr v[3]; - for (int simd = 0; simd < 3; simd++) { + for (unsigned i = 0; i < 3; i++) { + const unsigned simd = devinfo->ver >= 30 ? 2 - i : i; + if (!brw_simd_should_compile(simd_state, simd)) continue; @@ -1780,11 +1791,16 @@ brw_compile_mesh(const struct brw_compiler *compiler, v[simd]->import_uniforms(v[first].get()); } - const bool allow_spilling = !brw_simd_any_compiled(simd_state); - if (run_task_mesh(*v[simd], allow_spilling)) + const bool allow_spilling = simd == 0 || + (!simd_state.compiled[simd - 1] && !brw_simd_should_compile(simd_state, simd - 1)); + if (run_task_mesh(*v[simd], allow_spilling)) { brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers); - else + + if (devinfo->ver >= 30 && !v[simd]->spilled_any_registers) + break; + } else { simd_state.error[simd] = ralloc_strdup(params->base.mem_ctx, v[simd]->fail_msg); + } } int selected_simd = brw_simd_select(simd_state); diff --git a/src/intel/compiler/brw_simd_selection.cpp b/src/intel/compiler/brw_simd_selection.cpp index 907b38c3f12..a613189a9ed 100644 --- a/src/intel/compiler/brw_simd_selection.cpp +++ b/src/intel/compiler/brw_simd_selection.cpp @@ -87,23 +87,23 @@ brw_simd_should_compile(brw_simd_selection_state &state, unsigned simd) const auto prog_data = get_prog_data(state); const unsigned width = 8u << simd; + if (state.required_width && state.required_width != width) { + state.error[simd] = "Different than required dispatch width"; + return false; + } + /* For shaders with variable size workgroup, in most cases we can compile * all the variants (exceptions are bindless dispatch & ray queries), since * the choice will happen only at dispatch time. */ const bool workgroup_size_variable = cs_prog_data && cs_prog_data->local_size[0] == 0; - if (!workgroup_size_variable) { + if (!workgroup_size_variable && !state.required_width) { if (state.spilled[simd]) { state.error[simd] = "Would spill"; return false; } - if (state.required_width && state.required_width != width) { - state.error[simd] = "Different than required dispatch width"; - return false; - } - if (cs_prog_data) { const unsigned workgroup_size = cs_prog_data->local_size[0] * cs_prog_data->local_size[1] * @@ -112,8 +112,7 @@ brw_simd_should_compile(brw_simd_selection_state &state, unsigned simd) unsigned max_threads = state.devinfo->max_cs_workgroup_threads; const unsigned min_simd = state.devinfo->ver >= 20 ? 1 : 0; - if (simd > min_simd && state.compiled[simd - 1] && - workgroup_size <= (width / 2)) { + if (simd > min_simd && workgroup_size <= (width / 2)) { state.error[simd] = "Workgroup size already fits in smaller SIMD"; return false; } diff --git a/src/intel/compiler/test_simd_selection.cpp b/src/intel/compiler/test_simd_selection.cpp index f3076b817a0..462d922607f 100644 --- a/src/intel/compiler/test_simd_selection.cpp +++ b/src/intel/compiler/test_simd_selection.cpp @@ -193,7 +193,7 @@ TEST_F(SIMDSelectionCS, WorkgroupSizeVariableNoSIMD8) ASSERT_EQ(prog_data->prog_mask, 1u << SIMD16 | 1u << SIMD32); const unsigned wg_8_1_1[] = { 8, 1, 1 }; - ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_8_1_1), SIMD16); + ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_8_1_1), -1); const unsigned wg_16_1_1[] = { 16, 1, 1 }; ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_16_1_1), SIMD16); @@ -240,10 +240,10 @@ TEST_F(SIMDSelectionCS, WorkgroupSizeVariableNoSIMD8NoSIMD16) ASSERT_EQ(prog_data->prog_mask, 1u << SIMD32); const unsigned wg_8_1_1[] = { 8, 1, 1 }; - ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_8_1_1), SIMD32); + ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_8_1_1), -1); const unsigned wg_16_1_1[] = { 16, 1, 1 }; - ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_16_1_1), SIMD32); + ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_16_1_1), -1); const unsigned wg_32_1_1[] = { 32, 1, 1 }; ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_32_1_1), SIMD32);