diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index c217abaaa56..5ad9d4836dc 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -4713,7 +4713,7 @@ struct anv_graphics_pipeline { /* Pre computed CS instructions that can directly be copied into * anv_cmd_buffer. */ - uint32_t batch_data[416]; + uint32_t batch_data[480]; /* Urb setup utilized by this pipeline. */ struct intel_urb_config urb_cfg; @@ -4733,12 +4733,17 @@ struct anv_graphics_pipeline { struct anv_gfx_state_ptr vs; struct anv_gfx_state_ptr hs; struct anv_gfx_state_ptr ds; + struct anv_gfx_state_ptr vs_protected; + struct anv_gfx_state_ptr hs_protected; + struct anv_gfx_state_ptr ds_protected; struct anv_gfx_state_ptr task_control; + struct anv_gfx_state_ptr task_control_protected; struct anv_gfx_state_ptr task_shader; struct anv_gfx_state_ptr task_redistrib; struct anv_gfx_state_ptr clip_mesh; struct anv_gfx_state_ptr mesh_control; + struct anv_gfx_state_ptr mesh_control_protected; struct anv_gfx_state_ptr mesh_shader; struct anv_gfx_state_ptr mesh_distrib; struct anv_gfx_state_ptr sbe_mesh; @@ -4756,8 +4761,10 @@ struct anv_graphics_pipeline { struct anv_gfx_state_ptr wm; struct anv_gfx_state_ptr so; struct anv_gfx_state_ptr gs; + struct anv_gfx_state_ptr gs_protected; struct anv_gfx_state_ptr te; struct anv_gfx_state_ptr ps; + struct anv_gfx_state_ptr ps_protected; struct anv_gfx_state_ptr vfg; } partial; }; diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 829068b9002..fdd609a6664 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -53,6 +53,16 @@ anv_gfx_pipeline_add(struct anv_graphics_pipeline *pipeline, return batch; } +#define anv_pipeline_emit_tmp(pipeline, field, cmd, name) /* packs cmd into the caller-provided dword buffer `field` once the attached block has filled `name` */ \ + for (struct cmd name = { __anv_cmd_header(cmd) }, \ + *_dst = (void *)(field); /* argument parenthesized so any pointer expression is cast as a whole */ \ + __builtin_expect(_dst != NULL, 1); \ + ({ __anv_cmd_pack(cmd)(&(pipeline)->base.base.batch, \ + _dst,
&name); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \ + _dst = NULL; /* makes the for loop run its body exactly once */ \ + })) + #define anv_pipeline_emit(pipeline, state, cmd, name) \ for (struct cmd name = { __anv_cmd_header(cmd) }, \ *_dst = anv_batch_emit_dwords( \ @@ -67,6 +77,25 @@ anv_gfx_pipeline_add(struct anv_graphics_pipeline *pipeline, _dst = NULL; \ })) +#define anv_pipeline_emit_merge(pipeline, state, dwords, cmd, name) /* writes cmd into (pipeline)->state as the bitwise OR of the fields set in the attached block and the pre-packed `dwords` */ \ + for (struct cmd name = { 0 }, /* zero-initialized, no header: the header bits are expected to come from `dwords` */ \ + *_dst = anv_batch_emit_dwords( \ + anv_gfx_pipeline_add(pipeline, \ + &(pipeline)->state, \ + __anv_cmd_length(cmd)), \ + __anv_cmd_length(cmd)); \ + __builtin_expect(_dst != NULL, 1); \ + ({ uint32_t _partial[__anv_cmd_length(cmd)]; \ + assert((pipeline)->state.len == __anv_cmd_length(cmd)); \ + __anv_cmd_pack(cmd)(&(pipeline)->base.base.batch, \ + _partial, &name); \ + for (uint32_t _i = 0; _i < __anv_cmd_length(cmd); _i++) { /* underscore-prefixed index cannot capture an `i` used in the `dwords` argument */ \ + ((uint32_t *)_dst)[_i] = _partial[_i] | (dwords)[_i]; \ + } \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \ + _dst = NULL; /* makes the outer for loop run its body exactly once */ \ + })) + #define anv_pipeline_emitn(pipeline, state, n, cmd, ...) ({ \ void *__dst = anv_batch_emit_dwords( \ anv_gfx_pipeline_add(pipeline, &(pipeline)->state, n), n); \ @@ -82,6 +111,8 @@ anv_gfx_pipeline_add(struct anv_graphics_pipeline *pipeline, __dst; \ }) +#define pipeline_needs_protected(pipeline) /* true when the protectedMemory feature is enabled on the pipeline's device */ \ + ((pipeline)->device->vk.enabled_features.protectedMemory) static uint32_t vertex_element_comp_control(enum isl_format format, unsigned comp) @@ -1179,14 +1210,17 @@ get_scratch_space(const struct anv_shader_bin *bin) static UNUSED uint32_t get_scratch_surf(struct anv_pipeline *pipeline, gl_shader_stage stage, - const struct anv_shader_bin *bin) + const struct anv_shader_bin *bin, + bool protected) { if (bin->prog_data->total_scratch == 0) return 0; + struct anv_scratch_pool *pool = protected ?
+ &pipeline->device->protected_scratch_pool : + &pipeline->device->scratch_pool; struct anv_bo *bo = - anv_scratch_pool_alloc(pipeline->device, - &pipeline->device->scratch_pool, + anv_scratch_pool_alloc(pipeline->device, pool, stage, bin->prog_data->total_scratch); anv_reloc_list_add_bo(pipeline->batch.relocs, bo); return anv_scratch_pool_get_surf(pipeline->device, @@ -1204,7 +1238,8 @@ emit_3dstate_vs(struct anv_graphics_pipeline *pipeline) assert(anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX)); - anv_pipeline_emit(pipeline, final.vs, GENX(3DSTATE_VS), vs) { + uint32_t vs_dwords[GENX(3DSTATE_VS_length)]; + anv_pipeline_emit_tmp(pipeline, vs_dwords, GENX(3DSTATE_VS), vs) { vs.Enable = true; vs.StatisticsEnable = true; vs.KernelStartPointer = vs_bin->kernel.offset; @@ -1262,15 +1297,30 @@ emit_3dstate_vs(struct anv_graphics_pipeline *pipeline) vs.UserClipDistanceCullTestEnableBitmask = vs_prog_data->base.cull_distance_mask; -#if GFX_VERx10 >= 125 - vs.ScratchSpaceBuffer = - get_scratch_surf(&pipeline->base.base, MESA_SHADER_VERTEX, vs_bin); -#else +#if GFX_VERx10 < 125 vs.PerThreadScratchSpace = get_scratch_space(vs_bin); vs.ScratchSpaceBasePointer = get_scratch_address(&pipeline->base.base, MESA_SHADER_VERTEX, vs_bin); #endif } + + anv_pipeline_emit_merge(pipeline, final.vs, vs_dwords, GENX(3DSTATE_VS), vs) { +#if GFX_VERx10 >= 125 + vs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base.base, + MESA_SHADER_VERTEX, + vs_bin, false); +#endif + } + if (pipeline_needs_protected(&pipeline->base.base)) { + anv_pipeline_emit_merge(pipeline, final.vs_protected, + vs_dwords, GENX(3DSTATE_VS), vs) { +#if GFX_VERx10 >= 125 + vs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base.base, + MESA_SHADER_VERTEX, + vs_bin, true); +#endif + } + } } static void @@ -1279,7 +1329,9 @@ emit_3dstate_hs_ds(struct anv_graphics_pipeline *pipeline, { if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) { anv_pipeline_emit(pipeline, final.hs, GENX(3DSTATE_HS), hs); + 
anv_pipeline_emit(pipeline, final.hs_protected, GENX(3DSTATE_HS), hs); anv_pipeline_emit(pipeline, final.ds, GENX(3DSTATE_DS), ds); + anv_pipeline_emit(pipeline, final.ds_protected, GENX(3DSTATE_DS), ds); return; } @@ -1292,7 +1344,8 @@ emit_3dstate_hs_ds(struct anv_graphics_pipeline *pipeline, const struct brw_tcs_prog_data *tcs_prog_data = get_tcs_prog_data(pipeline); const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline); - anv_pipeline_emit(pipeline, final.hs, GENX(3DSTATE_HS), hs) { + uint32_t hs_dwords[GENX(3DSTATE_HS_length)]; + anv_pipeline_emit_tmp(pipeline, hs_dwords, GENX(3DSTATE_HS), hs) { hs.Enable = true; hs.StatisticsEnable = true; hs.KernelStartPointer = tcs_bin->kernel.offset; @@ -1323,10 +1376,7 @@ emit_3dstate_hs_ds(struct anv_graphics_pipeline *pipeline, tcs_prog_data->base.base.dispatch_grf_start_reg >> 5; #endif -#if GFX_VERx10 >= 125 - hs.ScratchSpaceBuffer = - get_scratch_surf(&pipeline->base.base, MESA_SHADER_TESS_CTRL, tcs_bin); -#else +#if GFX_VERx10 < 125 hs.PerThreadScratchSpace = get_scratch_space(tcs_bin); hs.ScratchSpaceBasePointer = get_scratch_address(&pipeline->base.base, MESA_SHADER_TESS_CTRL, tcs_bin); @@ -1345,7 +1395,8 @@ emit_3dstate_hs_ds(struct anv_graphics_pipeline *pipeline, hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id; }; - anv_pipeline_emit(pipeline, final.ds, GENX(3DSTATE_DS), ds) { + uint32_t ds_dwords[GENX(3DSTATE_DS_length)]; + anv_pipeline_emit_tmp(pipeline, ds_dwords, GENX(3DSTATE_DS), ds) { ds.Enable = true; ds.StatisticsEnable = true; ds.KernelStartPointer = tes_bin->kernel.offset; @@ -1380,15 +1431,45 @@ emit_3dstate_hs_ds(struct anv_graphics_pipeline *pipeline, #if GFX_VER >= 12 ds.PrimitiveIDNotRequired = !tes_prog_data->include_primitive_id; #endif -#if GFX_VERx10 >= 125 - ds.ScratchSpaceBuffer = - get_scratch_surf(&pipeline->base.base, MESA_SHADER_TESS_EVAL, tes_bin); -#else +#if GFX_VERx10 < 125 ds.PerThreadScratchSpace = get_scratch_space(tes_bin); 
ds.ScratchSpaceBasePointer = get_scratch_address(&pipeline->base.base, MESA_SHADER_TESS_EVAL, tes_bin); #endif } + + anv_pipeline_emit_merge(pipeline, final.hs, hs_dwords, GENX(3DSTATE_HS), hs) { +#if GFX_VERx10 >= 125 + hs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base.base, + MESA_SHADER_TESS_CTRL, + tcs_bin, false); +#endif + } + anv_pipeline_emit_merge(pipeline, final.ds, ds_dwords, GENX(3DSTATE_DS), ds) { +#if GFX_VERx10 >= 125 + ds.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base.base, + MESA_SHADER_TESS_EVAL, + tes_bin, false); +#endif + } + if (pipeline_needs_protected(&pipeline->base.base)) { + anv_pipeline_emit_merge(pipeline, final.hs_protected, + hs_dwords, GENX(3DSTATE_HS), hs) { +#if GFX_VERx10 >= 125 + hs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base.base, + MESA_SHADER_TESS_CTRL, + tcs_bin, true); +#endif + } + anv_pipeline_emit_merge(pipeline, final.ds_protected, + ds_dwords, GENX(3DSTATE_DS), ds) { +#if GFX_VERx10 >= 125 + ds.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base.base, + MESA_SHADER_TESS_EVAL, + tes_bin, true); +#endif + } + } } static UNUSED bool @@ -1464,6 +1545,7 @@ emit_3dstate_gs(struct anv_graphics_pipeline *pipeline) { if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { anv_pipeline_emit(pipeline, partial.gs, GENX(3DSTATE_GS), gs); + anv_pipeline_emit(pipeline, partial.gs_protected, GENX(3DSTATE_GS), gs); return; } @@ -1472,7 +1554,8 @@ emit_3dstate_gs(struct anv_graphics_pipeline *pipeline) pipeline->base.shaders[MESA_SHADER_GEOMETRY]; const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline); - anv_pipeline_emit(pipeline, partial.gs, GENX(3DSTATE_GS), gs) { + uint32_t gs_dwords[GENX(3DSTATE_GS_length)]; + anv_pipeline_emit_tmp(pipeline, gs_dwords, GENX(3DSTATE_GS), gs) { gs.Enable = true; gs.StatisticsEnable = true; gs.KernelStartPointer = gs_bin->kernel.offset; @@ -1511,15 +1594,29 @@ emit_3dstate_gs(struct anv_graphics_pipeline *pipeline) 
gs.UserClipDistanceCullTestEnableBitmask = gs_prog_data->base.cull_distance_mask; -#if GFX_VERx10 >= 125 - gs.ScratchSpaceBuffer = - get_scratch_surf(&pipeline->base.base, MESA_SHADER_GEOMETRY, gs_bin); -#else +#if GFX_VERx10 < 125 gs.PerThreadScratchSpace = get_scratch_space(gs_bin); gs.ScratchSpaceBasePointer = get_scratch_address(&pipeline->base.base, MESA_SHADER_GEOMETRY, gs_bin); #endif } + + anv_pipeline_emit_merge(pipeline, partial.gs, gs_dwords, GENX(3DSTATE_GS), gs) { +#if GFX_VERx10 >= 125 + gs.ScratchSpaceBuffer = + get_scratch_surf(&pipeline->base.base, MESA_SHADER_GEOMETRY, gs_bin, false); +#endif + } + if (pipeline_needs_protected(&pipeline->base.base)) { + anv_pipeline_emit_merge(pipeline, partial.gs_protected, + gs_dwords, GENX(3DSTATE_GS), gs) { +#if GFX_VERx10 >= 125 + gs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base.base, + MESA_SHADER_GEOMETRY, + gs_bin, true); +#endif + } + } } static void @@ -1582,12 +1679,14 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline, if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { anv_pipeline_emit(pipeline, partial.ps, GENX(3DSTATE_PS), ps); + anv_pipeline_emit(pipeline, partial.ps_protected, GENX(3DSTATE_PS), ps); return; } const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); - anv_pipeline_emit(pipeline, partial.ps, GENX(3DSTATE_PS), ps) { + uint32_t ps_dwords[GENX(3DSTATE_PS_length)]; + anv_pipeline_emit_tmp(pipeline, ps_dwords, GENX(3DSTATE_PS), ps) { #if GFX_VER == 12 assert(wm_prog_data->dispatch_multi == 0 || (wm_prog_data->dispatch_multi == 16 && wm_prog_data->max_polygons == 2)); @@ -1612,15 +1711,27 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline, ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1; -#if GFX_VERx10 >= 125 - ps.ScratchSpaceBuffer = - get_scratch_surf(&pipeline->base.base, MESA_SHADER_FRAGMENT, fs_bin); -#else +#if GFX_VERx10 < 125 ps.PerThreadScratchSpace = get_scratch_space(fs_bin); ps.ScratchSpaceBasePointer = 
get_scratch_address(&pipeline->base.base, MESA_SHADER_FRAGMENT, fs_bin); #endif } + anv_pipeline_emit_merge(pipeline, partial.ps, ps_dwords, GENX(3DSTATE_PS), ps) { +#if GFX_VERx10 >= 125 + ps.ScratchSpaceBuffer = + get_scratch_surf(&pipeline->base.base, MESA_SHADER_FRAGMENT, fs_bin, false); +#endif + } + if (pipeline_needs_protected(&pipeline->base.base)) { + anv_pipeline_emit_merge(pipeline, partial.ps_protected, + ps_dwords, GENX(3DSTATE_PS), ps) { +#if GFX_VERx10 >= 125 + ps.ScratchSpaceBuffer = + get_scratch_surf(&pipeline->base.base, MESA_SHADER_FRAGMENT, fs_bin, true); +#endif + } + } } static void @@ -1768,6 +1879,8 @@ emit_task_state(struct anv_graphics_pipeline *pipeline) if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) { anv_pipeline_emit(pipeline, final.task_control, GENX(3DSTATE_TASK_CONTROL), zero); + anv_pipeline_emit(pipeline, final.task_control_protected, + GENX(3DSTATE_TASK_CONTROL), zero); anv_pipeline_emit(pipeline, final.task_shader, GENX(3DSTATE_TASK_SHADER), zero); anv_pipeline_emit(pipeline, final.task_redistrib, @@ -1778,15 +1891,26 @@ emit_task_state(struct anv_graphics_pipeline *pipeline) const struct anv_shader_bin *task_bin = pipeline->base.shaders[MESA_SHADER_TASK]; - anv_pipeline_emit(pipeline, final.task_control, - GENX(3DSTATE_TASK_CONTROL), tc) { + uint32_t task_control_dwords[GENX(3DSTATE_TASK_CONTROL_length)]; + anv_pipeline_emit_tmp(pipeline, task_control_dwords, GENX(3DSTATE_TASK_CONTROL), tc) { tc.TaskShaderEnable = true; tc.StatisticsEnable = true; - tc.ScratchSpaceBuffer = - get_scratch_surf(&pipeline->base.base, MESA_SHADER_TASK, task_bin); tc.MaximumNumberofThreadGroups = 511; } + anv_pipeline_emit_merge(pipeline, final.task_control, + task_control_dwords, GENX(3DSTATE_TASK_CONTROL), tc) { + tc.ScratchSpaceBuffer = + get_scratch_surf(&pipeline->base.base, MESA_SHADER_TASK, task_bin, false); + } + if (pipeline_needs_protected(&pipeline->base.base)) { + anv_pipeline_emit_merge(pipeline, final.task_control_protected, 
+ task_control_dwords, GENX(3DSTATE_TASK_CONTROL), tc) { + tc.ScratchSpaceBuffer = + get_scratch_surf(&pipeline->base.base, MESA_SHADER_TASK, task_bin, true); + } + } + const struct intel_device_info *devinfo = pipeline->base.base.device->info; const struct brw_task_prog_data *task_prog_data = get_task_prog_data(pipeline); const struct intel_cs_dispatch_info task_dispatch = @@ -1840,18 +1964,29 @@ emit_mesh_state(struct anv_graphics_pipeline *pipeline) const struct anv_shader_bin *mesh_bin = pipeline->base.shaders[MESA_SHADER_MESH]; const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline); - anv_pipeline_emit(pipeline, final.mesh_control, - GENX(3DSTATE_MESH_CONTROL), mc) { + uint32_t mesh_control_dwords[GENX(3DSTATE_MESH_CONTROL_length)]; + anv_pipeline_emit_tmp(pipeline, mesh_control_dwords, GENX(3DSTATE_MESH_CONTROL), mc) { mc.MeshShaderEnable = true; mc.StatisticsEnable = true; - mc.ScratchSpaceBuffer = - get_scratch_surf(&pipeline->base.base, MESA_SHADER_MESH, mesh_bin); mc.MaximumNumberofThreadGroups = 511; #if GFX_VER >= 20 mc.VPandRTAIndexAutostripEnable = mesh_prog_data->autostrip_enable; #endif } + anv_pipeline_emit_merge(pipeline, final.mesh_control, + mesh_control_dwords, GENX(3DSTATE_MESH_CONTROL), mc) { + mc.ScratchSpaceBuffer = + get_scratch_surf(&pipeline->base.base, MESA_SHADER_MESH, mesh_bin, false); + } + if (pipeline_needs_protected(&pipeline->base.base)) { + anv_pipeline_emit_merge(pipeline, final.mesh_control_protected, + mesh_control_dwords, GENX(3DSTATE_MESH_CONTROL), mc) { + mc.ScratchSpaceBuffer = + get_scratch_surf(&pipeline->base.base, MESA_SHADER_MESH, mesh_bin, true); + } + } + const struct intel_device_info *devinfo = pipeline->base.base.device->info; const struct intel_cs_dispatch_info mesh_dispatch = brw_cs_get_dispatch_info(devinfo, &mesh_prog_data->base, NULL); @@ -1989,6 +2124,8 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline, if (device->vk.enabled_extensions.EXT_mesh_shader) { 
anv_pipeline_emit(pipeline, final.mesh_control, GENX(3DSTATE_MESH_CONTROL), zero); + anv_pipeline_emit(pipeline, final.mesh_control_protected, + GENX(3DSTATE_MESH_CONTROL), zero); anv_pipeline_emit(pipeline, final.mesh_shader, GENX(3DSTATE_MESH_SHADER), zero); anv_pipeline_emit(pipeline, final.mesh_distrib, @@ -1999,6 +2136,8 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline, GENX(3DSTATE_SBE_MESH), zero); anv_pipeline_emit(pipeline, final.task_control, GENX(3DSTATE_TASK_CONTROL), zero); + anv_pipeline_emit(pipeline, final.task_control_protected, + GENX(3DSTATE_TASK_CONTROL), zero); anv_pipeline_emit(pipeline, final.task_shader, GENX(3DSTATE_TASK_SHADER), zero); anv_pipeline_emit(pipeline, final.task_redistrib, @@ -2018,6 +2157,11 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline, anv_pipeline_emit(pipeline, partial.te, GENX(3DSTATE_TE), te); anv_pipeline_emit(pipeline, partial.gs, GENX(3DSTATE_GS), gs); + anv_pipeline_emit(pipeline, final.vs_protected, GENX(3DSTATE_VS), vs); + anv_pipeline_emit(pipeline, final.hs_protected, GENX(3DSTATE_HS), hs); + anv_pipeline_emit(pipeline, final.ds_protected, GENX(3DSTATE_DS), ds); + anv_pipeline_emit(pipeline, partial.gs_protected, GENX(3DSTATE_GS), gs); + /* BSpec 46303 forbids both 3DSTATE_MESH_CONTROL.MeshShaderEnable * and 3DSTATE_STREAMOUT.SOFunctionEnable to be 1. */