diff --git a/src/nouveau/vulkan/meson.build b/src/nouveau/vulkan/meson.build
index 8b2356344e8..ad831c74131 100644
--- a/src/nouveau/vulkan/meson.build
+++ b/src/nouveau/vulkan/meson.build
@@ -16,7 +16,6 @@ nvk_files = files(
   'nvk_cmd_pool.c',
   'nvk_cmd_pool.h',
   'nvk_codegen.c',
-  'nvk_compute_pipeline.c',
   'nvk_descriptor_set.h',
   'nvk_descriptor_set.c',
   'nvk_descriptor_set_layout.c',
@@ -31,7 +30,6 @@ nvk_files = files(
   'nvk_event.h',
   'nvk_format.c',
   'nvk_format.h',
-  'nvk_graphics_pipeline.c',
   'nvk_heap.c',
   'nvk_heap.h',
   'nvk_image.c',
@@ -45,8 +43,6 @@ nvk_files = files(
   'nvk_nir_lower_descriptors.c',
   'nvk_physical_device.c',
   'nvk_physical_device.h',
-  'nvk_pipeline.c',
-  'nvk_pipeline.h',
   'nvk_private.h',
   'nvk_query_pool.c',
   'nvk_query_pool.h',
diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.c b/src/nouveau/vulkan/nvk_cmd_buffer.c
index 435aa9ed90e..8d4f6001892 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.c
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.c
@@ -13,7 +13,7 @@
 #include "nvk_entrypoints.h"
 #include "nvk_mme.h"
 #include "nvk_physical_device.h"
-#include "nvk_pipeline.h"
+#include "nvk_shader.h"

 #include "vk_pipeline_layout.h"
 #include "vk_synchronization.h"
@@ -551,33 +551,27 @@ nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
    nvk_cmd_invalidate_deps(cmd, 1, pDependencyInfo);
 }

-VKAPI_ATTR void VKAPI_CALL
-nvk_CmdBindPipeline(VkCommandBuffer commandBuffer,
-                    VkPipelineBindPoint pipelineBindPoint,
-                    VkPipeline _pipeline)
+void
+nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
+                     uint32_t stage_count,
+                     const gl_shader_stage *stages,
+                     struct vk_shader ** const shaders)
 {
-   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
-   VK_FROM_HANDLE(nvk_pipeline, pipeline, _pipeline);
+   struct nvk_cmd_buffer *cmd = container_of(vk_cmd, struct nvk_cmd_buffer, vk);
    struct nvk_device *dev = nvk_cmd_buffer_device(cmd);

-   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
-      if(!pipeline->shaders[s])
-         continue;
-      if (pipeline->shaders[s]->info.slm_size)
-         nvk_device_ensure_slm(dev, pipeline->shaders[s]->info.slm_size);
-   }
+   for (uint32_t i = 0; i < stage_count; i++) {
+      struct nvk_shader *shader =
+         container_of(shaders[i], struct nvk_shader, vk);

-   switch (pipelineBindPoint) {
-   case VK_PIPELINE_BIND_POINT_GRAPHICS:
-      assert(pipeline->type == NVK_PIPELINE_GRAPHICS);
-      nvk_cmd_bind_graphics_pipeline(cmd, (void *)pipeline);
-      break;
-   case VK_PIPELINE_BIND_POINT_COMPUTE:
-      assert(pipeline->type == NVK_PIPELINE_COMPUTE);
-      nvk_cmd_bind_compute_pipeline(cmd, (void *)pipeline);
-      break;
-   default:
-      unreachable("Unhandled bind point");
+      if (shader != NULL && shader->info.slm_size > 0)
+         nvk_device_ensure_slm(dev, shader->info.slm_size);
+
+      if (stages[i] == MESA_SHADER_COMPUTE ||
+          stages[i] == MESA_SHADER_KERNEL)
+         nvk_cmd_bind_compute_shader(cmd, shader);
+      else
+         nvk_cmd_bind_graphics_shader(cmd, stages[i], shader);
    }
 }
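[Annotation — not part of the patch] nvk_CmdBindPipeline is gone: binding now reaches NVK through the vk_device_shader_ops hook wired up in nvk_CreateDevice() further down. A minimal sketch of the calling contract, assuming only what this patch itself sets up (the shader_ops table, and that a NULL shader unbinds a stage); the helper name is hypothetical:

   /* Sketch: how the common runtime reaches the driver hook.  A NULL entry
    * in shaders[] unbinds the corresponding stage.
    */
   static void
   bind_one_shader_sketch(struct vk_command_buffer *cmd,
                          gl_shader_stage stage,
                          struct vk_shader *shader)
   {
      struct vk_device *dev = cmd->base.device;
      dev->shader_ops->cmd_bind_shaders(cmd, 1, &stage, &shader);
   }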
diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.h b/src/nouveau/vulkan/nvk_cmd_buffer.h
index 7c9c4b75119..a087272cd9f 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.h
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.h
@@ -24,6 +24,7 @@ struct nvk_cmd_pool;
 struct nvk_image_view;
 struct nvk_push_descriptor_set;
 struct nvk_shader;
+struct vk_shader;

 struct nvk_sample_location {
    uint8_t x_u4:4;
@@ -102,9 +103,11 @@ struct nvk_rendering_state {
 struct nvk_graphics_state {
    struct nvk_rendering_state render;
-   struct nvk_graphics_pipeline *pipeline;
    struct nvk_descriptor_state descriptors;

+   uint32_t shaders_dirty;
+   struct nvk_shader *shaders[MESA_SHADER_MESH + 1];
+
    /* Used for meta save/restore */
    struct nvk_addr_range vb0;

@@ -114,8 +117,8 @@ struct nvk_graphics_state {
 };

 struct nvk_compute_state {
-   struct nvk_compute_pipeline *pipeline;
    struct nvk_descriptor_state descriptors;
+   struct nvk_shader *shader;
 };

 struct nvk_cmd_push {
@@ -209,10 +212,17 @@ void nvk_cmd_buffer_begin_compute(struct nvk_cmd_buffer *cmd,
 void nvk_cmd_invalidate_graphics_state(struct nvk_cmd_buffer *cmd);
 void nvk_cmd_invalidate_compute_state(struct nvk_cmd_buffer *cmd);

-void nvk_cmd_bind_graphics_pipeline(struct nvk_cmd_buffer *cmd,
-                                    struct nvk_graphics_pipeline *pipeline);
-void nvk_cmd_bind_compute_pipeline(struct nvk_cmd_buffer *cmd,
-                                   struct nvk_compute_pipeline *pipeline);
+void nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
+                          uint32_t stage_count,
+                          const gl_shader_stage *stages,
+                          struct vk_shader ** const shaders);
+
+void nvk_cmd_bind_graphics_shader(struct nvk_cmd_buffer *cmd,
+                                  const gl_shader_stage stage,
+                                  struct nvk_shader *shader);
+
+void nvk_cmd_bind_compute_shader(struct nvk_cmd_buffer *cmd,
+                                 struct nvk_shader *shader);

 void nvk_cmd_bind_vertex_buffer(struct nvk_cmd_buffer *cmd, uint32_t vb_idx,
                                 struct nvk_addr_range addr_range);
diff --git a/src/nouveau/vulkan/nvk_cmd_dispatch.c b/src/nouveau/vulkan/nvk_cmd_dispatch.c
index 077349db396..981adb4423e 100644
--- a/src/nouveau/vulkan/nvk_cmd_dispatch.c
+++ b/src/nouveau/vulkan/nvk_cmd_dispatch.c
@@ -9,7 +9,7 @@
 #include "nvk_entrypoints.h"
 #include "nvk_mme.h"
 #include "nvk_physical_device.h"
-#include "nvk_pipeline.h"
+#include "nvk_shader.h"

 #include "nouveau_context.h"

@@ -41,6 +41,11 @@
 #define NVC6C0_QMDV03_00_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC6C0, QMDV03_00, ##a)
 #define NVC6C0_QMDV03_00_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC6C0, QMDV03_00, ##a)

+#define QMD_DEF_SET(qmd, class_id, version_major, version_minor, a...) \
+   NVDEF_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
+#define QMD_VAL_SET(qmd, class_id, version_major, version_minor, a...) \
+   NVVAL_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
+
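[Annotation — not part of the patch] The two helpers above let nvk_qmd_init_base() below be written once and instantiated per hardware class. A concrete expansion, derived directly from the macro definitions:

   QMD_VAL_SET(qmd, C3C0, 02, 02, BARRIER_COUNT, shader->info.num_barriers);
   /* token-pastes to: */
   NVVAL_MW_SET((qmd), NVC3C0, QMDV02_02, BARRIER_COUNT, shader->info.num_barriers);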
 VkResult
 nvk_push_dispatch_state_init(struct nvk_device *dev, struct nv_push *p)
 {
@@ -97,6 +102,129 @@ nvk_cmd_invalidate_compute_state(struct nvk_cmd_buffer *cmd)
    memset(&cmd->state.cs, 0, sizeof(cmd->state.cs));
 }

+static int
+gv100_sm_config_smem_size(uint32_t size)
+{
+   if (size > 64 * 1024) size = 96 * 1024;
+   else if (size > 32 * 1024) size = 64 * 1024;
+   else if (size > 16 * 1024) size = 32 * 1024;
+   else if (size > 8 * 1024) size = 16 * 1024;
+   else size = 8 * 1024;
+   return (size / 4096) + 1;
+}
+
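[Annotation — not part of the patch] gv100_sm_config_smem_size() rounds the shader's shared-memory demand up to the next partition size the SM supports (8, 16, 32, 64, or 96 KiB) and encodes the result as (size / 4096) + 1. Two worked examples:

   gv100_sm_config_smem_size(20 * 1024);  /* rounds to 32 KiB: (32768 / 4096) + 1 == 9  */
   gv100_sm_config_smem_size(96 * 1024);  /* stays at 96 KiB:  (98304 / 4096) + 1 == 25 */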
+#define nvk_qmd_init_base(qmd, shader, class_id, version_major, version_minor) \
+do { \
+   QMD_DEF_SET(qmd, class_id, version_major, version_minor, API_VISIBLE_CALL_LIMIT, NO_CHECK); \
+   QMD_VAL_SET(qmd, class_id, version_major, version_minor, BARRIER_COUNT, shader->info.num_barriers); \
+   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION0, \
+               shader->info.cs.local_size[0]); \
+   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION1, \
+               shader->info.cs.local_size[1]); \
+   QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION2, \
+               shader->info.cs.local_size[2]); \
+   QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_MAJOR_VERSION, version_major); \
+   QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_VERSION, version_minor); \
+   QMD_DEF_SET(qmd, class_id, version_major, version_minor, SAMPLER_INDEX, INDEPENDENTLY); \
+   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); \
+   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_LOW_SIZE, \
+               align(shader->info.slm_size, 0x10)); \
+   QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHARED_MEMORY_SIZE, \
+               align(shader->info.cs.smem_size, 0x100)); \
+} while (0)
+
+static void
+nva0c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
+{
+   nvk_qmd_init_base(qmd, shader, A0C0, 00, 06);
+
+   if (shader->info.cs.smem_size <= (16 << 10))
+      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB);
+   else if (shader->info.cs.smem_size <= (32 << 10))
+      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);
+   else if (shader->info.cs.smem_size <= (48 << 10))
+      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
+   else
+      unreachable("Invalid shared memory size");
+
+   uint64_t addr = shader->hdr_addr;
+   assert(addr < 0xffffffff);
+   NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, addr);
+   NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
+   NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30);
+}
+
+static void
+nvc0c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
+{
+   nvk_qmd_init_base(qmd, shader, C0C0, 02, 01);
+
+   uint64_t addr = shader->hdr_addr;
+   assert(addr < 0xffffffff);
+
+   NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
+   NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, addr);
+   NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
+}
+
+static void
+nvc3c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
+{
+   nvk_qmd_init_base(qmd, shader, C3C0, 02, 02);
+
+   NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
+   /* those are all QMD 2.2+ */
+   NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
+                            gv100_sm_config_smem_size(shader->info.cs.smem_size));
+   NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
+                            gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
+   NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
+                            gv100_sm_config_smem_size(shader->info.cs.smem_size));
+
+   NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);
+
+   uint64_t addr = shader->hdr_addr;
+   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
+   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
+}
+
+static void
+nvc6c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
+{
+   nvk_qmd_init_base(qmd, shader, C6C0, 03, 00);
+
+   NVC6C0_QMDV03_00_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
+   /* those are all QMD 2.2+ */
+   NVC6C0_QMDV03_00_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
+                            gv100_sm_config_smem_size(shader->info.cs.smem_size));
+   NVC6C0_QMDV03_00_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
+                            gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
+   NVC6C0_QMDV03_00_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
+                            gv100_sm_config_smem_size(shader->info.cs.smem_size));
+
+   NVC6C0_QMDV03_00_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);
+
+   uint64_t addr = shader->hdr_addr;
+   NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
+   NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
+}
+
+static void
+nvk_qmd_init(struct nvk_physical_device *pdev,
+             uint32_t *qmd, const struct nvk_shader *shader)
+{
+   if (pdev->info.cls_compute >= AMPERE_COMPUTE_A)
+      nvc6c0_qmd_init(qmd, shader);
+   else if (pdev->info.cls_compute >= VOLTA_COMPUTE_A)
+      nvc3c0_qmd_init(qmd, shader);
+   else if (pdev->info.cls_compute >= PASCAL_COMPUTE_A)
+      nvc0c0_qmd_init(qmd, shader);
+   else if (pdev->info.cls_compute >= KEPLER_COMPUTE_A)
+      nva0c0_qmd_init(qmd, shader);
+   else
+      unreachable("Unknown GPU generation");
+}
+
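[Annotation — not part of the patch] With the qmd_template gone from nvk_compute_pipeline, every dispatch now builds its Queue Meta Data descriptor from scratch. The shape of the per-dispatch sequence, as wired up in nvk_flush_compute_state() later in this file:

   uint32_t qmd[128];
   memset(qmd, 0, sizeof(qmd));
   nvk_qmd_init(pdev, qmd, cmd->state.cs.shader);
   /* ...then set the dispatch size and cbufs, and launch */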
 static void
 nva0c0_qmd_set_dispatch_size(UNUSED struct nvk_device *dev, uint32_t *qmd,
                              uint32_t x, uint32_t y, uint32_t z)
@@ -171,18 +299,16 @@ nvc6c0_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,

 void
-nvk_cmd_bind_compute_pipeline(struct nvk_cmd_buffer *cmd,
-                              struct nvk_compute_pipeline *pipeline)
+nvk_cmd_bind_compute_shader(struct nvk_cmd_buffer *cmd,
+                            struct nvk_shader *shader)
 {
-   cmd->state.cs.pipeline = pipeline;
+   cmd->state.cs.shader = shader;
 }

 static uint32_t
 nvk_compute_local_size(struct nvk_cmd_buffer *cmd)
 {
-   const struct nvk_compute_pipeline *pipeline = cmd->state.cs.pipeline;
-   const struct nvk_shader *shader =
-      pipeline->base.shaders[MESA_SHADER_COMPUTE];
+   const struct nvk_shader *shader = cmd->state.cs.shader;

    return shader->info.cs.local_size[0] *
           shader->info.cs.local_size[1] *
@@ -196,7 +322,7 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
    struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
    struct nvk_physical_device *pdev = nvk_device_physical(dev);
    const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
-   const struct nvk_compute_pipeline *pipeline = cmd->state.cs.pipeline;
+   const struct nvk_shader *shader = cmd->state.cs.shader;
    struct nvk_descriptor_state *desc = &cmd->state.cs.descriptors;
    VkResult result;

@@ -224,7 +350,7 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,

    uint32_t qmd[128];
    memset(qmd, 0, sizeof(qmd));
-   memcpy(qmd, pipeline->qmd_template, sizeof(pipeline->qmd_template));
+   nvk_qmd_init(pdev, qmd, shader);

    if (nvk_cmd_buffer_compute_cls(cmd) >= AMPERE_COMPUTE_A) {
       nvc6c0_qmd_set_dispatch_size(nvk_cmd_buffer_device(cmd), qmd,
@@ -244,8 +370,6 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
                                    desc->root.cs.group_count[2]);
    }

-   const struct nvk_shader *shader =
-      pipeline->base.shaders[MESA_SHADER_COMPUTE];

    for (uint32_t c = 0; c < shader->cbuf_map.cbuf_count; c++) {
       const struct nvk_cbuf *cbuf = &shader->cbuf_map.cbufs[c];
diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c
index 1e0b7240a58..9c03016fef3 100644
--- a/src/nouveau/vulkan/nvk_cmd_draw.c
+++ b/src/nouveau/vulkan/nvk_cmd_draw.c
@@ -11,7 +11,7 @@
 #include "nvk_image_view.h"
 #include "nvk_mme.h"
 #include "nvk_physical_device.h"
-#include "nvk_pipeline.h"
+#include "nvk_shader.h"

 #include "nil_format.h"
 #include "util/bitpack_helpers.h"
@@ -370,13 +370,6 @@ nvk_push_draw_state_init(struct nvk_device *dev, struct nv_push *p)
       P_NV9097_SET_PROGRAM_REGION_B(p, shader_base_addr);
    }

-   for (uint32_t i = 0; i < 6; i++) {
-      P_IMMD(p, NV9097, SET_PIPELINE_SHADER(i), {
-         .enable = ENABLE_FALSE,
-         .type = i,
-      });
-   }
-
    for (uint32_t group = 0; group < 5; group++) {
       for (uint32_t slot = 0; slot < 16; slot++) {
          P_IMMD(p, NV9097, BIND_GROUP_CONSTANT_BUFFER(group), {
@@ -495,6 +488,8 @@ nvk_cmd_buffer_begin_graphics(struct nvk_cmd_buffer *cmd,
          nvk_cmd_buffer_dirty_render_pass(cmd);
       }
    }
+
+   cmd->state.gfx.shaders_dirty = ~0;
 }

 void
@@ -514,6 +509,8 @@ nvk_cmd_invalidate_graphics_state(struct nvk_cmd_buffer *cmd)
    struct nvk_rendering_state render_save = cmd->state.gfx.render;
    memset(&cmd->state.gfx, 0, sizeof(cmd->state.gfx));
    cmd->state.gfx.render = render_save;
+
+   cmd->state.gfx.shaders_dirty = ~0;
 }

 static void
@@ -951,23 +948,223 @@ nvk_CmdEndRendering(VkCommandBuffer commandBuffer)
 }

 void
-nvk_cmd_bind_graphics_pipeline(struct nvk_cmd_buffer *cmd,
-                               struct nvk_graphics_pipeline *pipeline)
+nvk_cmd_bind_graphics_shader(struct nvk_cmd_buffer *cmd,
+                             const gl_shader_stage stage,
+                             struct nvk_shader *shader)
 {
-   cmd->state.gfx.pipeline = pipeline;
-   vk_cmd_set_dynamic_graphics_state(&cmd->vk, &pipeline->dynamic);
+   struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
+
+   assert(stage < ARRAY_SIZE(cmd->state.gfx.shaders));
+   if (cmd->state.gfx.shaders[stage] == shader)
+      return;
+
+   cmd->state.gfx.shaders[stage] = shader;
+   cmd->state.gfx.shaders_dirty |= BITFIELD_BIT(stage);

    /* When a pipeline with tess shaders is bound we need to re-upload the
     * tessellation parameters at flush_ts_state, as the domain origin can be
     * dynamic.
     */
-   if (nvk_shader_is_enabled(pipeline->base.shaders[MESA_SHADER_TESS_EVAL])) {
-      BITSET_SET(cmd->vk.dynamic_graphics_state.dirty,
-                 MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN);
+   if (stage == MESA_SHADER_TESS_EVAL)
+      BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN);
+
+   /* Emitting SET_HYBRID_ANTI_ALIAS_CONTROL requires the fragment shader */
+   if (stage == MESA_SHADER_FRAGMENT)
+      BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES);
+}
+
+static uint32_t
+mesa_to_nv9097_shader_type(gl_shader_stage stage)
+{
+   static const uint32_t mesa_to_nv9097[] = {
+      [MESA_SHADER_VERTEX] = NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX,
+      [MESA_SHADER_TESS_CTRL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION_INIT,
+      [MESA_SHADER_TESS_EVAL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION,
+      [MESA_SHADER_GEOMETRY] = NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY,
+      [MESA_SHADER_FRAGMENT] = NV9097_SET_PIPELINE_SHADER_TYPE_PIXEL,
+   };
+   assert(stage < ARRAY_SIZE(mesa_to_nv9097));
+   return mesa_to_nv9097[stage];
+}
+
+static uint32_t
+nvk_pipeline_bind_group(gl_shader_stage stage)
+{
+   return stage;
+}
+
+static void
+nvk_flush_shaders(struct nvk_cmd_buffer *cmd)
+{
+   if (cmd->state.gfx.shaders_dirty == 0)
+      return;
+
+   /* Map shader types to shaders */
+   struct nvk_shader *type_shader[6] = { NULL, };
+   uint32_t types_dirty = 0;
+
+   const uint32_t gfx_stages = BITFIELD_BIT(MESA_SHADER_VERTEX) |
+                               BITFIELD_BIT(MESA_SHADER_TESS_CTRL) |
+                               BITFIELD_BIT(MESA_SHADER_TESS_EVAL) |
+                               BITFIELD_BIT(MESA_SHADER_GEOMETRY) |
+                               BITFIELD_BIT(MESA_SHADER_FRAGMENT);
+
+   u_foreach_bit(stage, cmd->state.gfx.shaders_dirty & gfx_stages) {
+      uint32_t type = mesa_to_nv9097_shader_type(stage);
+      types_dirty |= BITFIELD_BIT(type);
+
+      /* Only copy non-NULL shaders because mesh/task alias with vertex and
+       * tessellation stages.
+       */
+      if (cmd->state.gfx.shaders[stage] != NULL) {
+         assert(type < ARRAY_SIZE(type_shader));
+         assert(type_shader[type] == NULL);
+         type_shader[type] = cmd->state.gfx.shaders[stage];
+      }
    }

-   struct nv_push *p = nvk_cmd_buffer_push(cmd, pipeline->push_dw_count);
-   nv_push_raw(p, pipeline->push_data, pipeline->push_dw_count);
+   u_foreach_bit(type, types_dirty) {
+      struct nvk_shader *shader = type_shader[type];
+
+      /* We always map index == type */
+      const uint32_t idx = type;
+
+      struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
+      P_IMMD(p, NV9097, SET_PIPELINE_SHADER(idx), {
+         .enable = shader != NULL,
+         .type = type,
+      });
+
+      if (shader == NULL)
+         continue;
+
+      uint64_t addr = shader->hdr_addr;
+      if (nvk_cmd_buffer_3d_cls(cmd) >= VOLTA_A) {
+         P_MTHD(p, NVC397, SET_PIPELINE_PROGRAM_ADDRESS_A(idx));
+         P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_A(p, idx, addr >> 32);
+         P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_B(p, idx, addr);
+      } else {
+         assert(addr < 0xffffffff);
+         P_IMMD(p, NV9097, SET_PIPELINE_PROGRAM(idx), addr);
+      }
+
+      P_MTHD(p, NVC397, SET_PIPELINE_REGISTER_COUNT(idx));
+      P_NVC397_SET_PIPELINE_REGISTER_COUNT(p, idx, shader->info.num_gprs);
+      P_NVC397_SET_PIPELINE_BINDING(p, idx,
+                                    nvk_pipeline_bind_group(shader->info.stage));
+
+      if (shader->info.stage == MESA_SHADER_FRAGMENT) {
+         p = nvk_cmd_buffer_push(cmd, 9);
+
+         P_MTHD(p, NVC397, SET_SUBTILING_PERF_KNOB_A);
+         P_NV9097_SET_SUBTILING_PERF_KNOB_A(p, {
+            .fraction_of_spm_register_file_per_subtile = 0x10,
+            .fraction_of_spm_pixel_output_buffer_per_subtile = 0x40,
+            .fraction_of_spm_triangle_ram_per_subtile = 0x16,
+            .fraction_of_max_quads_per_subtile = 0x20,
+         });
+         P_NV9097_SET_SUBTILING_PERF_KNOB_B(p, 0x20);
+
+         P_IMMD(p, NV9097, SET_API_MANDATED_EARLY_Z,
+                shader->info.fs.early_fragment_tests);
+
+         if (nvk_cmd_buffer_3d_cls(cmd) >= MAXWELL_B) {
+            P_IMMD(p, NVB197, SET_POST_Z_PS_IMASK,
+                   shader->info.fs.post_depth_coverage);
+         } else {
+            assert(!shader->info.fs.post_depth_coverage);
+         }
+
+         P_IMMD(p, NV9097, SET_ZCULL_BOUNDS, {
+            .z_min_unbounded_enable = shader->info.fs.writes_depth,
+            .z_max_unbounded_enable = shader->info.fs.writes_depth,
+         });
+      }
+   }
+
+   const uint32_t vtg_stages = BITFIELD_BIT(MESA_SHADER_VERTEX) |
+                               BITFIELD_BIT(MESA_SHADER_TESS_EVAL) |
+                               BITFIELD_BIT(MESA_SHADER_GEOMETRY);
+   const uint32_t vtgm_stages = vtg_stages | BITFIELD_BIT(MESA_SHADER_MESH);
+
+   if (cmd->state.gfx.shaders_dirty & vtg_stages) {
+      struct nak_xfb_info *xfb = NULL;
+      u_foreach_bit(stage, vtg_stages) {
+         if (cmd->state.gfx.shaders[stage] != NULL)
+            xfb = &cmd->state.gfx.shaders[stage]->info.vtg.xfb;
+      }
+
+      if (xfb == NULL) {
+         struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
+         for (uint8_t b = 0; b < 4; b++)
+            P_IMMD(p, NV9097, SET_STREAM_OUT_CONTROL_COMPONENT_COUNT(b), 0);
+      } else {
+         for (uint8_t b = 0; b < ARRAY_SIZE(xfb->attr_count); b++) {
+            const uint8_t attr_count = xfb->attr_count[b];
+            /* upload packed varying indices in multiples of 4 bytes */
+            const uint32_t n = DIV_ROUND_UP(attr_count, 4);
+
+            struct nv_push *p = nvk_cmd_buffer_push(cmd, 5 + n);
+
+            P_MTHD(p, NV9097, SET_STREAM_OUT_CONTROL_STREAM(b));
+            P_NV9097_SET_STREAM_OUT_CONTROL_STREAM(p, b, xfb->stream[b]);
+            P_NV9097_SET_STREAM_OUT_CONTROL_COMPONENT_COUNT(p, b, attr_count);
+            P_NV9097_SET_STREAM_OUT_CONTROL_STRIDE(p, b, xfb->stride[b]);
+
+            if (n > 0) {
+               P_MTHD(p, NV9097, SET_STREAM_OUT_LAYOUT_SELECT(b, 0));
+               P_INLINE_ARRAY(p, (const uint32_t*)xfb->attr_index[b], n);
+            }
+         }
+      }
+   }
+
+   if (cmd->state.gfx.shaders_dirty & vtgm_stages) {
+      struct nvk_shader *last_vtgm = NULL;
+      u_foreach_bit(stage, vtgm_stages) {
+         if (cmd->state.gfx.shaders[stage] != NULL)
+            last_vtgm = cmd->state.gfx.shaders[stage];
+      }
+
+      struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
+
+      P_IMMD(p, NV9097, SET_RT_LAYER, {
+         .v = 0,
+         .control = last_vtgm->info.vtg.writes_layer ?
+                    CONTROL_GEOMETRY_SHADER_SELECTS_LAYER :
+                    CONTROL_V_SELECTS_LAYER,
+      });
+
+      P_IMMD(p, NV9097, SET_ATTRIBUTE_POINT_SIZE, {
+         .enable = last_vtgm->info.vtg.writes_point_size,
+         .slot = 0,
+      });
+
+      const uint8_t clip_enable = last_vtgm->info.vtg.clip_enable;
+      const uint8_t cull_enable = last_vtgm->info.vtg.cull_enable;
+      P_IMMD(p, NV9097, SET_USER_CLIP_ENABLE, {
+         .plane0 = ((clip_enable | cull_enable) >> 0) & 1,
+         .plane1 = ((clip_enable | cull_enable) >> 1) & 1,
+         .plane2 = ((clip_enable | cull_enable) >> 2) & 1,
+         .plane3 = ((clip_enable | cull_enable) >> 3) & 1,
+         .plane4 = ((clip_enable | cull_enable) >> 4) & 1,
+         .plane5 = ((clip_enable | cull_enable) >> 5) & 1,
+         .plane6 = ((clip_enable | cull_enable) >> 6) & 1,
+         .plane7 = ((clip_enable | cull_enable) >> 7) & 1,
+      });
+      P_IMMD(p, NV9097, SET_USER_CLIP_OP, {
+         .plane0 = (cull_enable >> 0) & 1,
+         .plane1 = (cull_enable >> 1) & 1,
+         .plane2 = (cull_enable >> 2) & 1,
+         .plane3 = (cull_enable >> 3) & 1,
+         .plane4 = (cull_enable >> 4) & 1,
+         .plane5 = (cull_enable >> 5) & 1,
+         .plane6 = (cull_enable >> 6) & 1,
+         .plane7 = (cull_enable >> 7) & 1,
+      });
+   }
+
+   cmd->state.gfx.shaders_dirty = 0;
 }

 static void
@@ -1045,11 +1242,10 @@ nvk_flush_ts_state(struct nvk_cmd_buffer *cmd)
    }

    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) {
-      const struct nvk_graphics_pipeline *pipeline= cmd->state.gfx.pipeline;
       const struct nvk_shader *shader =
-         pipeline->base.shaders[MESA_SHADER_TESS_EVAL];
+         cmd->state.gfx.shaders[MESA_SHADER_TESS_EVAL];

-      if (nvk_shader_is_enabled(shader)) {
+      if (shader != NULL) {
          enum nak_ts_prims prims = shader->info.ts.prims;
          /* When the origin is lower-left, we have to flip the winding order */
          if (dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
@@ -1433,9 +1629,10 @@ nvk_flush_ms_state(struct nvk_cmd_buffer *cmd)
           dyn->ms.rasterization_samples == render->samples);
    }

-   const struct nvk_graphics_pipeline *pipeline = cmd->state.gfx.pipeline;
+   struct nvk_shader *fs = cmd->state.gfx.shaders[MESA_SHADER_FRAGMENT];
+   const float min_sample_shading = fs != NULL ? fs->min_sample_shading : 0;
    uint32_t min_samples = ceilf(dyn->ms.rasterization_samples *
-                                pipeline->min_sample_shading);
+                                min_sample_shading);
    min_samples = util_next_power_of_two(MAX2(1, min_samples));

    P_IMMD(p, NV9097, SET_HYBRID_ANTI_ALIAS_CONTROL, {
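[Annotation — not part of the patch] min_sample_shading moves from the pipeline to the fragment shader, so the hunk above recomputes it from whatever shader is currently bound. A worked example of the computation: with rasterization_samples = 8 and min_sample_shading = 0.3,

   uint32_t min_samples = ceilf(8 * 0.3f);                     /* == 3 */
   min_samples = util_next_power_of_two(MAX2(1, min_samples)); /* == 4 */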
@@ -1923,7 +2120,6 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
    struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
    struct nvk_physical_device *pdev = nvk_device_physical(dev);
    const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
-   const struct nvk_graphics_pipeline *pipeline = cmd->state.gfx.pipeline;
    struct nvk_descriptor_state *desc = &cmd->state.gfx.descriptors;
    VkResult result;

@@ -1952,8 +2148,8 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
    /* Find cbuf maps for the 5 cbuf groups */
    const struct nvk_shader *cbuf_shaders[5] = { NULL, };
    for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
-      const struct nvk_shader *shader = pipeline->base.shaders[stage];
-      if (!shader || shader->code_size == 0)
+      const struct nvk_shader *shader = cmd->state.gfx.shaders[stage];
+      if (shader == NULL)
         continue;

       uint32_t group = nvk_cbuf_binding_for_stage(stage);
@@ -2053,6 +2249,7 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
 static void
 nvk_flush_gfx_state(struct nvk_cmd_buffer *cmd)
 {
+   nvk_flush_shaders(cmd);
    nvk_flush_dynamic_state(cmd);
    nvk_flush_descriptors(cmd);
 }
diff --git a/src/nouveau/vulkan/nvk_cmd_meta.c b/src/nouveau/vulkan/nvk_cmd_meta.c
index 39135ae5967..e038743125c 100644
--- a/src/nouveau/vulkan/nvk_cmd_meta.c
+++ b/src/nouveau/vulkan/nvk_cmd_meta.c
@@ -60,7 +60,7 @@ struct nvk_meta_save {
    struct vk_vertex_input_state _dynamic_vi;
    struct vk_sample_locations_state _dynamic_sl;
    struct vk_dynamic_graphics_state dynamic;
-   struct nvk_graphics_pipeline *pipeline;
+   struct nvk_shader *shaders[MESA_SHADER_MESH + 1];
    struct nvk_addr_range vb0;
    struct nvk_descriptor_set *desc0;
    bool has_push_desc0;
@@ -76,7 +76,9 @@ nvk_meta_begin(struct nvk_cmd_buffer *cmd,

    save->_dynamic_vi = cmd->state.gfx._dynamic_vi;
    save->_dynamic_sl = cmd->state.gfx._dynamic_sl;
-   save->pipeline = cmd->state.gfx.pipeline;
+   STATIC_ASSERT(sizeof(cmd->state.gfx.shaders) == sizeof(save->shaders));
+   memcpy(save->shaders, cmd->state.gfx.shaders, sizeof(save->shaders));
+
    save->vb0 = cmd->state.gfx.vb0;

    save->desc0 = cmd->state.gfx.descriptors.sets[0];
@@ -148,8 +150,12 @@ nvk_meta_end(struct nvk_cmd_buffer *cmd,
           cmd->vk.dynamic_graphics_state.set,
           sizeof(cmd->vk.dynamic_graphics_state.set));

-   if (save->pipeline)
-      nvk_cmd_bind_graphics_pipeline(cmd, save->pipeline);
+   for (uint32_t stage = 0; stage < ARRAY_SIZE(save->shaders); stage++) {
+      if (stage == MESA_SHADER_COMPUTE)
+         continue;
+
+      nvk_cmd_bind_graphics_shader(cmd, stage, save->shaders[stage]);
+   }

    nvk_cmd_bind_vertex_buffer(cmd, 0, save->vb0);
diff --git a/src/nouveau/vulkan/nvk_device.c b/src/nouveau/vulkan/nvk_device.c
index f2c266e89f3..1639c5ce217 100644
--- a/src/nouveau/vulkan/nvk_device.c
+++ b/src/nouveau/vulkan/nvk_device.c
@@ -8,6 +8,7 @@
 #include "nvk_entrypoints.h"
 #include "nvk_instance.h"
 #include "nvk_physical_device.h"
+#include "nvk_shader.h"

 #include "vk_pipeline_cache.h"
 #include "vulkan/wsi/wsi_common.h"
@@ -146,6 +147,8 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice,
    if (result != VK_SUCCESS)
       goto fail_alloc;

+   dev->vk.shader_ops = &nvk_device_shader_ops;
+
    drmDevicePtr drm_device = NULL;
    int ret = drmGetDeviceFromDevId(pdev->render_dev, 0, &drm_device);
    if (ret != 0) {
diff --git a/src/nouveau/vulkan/nvk_graphics_pipeline.c b/src/nouveau/vulkan/nvk_graphics_pipeline.c
deleted file mode 100644
index a705a89f05c..00000000000
--- a/src/nouveau/vulkan/nvk_graphics_pipeline.c
+++ /dev/null
@@ -1,417 +0,0 @@
-/*
- * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
- * SPDX-License-Identifier: MIT
- */
-#include "nvk_pipeline.h"
-
-#include "nvk_device.h"
-#include "nvk_mme.h"
-#include "nvk_physical_device.h"
-#include "nvk_shader.h"
-
-#include "vk_nir.h"
-#include "vk_pipeline.h"
-#include "vk_pipeline_layout.h"
-
-#include "nv_push.h"
-
-#include "nouveau_context.h"
-
-#include "compiler/spirv/nir_spirv.h"
-
-#include "nvk_cl9097.h"
-#include "nvk_clb197.h"
-#include "nvk_clc397.h"
-
-static void
-emit_pipeline_xfb_state(struct nv_push *p, const struct nak_xfb_info *xfb)
-{
-   for (uint8_t b = 0; b < ARRAY_SIZE(xfb->attr_count); b++) {
-      const uint8_t attr_count = xfb->attr_count[b];
-      P_MTHD(p, NV9097, SET_STREAM_OUT_CONTROL_STREAM(b));
-      P_NV9097_SET_STREAM_OUT_CONTROL_STREAM(p, b, xfb->stream[b]);
-      P_NV9097_SET_STREAM_OUT_CONTROL_COMPONENT_COUNT(p, b, attr_count);
-      P_NV9097_SET_STREAM_OUT_CONTROL_STRIDE(p, b, xfb->stride[b]);
-
-      /* upload packed varying indices in multiples of 4 bytes */
-      const uint32_t n = DIV_ROUND_UP(attr_count, 4);
-      if (n > 0) {
-         P_MTHD(p, NV9097, SET_STREAM_OUT_LAYOUT_SELECT(b, 0));
-         P_INLINE_ARRAY(p, (const uint32_t*)xfb->attr_index[b], n);
-      }
-   }
-}
-
-static const uint32_t mesa_to_nv9097_shader_type[] = {
-   [MESA_SHADER_VERTEX] = NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX,
-   [MESA_SHADER_TESS_CTRL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION_INIT,
-   [MESA_SHADER_TESS_EVAL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION,
-   [MESA_SHADER_GEOMETRY] = NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY,
-   [MESA_SHADER_FRAGMENT] = NV9097_SET_PIPELINE_SHADER_TYPE_PIXEL,
-};
-
-static void
-merge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info)
-{
-   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
-    *
-    *    "PointMode. Controls generation of points rather than triangles
-    *     or lines. This functionality defaults to disabled, and is
-    *     enabled if either shader stage includes the execution mode.
-    *
-    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
-    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
-    * and OutputVertices, it says:
-    *
-    *    "One mode must be set in at least one of the tessellation
-    *     shader stages."
-    *
-    * So, the fields can be set in either the TCS or TES, but they must
-    * agree if set in both.  Our backend looks at TES, so bitwise-or in
-    * the values from the TCS.
-    */
-   assert(tcs_info->tess.tcs_vertices_out == 0 || tes_info->tess.tcs_vertices_out == 0 ||
-          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
-   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
-
-   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
-          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
-          tcs_info->tess.spacing == tes_info->tess.spacing);
-   tes_info->tess.spacing |= tcs_info->tess.spacing;
-
-   assert(tcs_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
-          tes_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
-          tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
-   tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
-   tes_info->tess.ccw |= tcs_info->tess.ccw;
-   tes_info->tess.point_mode |= tcs_info->tess.point_mode;
-
-   /* Copy the merged info back to the TCS */
-   tcs_info->tess.tcs_vertices_out = tes_info->tess.tcs_vertices_out;
-   tcs_info->tess.spacing = tes_info->tess.spacing;
-   tcs_info->tess._primitive_mode = tes_info->tess._primitive_mode;
-   tcs_info->tess.ccw = tes_info->tess.ccw;
-   tcs_info->tess.point_mode = tes_info->tess.point_mode;
-}
-
-VkResult
-nvk_graphics_pipeline_create(struct nvk_device *dev,
-                             struct vk_pipeline_cache *cache,
-                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
-                             const VkAllocationCallbacks *pAllocator,
-                             VkPipeline *pPipeline)
-{
-   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
-   struct nvk_graphics_pipeline *pipeline;
-   VkResult result = VK_SUCCESS;
-
-   pipeline = (void *)nvk_pipeline_zalloc(dev, NVK_PIPELINE_GRAPHICS,
-                                          sizeof(*pipeline), pAllocator);
-   if (pipeline == NULL)
-      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
-
-   VkPipelineCreateFlags2KHR pipeline_flags =
-      vk_graphics_pipeline_create_flags(pCreateInfo);
-
-   if (pipeline_flags &
-       VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)
-      cache = NULL;
-
-   struct vk_graphics_pipeline_all_state all;
-   struct vk_graphics_pipeline_state state = {};
-   result = vk_graphics_pipeline_state_fill(&dev->vk, &state, pCreateInfo,
-                                            NULL, 0, &all, NULL, 0, NULL);
-   assert(result == VK_SUCCESS);
-
-   VkPipelineCreationFeedbackEXT pipeline_feedback = {
-      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
-   };
-   VkPipelineCreationFeedbackEXT stage_feedbacks[MESA_SHADER_STAGES] = { 0 };
-
-   int64_t pipeline_start = os_time_get_nano();
-
-   const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
-      vk_find_struct_const(pCreateInfo->pNext,
-                           PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
-
-   const VkPipelineShaderStageCreateInfo *infos[MESA_SHADER_STAGES] = {};
-   nir_shader *nir[MESA_SHADER_STAGES] = {};
-   struct vk_pipeline_robustness_state robustness[MESA_SHADER_STAGES];
-
-   struct vk_pipeline_cache_object *cache_objs[MESA_SHADER_STAGES] = {};
-
-   struct nak_fs_key fs_key_tmp, *fs_key = NULL;
-   nvk_populate_fs_key(&fs_key_tmp, &state);
-   fs_key = &fs_key_tmp;
-
-   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
-      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
-      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
-      infos[stage] = sinfo;
-   }
-
-   for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
-      const VkPipelineShaderStageCreateInfo *sinfo = infos[stage];
-      if (sinfo == NULL)
-         continue;
-
-      vk_pipeline_robustness_state_fill(&dev->vk, &robustness[stage],
-                                        pCreateInfo->pNext, sinfo->pNext);
-   }
-
-   for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
-      const VkPipelineShaderStageCreateInfo *sinfo = infos[stage];
-      if (sinfo == NULL)
-         continue;
-
-      unsigned char sha1[SHA1_DIGEST_LENGTH];
-      nvk_hash_shader(sha1, sinfo, &robustness[stage],
-                      state.rp->view_mask != 0, pipeline_layout,
-                      stage == MESA_SHADER_FRAGMENT ? fs_key : NULL);
-
-      if (cache) {
-         bool cache_hit = false;
-         cache_objs[stage] = vk_pipeline_cache_lookup_object(cache, &sha1, sizeof(sha1),
-                                                             &nvk_shader_ops, &cache_hit);
-         pipeline->base.shaders[stage] =
-            container_of(cache_objs[stage], struct nvk_shader, base);
-
-         if (cache_hit && cache != dev->mem_cache)
-            pipeline_feedback.flags |=
-               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
-      }
-
-      if (!cache_objs[stage] &&
-          pCreateInfo->flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) {
-         result = VK_PIPELINE_COMPILE_REQUIRED;
-         goto fail;
-      }
-   }
-
-   for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
-      const VkPipelineShaderStageCreateInfo *sinfo = infos[stage];
-      if (sinfo == NULL || cache_objs[stage])
-         continue;
-
-      result = nvk_shader_stage_to_nir(dev, sinfo, &robustness[stage],
-                                       cache, NULL, &nir[stage]);
-      if (result != VK_SUCCESS)
-         goto fail;
-   }
-
-   if (nir[MESA_SHADER_TESS_CTRL] && nir[MESA_SHADER_TESS_EVAL]) {
-      merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
-   }
-
-   for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
-      const VkPipelineShaderStageCreateInfo *sinfo = infos[stage];
-      if (sinfo == NULL)
-         continue;
-
-      if (!cache_objs[stage]) {
-         int64_t stage_start = os_time_get_nano();
-
-         unsigned char sha1[SHA1_DIGEST_LENGTH];
-         nvk_hash_shader(sha1, sinfo, &robustness[stage],
-                         state.rp->view_mask != 0, pipeline_layout,
-                         stage == MESA_SHADER_FRAGMENT ? fs_key : NULL);
-
-         struct nvk_shader *shader = nvk_shader_init(dev, sha1, SHA1_DIGEST_LENGTH);
-         if(shader == NULL) {
-            result = vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
-            goto fail;
-         }
-
-         nvk_lower_nir(dev, nir[stage], &robustness[stage],
-                       state.rp->view_mask != 0,
-                       pipeline_layout->set_count,
-                       pipeline_layout->set_layouts,
-                       &shader->cbuf_map);
-
-         result = nvk_compile_nir(dev, nir[stage],
-                                  pipeline_flags, &robustness[stage],
-                                  stage == MESA_SHADER_FRAGMENT ? fs_key : NULL,
-                                  cache, shader);
-
-         if (result == VK_SUCCESS) {
-            cache_objs[stage] = &shader->base;
-
-            if (cache)
-               cache_objs[stage] = vk_pipeline_cache_add_object(cache,
-                                                                cache_objs[stage]);
-
-            stage_feedbacks[stage].flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
-            pipeline->base.shaders[stage] =
-               container_of(cache_objs[stage], struct nvk_shader, base);
-         }
-
-         stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
-         ralloc_free(nir[stage]);
-      }
-
-      if (result != VK_SUCCESS)
-         goto fail;
-
-      result = nvk_shader_upload(dev, pipeline->base.shaders[stage]);
-      if (result != VK_SUCCESS)
-         goto fail;
-   }
-
-   struct nv_push push;
-   nv_push_init(&push, pipeline->push_data, ARRAY_SIZE(pipeline->push_data));
-   struct nv_push *p = &push;
-
-   bool force_max_samples = false;
-
-   struct nvk_shader *last_geom = NULL;
-   for (gl_shader_stage stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
-      struct nvk_shader *shader = pipeline->base.shaders[stage];
-      uint32_t idx = mesa_to_nv9097_shader_type[stage];
-
-      P_IMMD(p, NV9097, SET_PIPELINE_SHADER(idx), {
-         .enable = nvk_shader_is_enabled(shader),
-         .type = mesa_to_nv9097_shader_type[stage],
-      });
-
-      if (!nvk_shader_is_enabled(shader))
-         continue;
-
-      if (stage != MESA_SHADER_FRAGMENT)
-         last_geom = shader;
-
-      uint64_t addr = shader->hdr_addr;
-      if (dev->pdev->info.cls_eng3d >= VOLTA_A) {
-         P_MTHD(p, NVC397, SET_PIPELINE_PROGRAM_ADDRESS_A(idx));
-         P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_A(p, idx, addr >> 32);
-         P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_B(p, idx, addr);
-      } else {
-         assert(addr < 0xffffffff);
-         P_IMMD(p, NV9097, SET_PIPELINE_PROGRAM(idx), addr);
-      }
-
-      P_MTHD(p, NVC397, SET_PIPELINE_REGISTER_COUNT(idx));
-      P_NVC397_SET_PIPELINE_REGISTER_COUNT(p, idx, shader->info.num_gprs);
-      P_NVC397_SET_PIPELINE_BINDING(p, idx, nvk_cbuf_binding_for_stage(stage));
-
-      switch (stage) {
-      case MESA_SHADER_VERTEX:
-      case MESA_SHADER_GEOMETRY:
-      case MESA_SHADER_TESS_CTRL:
-      case MESA_SHADER_TESS_EVAL:
-         break;
-
-      case MESA_SHADER_FRAGMENT:
-         P_IMMD(p, NV9097, SET_SUBTILING_PERF_KNOB_A, {
-            .fraction_of_spm_register_file_per_subtile = 0x10,
-            .fraction_of_spm_pixel_output_buffer_per_subtile = 0x40,
-            .fraction_of_spm_triangle_ram_per_subtile = 0x16,
-            .fraction_of_max_quads_per_subtile = 0x20,
-         });
-         P_NV9097_SET_SUBTILING_PERF_KNOB_B(p, 0x20);
-
-         P_IMMD(p, NV9097, SET_API_MANDATED_EARLY_Z,
-                shader->info.fs.early_fragment_tests);
-
-         if (dev->pdev->info.cls_eng3d >= MAXWELL_B) {
-            P_IMMD(p, NVB197, SET_POST_Z_PS_IMASK,
-                   shader->info.fs.post_depth_coverage);
-         } else {
-            assert(!shader->info.fs.post_depth_coverage);
-         }
-
-         P_IMMD(p, NV9097, SET_ZCULL_BOUNDS, {
-            .z_min_unbounded_enable = shader->info.fs.writes_depth,
-            .z_max_unbounded_enable = shader->info.fs.writes_depth,
-         });
-
-         /* If we're using the incoming sample mask and doing sample shading,
-          * we have to do sample shading "to the max", otherwise there's no
-          * way to tell which sets of samples are covered by the current
-          * invocation.
-          */
-         force_max_samples = shader->info.fs.reads_sample_mask ||
-                             shader->info.fs.uses_sample_shading;
-         break;
-
-      default:
-         unreachable("Unsupported shader stage");
-      }
-   }
-
-   const uint8_t clip_cull = last_geom->info.vtg.clip_enable |
-                             last_geom->info.vtg.cull_enable;
-   if (clip_cull) {
-      P_IMMD(p, NV9097, SET_USER_CLIP_ENABLE, {
-         .plane0 = (clip_cull >> 0) & 1,
-         .plane1 = (clip_cull >> 1) & 1,
-         .plane2 = (clip_cull >> 2) & 1,
-         .plane3 = (clip_cull >> 3) & 1,
-         .plane4 = (clip_cull >> 4) & 1,
-         .plane5 = (clip_cull >> 5) & 1,
-         .plane6 = (clip_cull >> 6) & 1,
-         .plane7 = (clip_cull >> 7) & 1,
-      });
-      P_IMMD(p, NV9097, SET_USER_CLIP_OP, {
-         .plane0 = (last_geom->info.vtg.cull_enable >> 0) & 1,
-         .plane1 = (last_geom->info.vtg.cull_enable >> 1) & 1,
-         .plane2 = (last_geom->info.vtg.cull_enable >> 2) & 1,
-         .plane3 = (last_geom->info.vtg.cull_enable >> 3) & 1,
-         .plane4 = (last_geom->info.vtg.cull_enable >> 4) & 1,
-         .plane5 = (last_geom->info.vtg.cull_enable >> 5) & 1,
-         .plane6 = (last_geom->info.vtg.cull_enable >> 6) & 1,
-         .plane7 = (last_geom->info.vtg.cull_enable >> 7) & 1,
-      });
-   }
-
-   /* TODO: prog_selects_layer */
-   P_IMMD(p, NV9097, SET_RT_LAYER, {
-      .v = 0,
-      .control = last_geom->info.vtg.writes_layer ?
-                 CONTROL_GEOMETRY_SHADER_SELECTS_LAYER :
-                 CONTROL_V_SELECTS_LAYER,
-   });
-
-   P_IMMD(p, NV9097, SET_ATTRIBUTE_POINT_SIZE, {
-      .enable = last_geom->info.vtg.writes_point_size,
-      .slot = 0,
-   });
-
-   emit_pipeline_xfb_state(&push, &last_geom->info.vtg.xfb);
-
-   pipeline->push_dw_count = nv_push_dw_count(&push);
-
-   if (force_max_samples)
-      pipeline->min_sample_shading = 1;
-   else if (state.ms != NULL && state.ms->sample_shading_enable)
-      pipeline->min_sample_shading = CLAMP(state.ms->min_sample_shading, 0, 1);
-   else
-      pipeline->min_sample_shading = 0;
-
-   pipeline->dynamic.vi = &pipeline->_dynamic_vi;
-   pipeline->dynamic.ms.sample_locations = &pipeline->_dynamic_sl;
-   vk_dynamic_graphics_state_fill(&pipeline->dynamic, &state);
-
-   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
-   if (creation_feedback) {
-      *creation_feedback->pPipelineCreationFeedback = pipeline_feedback;
-
-      int fb_count = creation_feedback->pipelineStageCreationFeedbackCount;
-      if (pCreateInfo->stageCount == fb_count) {
-         for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
-            const VkPipelineShaderStageCreateInfo *sinfo =
-               &pCreateInfo->pStages[i];
-            gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
-            creation_feedback->pPipelineStageCreationFeedbacks[i] =
-               stage_feedbacks[stage];
-         }
-      }
-   }
-
-   *pPipeline = nvk_pipeline_to_handle(&pipeline->base);
-
-   return VK_SUCCESS;
-
-fail:
-   vk_object_free(&dev->vk, pAllocator, pipeline);
-   return result;
-}
diff --git a/src/nouveau/vulkan/nvk_pipeline.h b/src/nouveau/vulkan/nvk_pipeline.h
deleted file mode 100644
index a68b353a9c8..00000000000
--- a/src/nouveau/vulkan/nvk_pipeline.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
- * SPDX-License-Identifier: MIT
- */
-#ifndef NVK_PIPELINE_H
-#define NVK_PIPELINE_H 1
-
-#include "nvk_private.h"
-#include "nvk_shader.h"
-
-#include "vk_graphics_state.h"
-#include "vk_object.h"
-
-struct vk_pipeline_cache;
-
-enum nvk_pipeline_type {
-   NVK_PIPELINE_GRAPHICS,
-   NVK_PIPELINE_COMPUTE,
-};
-
-struct nvk_pipeline {
-   struct vk_object_base base;
-
-   enum nvk_pipeline_type type;
-
-   struct nvk_shader *shaders[MESA_SHADER_STAGES];
-};
-
-VK_DEFINE_NONDISP_HANDLE_CASTS(nvk_pipeline, base, VkPipeline,
-                               VK_OBJECT_TYPE_PIPELINE)
-
-void
-nvk_pipeline_free(struct nvk_device *dev,
-                  struct nvk_pipeline *pipeline,
-                  const VkAllocationCallbacks *pAllocator);
-struct nvk_pipeline *
-nvk_pipeline_zalloc(struct nvk_device *dev,
-                    enum nvk_pipeline_type type, size_t size,
-                    const VkAllocationCallbacks *pAllocator);
-
-struct nvk_compute_pipeline {
-   struct nvk_pipeline base;
-
-   uint32_t qmd_template[64];
-};
-
-VkResult
-nvk_compute_pipeline_create(struct nvk_device *dev,
-                            struct vk_pipeline_cache *cache,
-                            const VkComputePipelineCreateInfo *pCreateInfo,
-                            const VkAllocationCallbacks *pAllocator,
-                            VkPipeline *pPipeline);
-
-struct nvk_graphics_pipeline {
-   struct nvk_pipeline base;
-
-   uint32_t push_data[192];
-   uint32_t push_dw_count;
-
-   float min_sample_shading;
-
-   struct vk_vertex_input_state _dynamic_vi;
-   struct vk_sample_locations_state _dynamic_sl;
-   struct vk_dynamic_graphics_state dynamic;
-};
-
-VkResult
-nvk_graphics_pipeline_create(struct nvk_device *dev,
-                             struct vk_pipeline_cache *cache,
-                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
-                             const VkAllocationCallbacks *pAllocator,
-                             VkPipeline *pPipeline);
-
-#endif
diff --git a/src/nouveau/vulkan/nvk_query_pool.c b/src/nouveau/vulkan/nvk_query_pool.c
index e9c6cf40e8b..eeef9761c9e 100644
--- a/src/nouveau/vulkan/nvk_query_pool.c
+++ b/src/nouveau/vulkan/nvk_query_pool.c
@@ -11,7 +11,6 @@
 #include "nvk_event.h"
 #include "nvk_mme.h"
 #include "nvk_physical_device.h"
-#include "nvk_pipeline.h"

 #include "vk_meta.h"
 #include "vk_pipeline.h"
@@ -973,12 +972,13 @@ nvk_meta_copy_query_pool_results(struct nvk_cmd_buffer *cmd,
    }

    /* Save pipeline and push constants */
-   struct nvk_compute_pipeline *pipeline_save = cmd->state.cs.pipeline;
+   struct nvk_shader *shader_save = cmd->state.cs.shader;
    uint8_t push_save[NVK_MAX_PUSH_SIZE];
    memcpy(push_save, desc->root.push, NVK_MAX_PUSH_SIZE);

-   nvk_CmdBindPipeline(nvk_cmd_buffer_to_handle(cmd),
-                       VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
+   dev->vk.dispatch_table.CmdBindPipeline(nvk_cmd_buffer_to_handle(cmd),
+                                          VK_PIPELINE_BIND_POINT_COMPUTE,
+                                          pipeline);

    nvk_CmdPushConstants(nvk_cmd_buffer_to_handle(cmd), layout,
                         VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push), &push);
@@ -986,11 +986,8 @@ nvk_meta_copy_query_pool_results(struct nvk_cmd_buffer *cmd,
    nvk_CmdDispatchBase(nvk_cmd_buffer_to_handle(cmd), 0, 0, 0, 1, 1, 1);

    /* Restore pipeline and push constants */
-   if (pipeline_save) {
-      nvk_CmdBindPipeline(nvk_cmd_buffer_to_handle(cmd),
-                          VK_PIPELINE_BIND_POINT_COMPUTE,
-                          nvk_pipeline_to_handle(&pipeline_save->base));
-   }
+   if (shader_save)
+      nvk_cmd_bind_compute_shader(cmd, shader_save);

    memcpy(desc->root.push, push_save, NVK_MAX_PUSH_SIZE);
 }
diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c
index b2b70011a78..181205d9ba5 100644
--- a/src/nouveau/vulkan/nvk_shader.c
+++ b/src/nouveau/vulkan/nvk_shader.c
@@ -8,12 +8,11 @@
 #include "nvk_descriptor_set_layout.h"
 #include "nvk_device.h"
 #include "nvk_physical_device.h"
-#include "nvk_pipeline.h"
 #include "nvk_sampler.h"
+#include "nvk_shader.h"

 #include "vk_nir_convert_ycbcr.h"
 #include "vk_pipeline.h"
-#include "vk_pipeline_cache.h"
 #include "vk_pipeline_layout.h"
 #include "vk_shader_module.h"
 #include "vk_ycbcr_conversion.h"
"nvk_sampler.h" +#include "nvk_shader.h" #include "vk_nir_convert_ycbcr.h" #include "vk_pipeline.h" -#include "vk_pipeline_cache.h" #include "vk_pipeline_layout.h" #include "vk_shader_module.h" #include "vk_ycbcr_conversion.h" @@ -186,7 +185,7 @@ nvk_preprocess_nir(struct vk_physical_device *vk_pdev, nir_shader *nir) nvk_cg_preprocess_nir(nir); } -void +static void nvk_populate_fs_key(struct nak_fs_key *key, const struct vk_graphics_pipeline_state *state) { @@ -195,6 +194,9 @@ nvk_populate_fs_key(struct nak_fs_key *key, key->sample_locations_cb = 0; key->sample_locations_offset = nvk_root_descriptor_offset(draw.sample_locations); + if (state == NULL) + return; + if (state->pipeline_flags & VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) key->zs_self_dep = true; @@ -208,6 +210,25 @@ nvk_populate_fs_key(struct nak_fs_key *key, key->force_sample_shading = true; } +static void +nvk_hash_graphics_state(struct vk_physical_device *device, + const struct vk_graphics_pipeline_state *state, + VkShaderStageFlags stages, + blake3_hash blake3_out) +{ + struct mesa_blake3 blake3_ctx; + _mesa_blake3_init(&blake3_ctx); + if (stages & VK_SHADER_STAGE_FRAGMENT_BIT) { + struct nak_fs_key key; + nvk_populate_fs_key(&key, state); + _mesa_blake3_update(&blake3_ctx, &key, sizeof(key)); + + const bool is_multiview = state->rp->view_mask != 0; + _mesa_blake3_update(&blake3_ctx, &is_multiview, sizeof(is_multiview)); + } + _mesa_blake3_final(&blake3_ctx, blake3_out); +} + static bool lower_load_global_constant_offset_instr(nir_builder *b, nir_intrinsic_instr *intrin, @@ -290,52 +311,6 @@ lookup_ycbcr_conversion(const void *_state, uint32_t set, &sampler->vk.ycbcr_conversion->state : NULL; } -VkResult -nvk_shader_stage_to_nir(struct nvk_device *dev, - const VkPipelineShaderStageCreateInfo *sinfo, - const struct vk_pipeline_robustness_state *rstate, - struct vk_pipeline_cache *cache, - void *mem_ctx, struct nir_shader **nir_out) -{ - struct nvk_physical_device *pdev = nvk_device_physical(dev); - const gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); - const nir_shader_compiler_options *nir_options = - nvk_get_nir_options(&pdev->vk, stage, rstate); - - unsigned char stage_sha1[SHA1_DIGEST_LENGTH]; - vk_pipeline_hash_shader_stage(sinfo, rstate, stage_sha1); - - if (cache == NULL) - cache = dev->mem_cache; - - nir_shader *nir = vk_pipeline_cache_lookup_nir(cache, stage_sha1, - sizeof(stage_sha1), - nir_options, NULL, - mem_ctx); - if (nir != NULL) { - *nir_out = nir; - return VK_SUCCESS; - } - - const struct spirv_to_nir_options spirv_options = - nvk_get_spirv_options(&pdev->vk, stage, rstate); - - VkResult result = vk_pipeline_shader_stage_to_nir(&dev->vk, sinfo, - &spirv_options, - nir_options, - mem_ctx, &nir); - if (result != VK_SUCCESS) - return result; - - nvk_preprocess_nir(&dev->pdev->vk, nir); - - vk_pipeline_cache_add_nir(cache, stage_sha1, sizeof(stage_sha1), nir); - - *nir_out = nir; - - return VK_SUCCESS; -} - static inline bool nir_has_image_var(nir_shader *nir) { @@ -493,13 +468,13 @@ nvk_shader_dump(struct nvk_shader *shader) static VkResult nvk_compile_nir_with_nak(struct nvk_physical_device *pdev, nir_shader *nir, - VkPipelineCreateFlagBits2KHR pipeline_flags, + VkShaderCreateFlagsEXT shader_flags, const struct vk_pipeline_robustness_state *rs, const struct nak_fs_key *fs_key, struct nvk_shader *shader) { const bool dump_asm = - pipeline_flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; + shader_flags & 
 static bool
 lower_load_global_constant_offset_instr(nir_builder *b,
                                         nir_intrinsic_instr *intrin,
@@ -290,52 +311,6 @@ lookup_ycbcr_conversion(const void *_state, uint32_t set,
           &sampler->vk.ycbcr_conversion->state : NULL;
 }

-VkResult
-nvk_shader_stage_to_nir(struct nvk_device *dev,
-                        const VkPipelineShaderStageCreateInfo *sinfo,
-                        const struct vk_pipeline_robustness_state *rstate,
-                        struct vk_pipeline_cache *cache,
-                        void *mem_ctx, struct nir_shader **nir_out)
-{
-   struct nvk_physical_device *pdev = nvk_device_physical(dev);
-   const gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
-   const nir_shader_compiler_options *nir_options =
-      nvk_get_nir_options(&pdev->vk, stage, rstate);
-
-   unsigned char stage_sha1[SHA1_DIGEST_LENGTH];
-   vk_pipeline_hash_shader_stage(sinfo, rstate, stage_sha1);
-
-   if (cache == NULL)
-      cache = dev->mem_cache;
-
-   nir_shader *nir = vk_pipeline_cache_lookup_nir(cache, stage_sha1,
-                                                  sizeof(stage_sha1),
-                                                  nir_options, NULL,
-                                                  mem_ctx);
-   if (nir != NULL) {
-      *nir_out = nir;
-      return VK_SUCCESS;
-   }
-
-   const struct spirv_to_nir_options spirv_options =
-      nvk_get_spirv_options(&pdev->vk, stage, rstate);
-
-   VkResult result = vk_pipeline_shader_stage_to_nir(&dev->vk, sinfo,
-                                                     &spirv_options,
-                                                     nir_options,
-                                                     mem_ctx, &nir);
-   if (result != VK_SUCCESS)
-      return result;
-
-   nvk_preprocess_nir(&dev->pdev->vk, nir);
-
-   vk_pipeline_cache_add_nir(cache, stage_sha1, sizeof(stage_sha1), nir);
-
-   *nir_out = nir;
-
-   return VK_SUCCESS;
-}
-
 static inline bool
 nir_has_image_var(nir_shader *nir)
 {
@@ -493,13 +468,13 @@ nvk_shader_dump(struct nvk_shader *shader)

 static VkResult
 nvk_compile_nir_with_nak(struct nvk_physical_device *pdev,
                          nir_shader *nir,
-                         VkPipelineCreateFlagBits2KHR pipeline_flags,
+                         VkShaderCreateFlagsEXT shader_flags,
                          const struct vk_pipeline_robustness_state *rs,
                          const struct nak_fs_key *fs_key,
                          struct nvk_shader *shader)
 {
    const bool dump_asm =
-      pipeline_flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
+      shader_flags & VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA;

    nir_variable_mode robust2_modes = 0;
    if (rs->uniform_buffers == VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT)
@@ -515,38 +490,18 @@ nvk_compile_nir_with_nak(struct nvk_physical_device *pdev,
    return VK_SUCCESS;
 }

-struct nvk_shader *
-nvk_shader_init(struct nvk_device *dev, const void *key_data, size_t key_size)
-{
-   VK_MULTIALLOC(ma);
-   VK_MULTIALLOC_DECL(&ma, struct nvk_shader, shader, 1);
-   VK_MULTIALLOC_DECL_SIZE(&ma, char, obj_key_data, key_size);
-
-   if (!vk_multialloc_zalloc(&ma, &dev->vk.alloc,
-                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
-      return NULL;
-
-   memcpy(obj_key_data, key_data, key_size);
-
-   vk_pipeline_cache_object_init(&dev->vk, &shader->base,
-                                 &nvk_shader_ops, obj_key_data, key_size);
-
-   return shader;
-}
-
-VkResult
+static VkResult
 nvk_compile_nir(struct nvk_device *dev, nir_shader *nir,
-                VkPipelineCreateFlagBits2KHR pipeline_flags,
+                VkShaderCreateFlagsEXT shader_flags,
                 const struct vk_pipeline_robustness_state *rs,
                 const struct nak_fs_key *fs_key,
-                struct vk_pipeline_cache *cache,
                 struct nvk_shader *shader)
 {
    struct nvk_physical_device *pdev = nvk_device_physical(dev);
    VkResult result;

    if (use_nak(pdev, nir->info.stage)) {
-      result = nvk_compile_nir_with_nak(pdev, nir, pipeline_flags, rs,
+      result = nvk_compile_nir_with_nak(pdev, nir, shader_flags, rs,
                                         fs_key, shader);
    } else {
       result = nvk_cg_compile_nir(pdev, nir, fs_key, shader);
@@ -555,7 +510,7 @@ nvk_compile_nir(struct nvk_device *dev, nir_shader *nir,
       return result;

    if (nir->constant_data_size > 0) {
-      uint32_t data_align = nvk_min_cbuf_alignment(&dev->pdev->info);
+      uint32_t data_align = nvk_min_cbuf_alignment(&pdev->info);
       uint32_t data_size = align(nir->constant_data_size, data_align);

       void *data = malloc(data_size);
@@ -650,11 +605,15 @@ nvk_shader_upload(struct nvk_device *dev, struct nvk_shader *shader)
    return result;
 }

-void
-nvk_shader_finish(struct nvk_device *dev, struct nvk_shader *shader)
+static const struct vk_shader_ops nvk_shader_ops;
+
+static void
+nvk_shader_destroy(struct vk_device *vk_dev,
+                   struct vk_shader *vk_shader,
+                   const VkAllocationCallbacks* pAllocator)
 {
-   if (shader == NULL)
-      return;
+   struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);
+   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);

    if (shader->upload_size > 0) {
       nvk_heap_free(dev, &dev->shader_heap,
@@ -671,127 +630,330 @@

    free((void *)shader->data_ptr);

-   vk_free(&dev->vk.alloc, shader);
+   vk_shader_free(&dev->vk, pAllocator, &shader->vk);
 }

-void
-nvk_hash_shader(unsigned char *hash,
-                const VkPipelineShaderStageCreateInfo *sinfo,
-                const struct vk_pipeline_robustness_state *rs,
-                bool is_multiview,
-                const struct vk_pipeline_layout *layout,
-                const struct nak_fs_key *fs_key)
+static VkResult
+nvk_compile_shader(struct nvk_device *dev,
+                   struct vk_shader_compile_info *info,
+                   const struct vk_graphics_pipeline_state *state,
+                   const VkAllocationCallbacks* pAllocator,
+                   struct vk_shader **shader_out)
 {
-   struct mesa_sha1 ctx;
+   struct nvk_shader *shader;
+   VkResult result;

-   _mesa_sha1_init(&ctx);
+   /* We consume the NIR, regardless of success or failure */
+   nir_shader *nir = info->nir;

-   unsigned char stage_sha1[SHA1_DIGEST_LENGTH];
-   vk_pipeline_hash_shader_stage(sinfo, rs, stage_sha1);
+   shader = vk_shader_zalloc(&dev->vk, &nvk_shader_ops, info->stage,
+                             pAllocator, sizeof(*shader));
+   if (shader == NULL) {
+      ralloc_free(nir);
+      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }

-   _mesa_sha1_update(&ctx, stage_sha1, sizeof(stage_sha1));
+   /* TODO: Multiview with ESO */
+   const bool is_multiview = state && state->rp->view_mask != 0;

-   _mesa_sha1_update(&ctx, &is_multiview, sizeof(is_multiview));
+   nvk_lower_nir(dev, nir, info->robustness, is_multiview,
+                 info->set_layout_count, info->set_layouts,
+                 &shader->cbuf_map);

-   if (layout) {
-      _mesa_sha1_update(&ctx, &layout->create_flags,
-                        sizeof(layout->create_flags));
-      _mesa_sha1_update(&ctx, &layout->set_count, sizeof(layout->set_count));
-      for (int i = 0; i < layout->set_count; i++) {
-         struct nvk_descriptor_set_layout *set =
-            vk_to_nvk_descriptor_set_layout(layout->set_layouts[i]);
-         _mesa_sha1_update(&ctx, &set->vk.blake3, sizeof(set->vk.blake3));
+   struct nak_fs_key fs_key_tmp, *fs_key = NULL;
+   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+      nvk_populate_fs_key(&fs_key_tmp, state);
+      fs_key = &fs_key_tmp;
+   }
+
+   result = nvk_compile_nir(dev, nir, info->flags, info->robustness,
+                            fs_key, shader);
+   ralloc_free(nir);
+   if (result != VK_SUCCESS) {
+      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
+      return result;
+   }
+
+   result = nvk_shader_upload(dev, shader);
+   if (result != VK_SUCCESS) {
+      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
+      return result;
+   }
+
+   if (info->stage == MESA_SHADER_FRAGMENT) {
+      if (shader->info.fs.reads_sample_mask ||
+          shader->info.fs.uses_sample_shading) {
+         shader->min_sample_shading = 1;
+      } else if (state != NULL && state->ms != NULL &&
+                 state->ms->sample_shading_enable) {
+         shader->min_sample_shading =
+            CLAMP(state->ms->min_sample_shading, 0, 1);
+      } else {
+         shader->min_sample_shading = 0;
       }
    }

-   if(fs_key)
-      _mesa_sha1_update(&ctx, fs_key, sizeof(*fs_key));
+   *shader_out = &shader->vk;

-   _mesa_sha1_final(&ctx, hash);
+   return VK_SUCCESS;
 }

-static bool
-nvk_shader_serialize(struct vk_pipeline_cache_object *object,
-                     struct blob *blob);
-
-static struct vk_pipeline_cache_object *
-nvk_shader_deserialize(struct vk_pipeline_cache *cache,
-                       const void *key_data,
-                       size_t key_size,
-                       struct blob_reader *blob);
-
-void
-nvk_shader_destroy(struct vk_device *_dev,
-                   struct vk_pipeline_cache_object *object)
+static VkResult
+nvk_compile_shaders(struct vk_device *vk_dev,
+                    uint32_t shader_count,
+                    struct vk_shader_compile_info *infos,
+                    const struct vk_graphics_pipeline_state *state,
+                    const VkAllocationCallbacks* pAllocator,
+                    struct vk_shader **shaders_out)
 {
-   struct nvk_device *dev =
-      container_of(_dev, struct nvk_device, vk);
-   struct nvk_shader *shader =
-      container_of(object, struct nvk_shader, base);
+   struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);

-   nvk_shader_finish(dev, shader);
+   for (uint32_t i = 0; i < shader_count; i++) {
+      VkResult result = nvk_compile_shader(dev, &infos[i], state,
+                                           pAllocator, &shaders_out[i]);
+      if (result != VK_SUCCESS) {
+         /* Clean up all the shaders before this point */
+         for (uint32_t j = 0; j < i; j++)
+            nvk_shader_destroy(&dev->vk, shaders_out[j], pAllocator);
+
+         /* Clean up all the NIR after this point */
+         for (uint32_t j = i + 1; j < shader_count; j++)
+            ralloc_free(infos[j].nir);
+
+         /* Memset the output array */
+         memset(shaders_out, 0, shader_count * sizeof(*shaders_out));
+
+         return result;
+      }
+   }
+
+   return VK_SUCCESS;
 }

-const struct vk_pipeline_cache_object_ops nvk_shader_ops = {
-   .serialize = nvk_shader_serialize,
-   .deserialize = nvk_shader_deserialize,
-   .destroy = nvk_shader_destroy,
-};
+static VkResult
+nvk_deserialize_shader(struct vk_device *vk_dev,
+                       struct blob_reader *blob,
+                       uint32_t binary_version,
+                       const VkAllocationCallbacks* pAllocator,
+                       struct vk_shader **shader_out)
+{
+   struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);
+   struct nvk_shader *shader;
+   VkResult result;
+
+   struct nak_shader_info info;
+   blob_copy_bytes(blob, &info, sizeof(info));
+
+   struct nvk_cbuf_map cbuf_map;
+   blob_copy_bytes(blob, &cbuf_map, sizeof(cbuf_map));
+
+   float min_sample_shading;
+   blob_copy_bytes(blob, &min_sample_shading, sizeof(min_sample_shading));
+
+   const uint32_t code_size = blob_read_uint32(blob);
+   const uint32_t data_size = blob_read_uint32(blob);
+   if (blob->overrun)
+      return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
+
+   shader = vk_shader_zalloc(&dev->vk, &nvk_shader_ops, info.stage,
+                             pAllocator, sizeof(*shader));
+   if (shader == NULL)
+      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   shader->info = info;
+   shader->cbuf_map = cbuf_map;
+   shader->min_sample_shading = min_sample_shading;
+   shader->code_size = code_size;
+   shader->data_size = data_size;
+
+   shader->code_ptr = malloc(code_size);
+   if (shader->code_ptr == NULL) {
+      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
+      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   shader->data_ptr = malloc(data_size);
+   if (shader->data_ptr == NULL) {
+      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
+      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   blob_copy_bytes(blob, (void *)shader->code_ptr, shader->code_size);
+   blob_copy_bytes(blob, (void *)shader->data_ptr, shader->data_size);
+   if (blob->overrun) {
+      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
+      return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
+   }
+
+   result = nvk_shader_upload(dev, shader);
+   if (result != VK_SUCCESS) {
+      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
+      return result;
+   }
+
+   *shader_out = &shader->vk;
+
+   return VK_SUCCESS;
+}

 static bool
-nvk_shader_serialize(struct vk_pipeline_cache_object *object,
+nvk_shader_serialize(struct vk_device *vk_dev,
+                     const struct vk_shader *vk_shader,
                      struct blob *blob)
 {
-   struct nvk_shader *shader =
-      container_of(object, struct nvk_shader, base);
+   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
+
+   /* We can't currently cache assembly */
+   if (shader->nak != NULL && shader->nak->asm_str != NULL)
+      return false;

    blob_write_bytes(blob, &shader->info, sizeof(shader->info));
    blob_write_bytes(blob, &shader->cbuf_map, sizeof(shader->cbuf_map));
+   blob_write_bytes(blob, &shader->min_sample_shading,
+                    sizeof(shader->min_sample_shading));
+
    blob_write_uint32(blob, shader->code_size);
-   blob_write_bytes(blob, shader->code_ptr, shader->code_size);
    blob_write_uint32(blob, shader->data_size);
+   blob_write_bytes(blob, shader->code_ptr, shader->code_size);
    blob_write_bytes(blob, shader->data_ptr, shader->data_size);
+
+   return !blob->out_of_memory;
+}
+
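[Annotation — not part of the patch] nvk_shader_serialize() and nvk_deserialize_shader() above agree on a single blob layout; for reference:

   struct nak_shader_info info;       /* fixed-size, written first */
   struct nvk_cbuf_map    cbuf_map;
   float                  min_sample_shading;
   uint32_t               code_size;
   uint32_t               data_size;
   /* ...followed by code_size bytes of code, then data_size bytes of data */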
+#define WRITE_STR(field, ...) ({                               \
+   memset(field, 0, sizeof(field));                            \
+   UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
+   assert(i > 0 && i < sizeof(field));                         \
+})
+
+static VkResult
+nvk_shader_get_executable_properties(
+   UNUSED struct vk_device *device,
+   const struct vk_shader *vk_shader,
+   uint32_t *executable_count,
+   VkPipelineExecutablePropertiesKHR *properties)
+{
+   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
+   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
+                          properties, executable_count);
+
+   vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
+      props->stages = mesa_to_vk_shader_stage(shader->info.stage);
+      props->subgroupSize = 32;
+      WRITE_STR(props->name, "%s",
+                _mesa_shader_stage_to_string(shader->info.stage));
+      WRITE_STR(props->description, "%s shader",
+                _mesa_shader_stage_to_string(shader->info.stage));
+   }
+
+   return vk_outarray_status(&out);
+}
+
+static VkResult
+nvk_shader_get_executable_statistics(
+   UNUSED struct vk_device *device,
+   const struct vk_shader *vk_shader,
+   uint32_t executable_index,
+   uint32_t *statistic_count,
+   VkPipelineExecutableStatisticKHR *statistics)
+{
+   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
+   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
+                          statistics, statistic_count);
+
+   assert(executable_index == 0);
+
+   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
+      WRITE_STR(stat->name, "Code Size");
+      WRITE_STR(stat->description,
+                "Size of the compiled shader binary, in bytes");
+      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+      stat->value.u64 = shader->code_size;
+   }
+
+   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
+      WRITE_STR(stat->name, "Number of GPRs");
+      WRITE_STR(stat->description, "Number of GPRs used by this pipeline");
+      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+      stat->value.u64 = shader->info.num_gprs;
+   }
+
+   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
+      WRITE_STR(stat->name, "SLM Size");
+      WRITE_STR(stat->description,
+                "Size of shader local (scratch) memory, in bytes");
+      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+      stat->value.u64 = shader->info.slm_size;
+   }
+
+   return vk_outarray_status(&out);
+}
+
+static bool
+write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
+              const char *data)
+{
+   ir->isText = VK_TRUE;
+
+   size_t data_len = strlen(data) + 1;
+
+   if (ir->pData == NULL) {
+      ir->dataSize = data_len;
+      return true;
+   }
+
+   strncpy(ir->pData, data, ir->dataSize);
+   if (ir->dataSize < data_len)
+      return false;
+
+   ir->dataSize = data_len;
    return true;
 }
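write_ir_text() implements the driver side of Vulkan's usual two-call size negotiation: with pData == NULL it reports the required size and succeeds; otherwise it copies at most dataSize bytes and returns false on truncation, which the caller below turns into VK_INCOMPLETE. An application-side sketch using the standard VK_KHR_pipeline_executable_properties entry points (illustrative only; `device` and `pipeline` are assumed to exist, error handling and cleanup are omitted, and on most loaders the function pointer would be fetched with vkGetDeviceProcAddr):

   #include <stdlib.h>
   #include <vulkan/vulkan.h>

   static void
   dump_internal_representations(VkDevice device, VkPipeline pipeline)
   {
      const VkPipelineExecutableInfoKHR exe_info = {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
         .pipeline = pipeline,
         .executableIndex = 0,
      };

      /* Call 1: how many IRs are there? */
      uint32_t count = 0;
      vkGetPipelineExecutableInternalRepresentationsKHR(device, &exe_info,
                                                        &count, NULL);

      VkPipelineExecutableInternalRepresentationKHR *irs =
         calloc(count, sizeof(*irs));
      for (uint32_t i = 0; i < count; i++)
         irs[i].sType =
            VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INTERNAL_REPRESENTATION_KHR;

      /* Call 2: pData == NULL, so the driver fills dataSize
       * (the first branch of write_ir_text()). */
      vkGetPipelineExecutableInternalRepresentationsKHR(device, &exe_info,
                                                        &count, irs);

      /* Call 3: with pData set, the text itself is copied. */
      for (uint32_t i = 0; i < count; i++)
         irs[i].pData = malloc(irs[i].dataSize);
      vkGetPipelineExecutableInternalRepresentationsKHR(device, &exe_info,
                                                        &count, irs);
   }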
-static struct vk_pipeline_cache_object *
-nvk_shader_deserialize(struct vk_pipeline_cache *cache,
-                       const void *key_data,
-                       size_t key_size,
-                       struct blob_reader *blob)
+static VkResult
+nvk_shader_get_executable_internal_representations(
+   UNUSED struct vk_device *device,
+   const struct vk_shader *vk_shader,
+   uint32_t executable_index,
+   uint32_t *internal_representation_count,
+   VkPipelineExecutableInternalRepresentationKHR *internal_representations)
 {
-   struct nvk_device *dev =
-      container_of(cache->base.device, struct nvk_device, vk);
-   struct nvk_shader *shader =
-      nvk_shader_init(dev, key_data, key_size);
+   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
+   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
+                          internal_representations,
+                          internal_representation_count);
+   bool incomplete_text = false;
 
-   if (!shader)
-      return NULL;
+   assert(executable_index == 0);
 
-   blob_copy_bytes(blob, &shader->info, sizeof(shader->info));
-   blob_copy_bytes(blob, &shader->cbuf_map, sizeof(shader->cbuf_map));
+   if (shader->nak != NULL && shader->nak->asm_str != NULL) {
+      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
+         WRITE_STR(ir->name, "NAK assembly");
+         WRITE_STR(ir->description, "NAK assembly");
+         if (!write_ir_text(ir, shader->nak->asm_str))
+            incomplete_text = true;
+      }
+   }
 
-   shader->code_size = blob_read_uint32(blob);
-   void *code_ptr = malloc(shader->code_size);
-   if (!code_ptr)
-      goto fail;
-
-   blob_copy_bytes(blob, code_ptr, shader->code_size);
-   shader->code_ptr = code_ptr;
-
-   shader->data_size = blob_read_uint32(blob);
-   void *data_ptr = malloc(shader->data_size);
-   if (!data_ptr)
-      goto fail;
-
-   blob_copy_bytes(blob, data_ptr, shader->data_size);
-   shader->data_ptr = data_ptr;
-
-   return &shader->base;
-
-fail:
-   /* nvk_shader_destroy frees both shader and shader->xfb */
-   nvk_shader_destroy(cache->base.device, &shader->base);
-   return NULL;
+   return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
 }
+
+static const struct vk_shader_ops nvk_shader_ops = {
+   .destroy = nvk_shader_destroy,
+   .serialize = nvk_shader_serialize,
+   .get_executable_properties = nvk_shader_get_executable_properties,
+   .get_executable_statistics = nvk_shader_get_executable_statistics,
+   .get_executable_internal_representations =
+      nvk_shader_get_executable_internal_representations,
+};
+
+const struct vk_device_shader_ops nvk_device_shader_ops = {
+   .get_nir_options = nvk_get_nir_options,
+   .get_spirv_options = nvk_get_spirv_options,
+   .preprocess_nir = nvk_preprocess_nir,
+   .hash_graphics_state = nvk_hash_graphics_state,
+   .compile = nvk_compile_shaders,
+   .deserialize = nvk_deserialize_shader,
+   .cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,
+   .cmd_bind_shaders = nvk_cmd_bind_shaders,
+};
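These two tables are the glue layer: vk_shader_ops describes one compiled shader, while nvk_device_shader_ops is registered with the common Mesa runtime, which implements VK_EXT_shader_object (and the pipeline entry points layered on it) in terms of these hooks, so NVK no longer needs pipeline objects of its own. For illustration, a sketch of the application-visible path this enables (not NVK code; `device`, `cmd`, `spirv`, `spirv_size`, and `set_layout` are assumed to exist, and error handling is omitted):

   #include <vulkan/vulkan.h>

   static void
   bind_compute_shader(VkDevice device, VkCommandBuffer cmd,
                       const uint32_t *spirv, size_t spirv_size,
                       VkDescriptorSetLayout set_layout)
   {
      const VkShaderCreateInfoEXT info = {
         .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT,
         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
         .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT,
         .pCode = spirv,
         .codeSize = spirv_size,
         .pName = "main",
         .setLayoutCount = 1,
         .pSetLayouts = &set_layout,
      };

      /* Reaches nvk_compile_shaders() through ops->compile. */
      VkShaderEXT shader;
      vkCreateShadersEXT(device, 1, &info, NULL, &shader);

      /* Reaches nvk_cmd_bind_shaders() through ops->cmd_bind_shaders. */
      const VkShaderStageFlagBits stage = VK_SHADER_STAGE_COMPUTE_BIT;
      vkCmdBindShadersEXT(cmd, 1, &stage, &shader);
   }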
diff --git a/src/nouveau/vulkan/nvk_shader.h b/src/nouveau/vulkan/nvk_shader.h
index 08128bb95bc..9e6e93e6f95 100644
--- a/src/nouveau/vulkan/nvk_shader.h
+++ b/src/nouveau/vulkan/nvk_shader.h
@@ -14,6 +14,8 @@
 #include "nir.h"
 #include "nouveau_bo.h"
 
+#include "vk_shader.h"
+
 struct nak_shader_bin;
 struct nvk_device;
 struct nvk_physical_device;
@@ -57,11 +59,14 @@ struct nvk_cbuf_map {
 };
 
 struct nvk_shader {
-   struct vk_pipeline_cache_object base;
+   struct vk_shader vk;
 
    struct nak_shader_info info;
    struct nvk_cbuf_map cbuf_map;
 
+   /* Only relevant for fragment shaders */
+   float min_sample_shading;
+
    struct nak_shader_bin *nak;
    const void *code_ptr;
    uint32_t code_size;
@@ -84,11 +89,7 @@ struct nvk_shader {
    uint64_t data_addr;
 };
 
-static inline bool
-nvk_shader_is_enabled(const struct nvk_shader *shader)
-{
-   return shader && shader->upload_size > 0;
-}
+extern const struct vk_device_shader_ops nvk_device_shader_ops;
 
 VkShaderStageFlags nvk_nak_stages(const struct nv_device_info *info);
 
@@ -115,18 +116,6 @@ nvk_nir_lower_descriptors(nir_shader *nir,
                           uint32_t set_layout_count,
                           struct vk_descriptor_set_layout * const *set_layouts,
                           struct nvk_cbuf_map *cbuf_map_out);
-
-VkResult
-nvk_shader_stage_to_nir(struct nvk_device *dev,
-                        const VkPipelineShaderStageCreateInfo *sinfo,
-                        const struct vk_pipeline_robustness_state *rstate,
-                        struct vk_pipeline_cache *cache,
-                        void *mem_ctx, struct nir_shader **nir_out);
-
-void
-nvk_populate_fs_key(struct nak_fs_key *key,
-                    const struct vk_graphics_pipeline_state *state);
-
 void
 nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
@@ -135,37 +124,9 @@ nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
               const struct vk_pipeline_robustness_state *rs,
               struct vk_descriptor_set_layout * const *set_layouts,
               struct nvk_cbuf_map *cbuf_map_out);
 
-VkResult
-nvk_compile_nir(struct nvk_device *dev, nir_shader *nir,
-                VkPipelineCreateFlagBits2KHR pipeline_flags,
-                const struct vk_pipeline_robustness_state *rstate,
-                const struct nak_fs_key *fs_key,
-                struct vk_pipeline_cache *cache,
-                struct nvk_shader *shader);
-
 VkResult
 nvk_shader_upload(struct nvk_device *dev, struct nvk_shader *shader);
 
-struct nvk_shader *
-nvk_shader_init(struct nvk_device *dev, const void *key_data, size_t key_size);
-
-extern const struct vk_pipeline_cache_object_ops nvk_shader_ops;
-
-void
-nvk_shader_finish(struct nvk_device *dev, struct nvk_shader *shader);
-
-void
-nvk_hash_shader(unsigned char *hash,
-                const VkPipelineShaderStageCreateInfo *sinfo,
-                const struct vk_pipeline_robustness_state *rstate,
-                bool is_multiview,
-                const struct vk_pipeline_layout *layout,
-                const struct nak_fs_key *fs_key);
-
-void
-nvk_shader_destroy(struct vk_device *dev,
-                   struct vk_pipeline_cache_object *object);
-
 /* Codegen wrappers.
  *
  * TODO: Delete these once NAK supports everything.