nvk: Switch to shader objects

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27024>
Faith Ekstrand 2024-01-11 18:18:54 -06:00 committed by Marge Bot
parent 4001658c18
commit 813b253939
12 changed files with 745 additions and 786 deletions
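For orientation, here is a minimal application-side sketch of the VK_EXT_shader_object flow that the rewritten driver paths serve. The snippet is illustrative and not part of the commit: creating the shader reaches nvk_compile_shaders() and binding it reaches nvk_cmd_bind_shaders(), which routes to nvk_cmd_bind_compute_shader() or nvk_cmd_bind_graphics_shader() in the hunks below. The device, command buffer, SPIR-V blob, and descriptor set layout are assumed to exist; error handling is omitted.

#include <vulkan/vulkan.h>

static void
bind_compute_shader_object(VkDevice dev, VkCommandBuffer cmd,
                           const uint32_t *spirv, size_t spirv_size,
                           VkDescriptorSetLayout set_layout)
{
   /* VK_EXT_shader_object entry points come from the device loader. */
   PFN_vkCreateShadersEXT create_shaders =
      (PFN_vkCreateShadersEXT)vkGetDeviceProcAddr(dev, "vkCreateShadersEXT");
   PFN_vkCmdBindShadersEXT bind_shaders =
      (PFN_vkCmdBindShadersEXT)vkGetDeviceProcAddr(dev, "vkCmdBindShadersEXT");

   const VkShaderCreateInfoEXT info = {
      .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT,
      .codeSize = spirv_size,
      .pCode = spirv,
      .pName = "main",
      .setLayoutCount = 1,
      .pSetLayouts = &set_layout,
   };

   /* No VkPipeline: the driver compiles a vk_shader directly. */
   VkShaderEXT shader;
   create_shaders(dev, 1, &info, NULL, &shader);

   /* Shaders are bound per stage; the common runtime dispatches this to
    * the driver's cmd_bind_shaders hook. */
   const VkShaderStageFlagBits stage = VK_SHADER_STAGE_COMPUTE_BIT;
   bind_shaders(cmd, 1, &stage, &shader);
}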

View file

@@ -16,7 +16,6 @@ nvk_files = files(
'nvk_cmd_pool.c',
'nvk_cmd_pool.h',
'nvk_codegen.c',
'nvk_compute_pipeline.c',
'nvk_descriptor_set.h',
'nvk_descriptor_set.c',
'nvk_descriptor_set_layout.c',
@@ -31,7 +30,6 @@ nvk_files = files(
'nvk_event.h',
'nvk_format.c',
'nvk_format.h',
'nvk_graphics_pipeline.c',
'nvk_heap.c',
'nvk_heap.h',
'nvk_image.c',
@@ -45,8 +43,6 @@ nvk_files = files(
'nvk_nir_lower_descriptors.c',
'nvk_physical_device.c',
'nvk_physical_device.h',
'nvk_pipeline.c',
'nvk_pipeline.h',
'nvk_private.h',
'nvk_query_pool.c',
'nvk_query_pool.h',

View file

@@ -13,7 +13,7 @@
#include "nvk_entrypoints.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_pipeline.h"
#include "nvk_shader.h"
#include "vk_pipeline_layout.h"
#include "vk_synchronization.h"
@@ -551,33 +551,27 @@ nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
nvk_cmd_invalidate_deps(cmd, 1, pDependencyInfo);
}
VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindPipeline(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipeline _pipeline)
void
nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
uint32_t stage_count,
const gl_shader_stage *stages,
struct vk_shader ** const shaders)
{
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
VK_FROM_HANDLE(nvk_pipeline, pipeline, _pipeline);
struct nvk_cmd_buffer *cmd = container_of(vk_cmd, struct nvk_cmd_buffer, vk);
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
if(!pipeline->shaders[s])
continue;
if (pipeline->shaders[s]->info.slm_size)
nvk_device_ensure_slm(dev, pipeline->shaders[s]->info.slm_size);
}
for (uint32_t i = 0; i < stage_count; i++) {
struct nvk_shader *shader =
container_of(shaders[i], struct nvk_shader, vk);
switch (pipelineBindPoint) {
case VK_PIPELINE_BIND_POINT_GRAPHICS:
assert(pipeline->type == NVK_PIPELINE_GRAPHICS);
nvk_cmd_bind_graphics_pipeline(cmd, (void *)pipeline);
break;
case VK_PIPELINE_BIND_POINT_COMPUTE:
assert(pipeline->type == NVK_PIPELINE_COMPUTE);
nvk_cmd_bind_compute_pipeline(cmd, (void *)pipeline);
break;
default:
unreachable("Unhandled bind point");
if (shader != NULL && shader->info.slm_size > 0)
nvk_device_ensure_slm(dev, shader->info.slm_size);
if (stages[i] == MESA_SHADER_COMPUTE ||
stages[i] == MESA_SHADER_KERNEL)
nvk_cmd_bind_compute_shader(cmd, shader);
else
nvk_cmd_bind_graphics_shader(cmd, stages[i], shader);
}
}

View file

@@ -24,6 +24,7 @@ struct nvk_cmd_pool;
struct nvk_image_view;
struct nvk_push_descriptor_set;
struct nvk_shader;
struct vk_shader;
struct nvk_sample_location {
uint8_t x_u4:4;
@@ -102,9 +103,11 @@ struct nvk_rendering_state {
struct nvk_graphics_state {
struct nvk_rendering_state render;
struct nvk_graphics_pipeline *pipeline;
struct nvk_descriptor_state descriptors;
uint32_t shaders_dirty;
struct nvk_shader *shaders[MESA_SHADER_MESH + 1];
/* Used for meta save/restore */
struct nvk_addr_range vb0;
@@ -114,8 +117,8 @@ struct nvk_graphics_state {
};
struct nvk_compute_state {
struct nvk_compute_pipeline *pipeline;
struct nvk_descriptor_state descriptors;
struct nvk_shader *shader;
};
struct nvk_cmd_push {
@@ -209,10 +212,17 @@ void nvk_cmd_buffer_begin_compute(struct nvk_cmd_buffer *cmd,
void nvk_cmd_invalidate_graphics_state(struct nvk_cmd_buffer *cmd);
void nvk_cmd_invalidate_compute_state(struct nvk_cmd_buffer *cmd);
void nvk_cmd_bind_graphics_pipeline(struct nvk_cmd_buffer *cmd,
struct nvk_graphics_pipeline *pipeline);
void nvk_cmd_bind_compute_pipeline(struct nvk_cmd_buffer *cmd,
struct nvk_compute_pipeline *pipeline);
void nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
uint32_t stage_count,
const gl_shader_stage *stages,
struct vk_shader ** const shaders);
void nvk_cmd_bind_graphics_shader(struct nvk_cmd_buffer *cmd,
const gl_shader_stage stage,
struct nvk_shader *shader);
void nvk_cmd_bind_compute_shader(struct nvk_cmd_buffer *cmd,
struct nvk_shader *shader);
void nvk_cmd_bind_vertex_buffer(struct nvk_cmd_buffer *cmd, uint32_t vb_idx,
struct nvk_addr_range addr_range);

View file

@@ -9,7 +9,7 @@
#include "nvk_entrypoints.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_pipeline.h"
#include "nvk_shader.h"
#include "nouveau_context.h"
@@ -41,6 +41,11 @@
#define NVC6C0_QMDV03_00_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC6C0, QMDV03_00, ##a)
#define NVC6C0_QMDV03_00_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC6C0, QMDV03_00, ##a)
#define QMD_DEF_SET(qmd, class_id, version_major, version_minor, a...) \
NVDEF_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
#define QMD_VAL_SET(qmd, class_id, version_major, version_minor, a...) \
NVVAL_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
VkResult
nvk_push_dispatch_state_init(struct nvk_device *dev, struct nv_push *p)
{
@@ -97,6 +102,129 @@ nvk_cmd_invalidate_compute_state(struct nvk_cmd_buffer *cmd)
memset(&cmd->state.cs, 0, sizeof(cmd->state.cs));
}
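/* Round a shared memory size in bytes up to the next size the SM supports
 * (8/16/32/64/96 KiB) and convert it to the value the QMD's
 * *_SM_CONFIG_SHARED_MEM_SIZE fields expect: the size in 4 KiB units, plus
 * one.  For example, a 20 KiB request rounds up to 32 KiB and yields
 * (32768 / 4096) + 1 = 9.
 */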
static int
gv100_sm_config_smem_size(uint32_t size)
{
if (size > 64 * 1024) size = 96 * 1024;
else if (size > 32 * 1024) size = 64 * 1024;
else if (size > 16 * 1024) size = 32 * 1024;
else if (size > 8 * 1024) size = 16 * 1024;
else size = 8 * 1024;
return (size / 4096) + 1;
}
#define nvk_qmd_init_base(qmd, shader, class_id, version_major, version_minor) \
do { \
QMD_DEF_SET(qmd, class_id, version_major, version_minor, API_VISIBLE_CALL_LIMIT, NO_CHECK); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, BARRIER_COUNT, shader->info.num_barriers); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION0, \
shader->info.cs.local_size[0]); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION1, \
shader->info.cs.local_size[1]); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION2, \
shader->info.cs.local_size[2]); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_MAJOR_VERSION, version_major); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_VERSION, version_minor); \
QMD_DEF_SET(qmd, class_id, version_major, version_minor, SAMPLER_INDEX, INDEPENDENTLY); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_LOW_SIZE, \
align(shader->info.slm_size, 0x10)); \
QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHARED_MEMORY_SIZE, \
align(shader->info.cs.smem_size, 0x100)); \
} while (0)
static void
nva0c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
{
nvk_qmd_init_base(qmd, shader, A0C0, 00, 06);
if (shader->info.cs.smem_size <= (16 << 10))
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB);
else if (shader->info.cs.smem_size <= (32 << 10))
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);
else if (shader->info.cs.smem_size <= (48 << 10))
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
else
unreachable("Invalid shared memory size");
uint64_t addr = shader->hdr_addr;
assert(addr < 0xffffffff);
NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, addr);
NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30);
}
static void
nvc0c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
{
nvk_qmd_init_base(qmd, shader, C0C0, 02, 01);
uint64_t addr = shader->hdr_addr;
assert(addr < 0xffffffff);
NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, addr);
NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, shader->info.num_gprs);
}
static void
nvc3c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
{
nvk_qmd_init_base(qmd, shader, C3C0, 02, 02);
NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
/* those are all QMD 2.2+ */
NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
gv100_sm_config_smem_size(shader->info.cs.smem_size));
NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
gv100_sm_config_smem_size(shader->info.cs.smem_size));
NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);
uint64_t addr = shader->hdr_addr;
NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
}
static void
nvc6c0_qmd_init(uint32_t *qmd, const struct nvk_shader *shader)
{
nvk_qmd_init_base(qmd, shader, C6C0, 03, 00);
NVC6C0_QMDV03_00_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
/* those are all QMD 2.2+ */
NVC6C0_QMDV03_00_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
gv100_sm_config_smem_size(shader->info.cs.smem_size));
NVC6C0_QMDV03_00_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
gv100_sm_config_smem_size(NVK_MAX_SHARED_SIZE));
NVC6C0_QMDV03_00_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
gv100_sm_config_smem_size(shader->info.cs.smem_size));
NVC6C0_QMDV03_00_VAL_SET(qmd, REGISTER_COUNT_V, shader->info.num_gprs);
uint64_t addr = shader->hdr_addr;
NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
NVC6C0_QMDV03_00_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, addr >> 32);
}
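/* Fill the QMD (compute launch descriptor) for the given shader, using the
 * QMD layout that matches the device's compute class.
 */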
static void
nvk_qmd_init(struct nvk_physical_device *pdev,
uint32_t *qmd, const struct nvk_shader *shader)
{
if (pdev->info.cls_compute >= AMPERE_COMPUTE_A)
nvc6c0_qmd_init(qmd, shader);
else if (pdev->info.cls_compute >= VOLTA_COMPUTE_A)
nvc3c0_qmd_init(qmd, shader);
else if (pdev->info.cls_compute >= PASCAL_COMPUTE_A)
nvc0c0_qmd_init(qmd, shader);
else if (pdev->info.cls_compute >= KEPLER_COMPUTE_A)
nva0c0_qmd_init(qmd, shader);
else
unreachable("Unknown GPU generation");
}
static void
nva0c0_qmd_set_dispatch_size(UNUSED struct nvk_device *dev, uint32_t *qmd,
uint32_t x, uint32_t y, uint32_t z)
@@ -171,18 +299,16 @@ nvc6c0_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
void
nvk_cmd_bind_compute_pipeline(struct nvk_cmd_buffer *cmd,
struct nvk_compute_pipeline *pipeline)
nvk_cmd_bind_compute_shader(struct nvk_cmd_buffer *cmd,
struct nvk_shader *shader)
{
cmd->state.cs.pipeline = pipeline;
cmd->state.cs.shader = shader;
}
static uint32_t
nvk_compute_local_size(struct nvk_cmd_buffer *cmd)
{
const struct nvk_compute_pipeline *pipeline = cmd->state.cs.pipeline;
const struct nvk_shader *shader =
pipeline->base.shaders[MESA_SHADER_COMPUTE];
const struct nvk_shader *shader = cmd->state.cs.shader;
return shader->info.cs.local_size[0] *
shader->info.cs.local_size[1] *
@@ -196,7 +322,7 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
struct nvk_physical_device *pdev = nvk_device_physical(dev);
const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
const struct nvk_compute_pipeline *pipeline = cmd->state.cs.pipeline;
const struct nvk_shader *shader = cmd->state.cs.shader;
struct nvk_descriptor_state *desc = &cmd->state.cs.descriptors;
VkResult result;
@@ -224,7 +350,7 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
uint32_t qmd[128];
memset(qmd, 0, sizeof(qmd));
memcpy(qmd, pipeline->qmd_template, sizeof(pipeline->qmd_template));
nvk_qmd_init(pdev, qmd, shader);
if (nvk_cmd_buffer_compute_cls(cmd) >= AMPERE_COMPUTE_A) {
nvc6c0_qmd_set_dispatch_size(nvk_cmd_buffer_device(cmd), qmd,
@@ -244,8 +370,6 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
desc->root.cs.group_count[2]);
}
const struct nvk_shader *shader =
pipeline->base.shaders[MESA_SHADER_COMPUTE];
for (uint32_t c = 0; c < shader->cbuf_map.cbuf_count; c++) {
const struct nvk_cbuf *cbuf = &shader->cbuf_map.cbufs[c];

View file

@@ -11,7 +11,7 @@
#include "nvk_image_view.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_pipeline.h"
#include "nvk_shader.h"
#include "nil_format.h"
#include "util/bitpack_helpers.h"
@@ -370,13 +370,6 @@ nvk_push_draw_state_init(struct nvk_device *dev, struct nv_push *p)
P_NV9097_SET_PROGRAM_REGION_B(p, shader_base_addr);
}
for (uint32_t i = 0; i < 6; i++) {
P_IMMD(p, NV9097, SET_PIPELINE_SHADER(i), {
.enable = ENABLE_FALSE,
.type = i,
});
}
for (uint32_t group = 0; group < 5; group++) {
for (uint32_t slot = 0; slot < 16; slot++) {
P_IMMD(p, NV9097, BIND_GROUP_CONSTANT_BUFFER(group), {
@@ -495,6 +488,8 @@ nvk_cmd_buffer_begin_graphics(struct nvk_cmd_buffer *cmd,
nvk_cmd_buffer_dirty_render_pass(cmd);
}
}
cmd->state.gfx.shaders_dirty = ~0;
}
void
@@ -514,6 +509,8 @@ nvk_cmd_invalidate_graphics_state(struct nvk_cmd_buffer *cmd)
struct nvk_rendering_state render_save = cmd->state.gfx.render;
memset(&cmd->state.gfx, 0, sizeof(cmd->state.gfx));
cmd->state.gfx.render = render_save;
cmd->state.gfx.shaders_dirty = ~0;
}
static void
@@ -951,23 +948,223 @@ nvk_CmdEndRendering(VkCommandBuffer commandBuffer)
}
void
nvk_cmd_bind_graphics_pipeline(struct nvk_cmd_buffer *cmd,
struct nvk_graphics_pipeline *pipeline)
nvk_cmd_bind_graphics_shader(struct nvk_cmd_buffer *cmd,
const gl_shader_stage stage,
struct nvk_shader *shader)
{
cmd->state.gfx.pipeline = pipeline;
vk_cmd_set_dynamic_graphics_state(&cmd->vk, &pipeline->dynamic);
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
assert(stage < ARRAY_SIZE(cmd->state.gfx.shaders));
if (cmd->state.gfx.shaders[stage] == shader)
return;
cmd->state.gfx.shaders[stage] = shader;
cmd->state.gfx.shaders_dirty |= BITFIELD_BIT(stage);
/* When a pipeline with tess shaders is bound we need to re-upload the
* tessellation parameters at flush_ts_state, as the domain origin can be
* dynamic.
*/
if (nvk_shader_is_enabled(pipeline->base.shaders[MESA_SHADER_TESS_EVAL])) {
BITSET_SET(cmd->vk.dynamic_graphics_state.dirty,
MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN);
if (stage == MESA_SHADER_TESS_EVAL)
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN);
/* Emitting SET_HYBRID_ANTI_ALIAS_CONTROL requires the fragment shader */
if (stage == MESA_SHADER_FRAGMENT)
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES);
}
static uint32_t
mesa_to_nv9097_shader_type(gl_shader_stage stage)
{
static const uint32_t mesa_to_nv9097[] = {
[MESA_SHADER_VERTEX] = NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX,
[MESA_SHADER_TESS_CTRL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION_INIT,
[MESA_SHADER_TESS_EVAL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION,
[MESA_SHADER_GEOMETRY] = NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY,
[MESA_SHADER_FRAGMENT] = NV9097_SET_PIPELINE_SHADER_TYPE_PIXEL,
};
assert(stage < ARRAY_SIZE(mesa_to_nv9097));
return mesa_to_nv9097[stage];
}
static uint32_t
nvk_pipeline_bind_group(gl_shader_stage stage)
{
return stage;
}
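/* Re-emit hardware shader state for any stages dirtied since the last draw:
 * SET_PIPELINE_SHADER/PROGRAM for each changed shader type, the
 * fragment-shader side state, the transform feedback layout, and the layer,
 * point-size, and clip/cull controls taken from the last enabled
 * pre-rasterization stage.
 */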
static void
nvk_flush_shaders(struct nvk_cmd_buffer *cmd)
{
if (cmd->state.gfx.shaders_dirty == 0)
return;
/* Map shader types to shaders */
struct nvk_shader *type_shader[6] = { NULL, };
uint32_t types_dirty = 0;
const uint32_t gfx_stages = BITFIELD_BIT(MESA_SHADER_VERTEX) |
BITFIELD_BIT(MESA_SHADER_TESS_CTRL) |
BITFIELD_BIT(MESA_SHADER_TESS_EVAL) |
BITFIELD_BIT(MESA_SHADER_GEOMETRY) |
BITFIELD_BIT(MESA_SHADER_FRAGMENT);
u_foreach_bit(stage, cmd->state.gfx.shaders_dirty & gfx_stages) {
uint32_t type = mesa_to_nv9097_shader_type(stage);
types_dirty |= BITFIELD_BIT(type);
/* Only copy non-NULL shaders because mesh/task alias with vertex and
* tessellation stages.
*/
if (cmd->state.gfx.shaders[stage] != NULL) {
assert(type < ARRAY_SIZE(type_shader));
assert(type_shader[type] == NULL);
type_shader[type] = cmd->state.gfx.shaders[stage];
}
}
struct nv_push *p = nvk_cmd_buffer_push(cmd, pipeline->push_dw_count);
nv_push_raw(p, pipeline->push_data, pipeline->push_dw_count);
u_foreach_bit(type, types_dirty) {
struct nvk_shader *shader = type_shader[type];
/* We always map index == type */
const uint32_t idx = type;
struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
P_IMMD(p, NV9097, SET_PIPELINE_SHADER(idx), {
.enable = shader != NULL,
.type = type,
});
if (shader == NULL)
continue;
uint64_t addr = shader->hdr_addr;
if (nvk_cmd_buffer_3d_cls(cmd) >= VOLTA_A) {
P_MTHD(p, NVC397, SET_PIPELINE_PROGRAM_ADDRESS_A(idx));
P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_A(p, idx, addr >> 32);
P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_B(p, idx, addr);
} else {
assert(addr < 0xffffffff);
P_IMMD(p, NV9097, SET_PIPELINE_PROGRAM(idx), addr);
}
P_MTHD(p, NVC397, SET_PIPELINE_REGISTER_COUNT(idx));
P_NVC397_SET_PIPELINE_REGISTER_COUNT(p, idx, shader->info.num_gprs);
P_NVC397_SET_PIPELINE_BINDING(p, idx,
nvk_pipeline_bind_group(shader->info.stage));
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
p = nvk_cmd_buffer_push(cmd, 9);
P_MTHD(p, NVC397, SET_SUBTILING_PERF_KNOB_A);
P_NV9097_SET_SUBTILING_PERF_KNOB_A(p, {
.fraction_of_spm_register_file_per_subtile = 0x10,
.fraction_of_spm_pixel_output_buffer_per_subtile = 0x40,
.fraction_of_spm_triangle_ram_per_subtile = 0x16,
.fraction_of_max_quads_per_subtile = 0x20,
});
P_NV9097_SET_SUBTILING_PERF_KNOB_B(p, 0x20);
P_IMMD(p, NV9097, SET_API_MANDATED_EARLY_Z,
shader->info.fs.early_fragment_tests);
if (nvk_cmd_buffer_3d_cls(cmd) >= MAXWELL_B) {
P_IMMD(p, NVB197, SET_POST_Z_PS_IMASK,
shader->info.fs.post_depth_coverage);
} else {
assert(!shader->info.fs.post_depth_coverage);
}
P_IMMD(p, NV9097, SET_ZCULL_BOUNDS, {
.z_min_unbounded_enable = shader->info.fs.writes_depth,
.z_max_unbounded_enable = shader->info.fs.writes_depth,
});
}
}
const uint32_t vtg_stages = BITFIELD_BIT(MESA_SHADER_VERTEX) |
BITFIELD_BIT(MESA_SHADER_TESS_EVAL) |
BITFIELD_BIT(MESA_SHADER_GEOMETRY);
const uint32_t vtgm_stages = vtg_stages | BITFIELD_BIT(MESA_SHADER_MESH);
if (cmd->state.gfx.shaders_dirty & vtg_stages) {
struct nak_xfb_info *xfb = NULL;
u_foreach_bit(stage, vtg_stages) {
if (cmd->state.gfx.shaders[stage] != NULL)
xfb = &cmd->state.gfx.shaders[stage]->info.vtg.xfb;
}
if (xfb == NULL) {
struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
for (uint8_t b = 0; b < 4; b++)
P_IMMD(p, NV9097, SET_STREAM_OUT_CONTROL_COMPONENT_COUNT(b), 0);
} else {
for (uint8_t b = 0; b < ARRAY_SIZE(xfb->attr_count); b++) {
const uint8_t attr_count = xfb->attr_count[b];
/* upload packed varying indices in multiples of 4 bytes */
const uint32_t n = DIV_ROUND_UP(attr_count, 4);
struct nv_push *p = nvk_cmd_buffer_push(cmd, 5 + n);
P_MTHD(p, NV9097, SET_STREAM_OUT_CONTROL_STREAM(b));
P_NV9097_SET_STREAM_OUT_CONTROL_STREAM(p, b, xfb->stream[b]);
P_NV9097_SET_STREAM_OUT_CONTROL_COMPONENT_COUNT(p, b, attr_count);
P_NV9097_SET_STREAM_OUT_CONTROL_STRIDE(p, b, xfb->stride[b]);
if (n > 0) {
P_MTHD(p, NV9097, SET_STREAM_OUT_LAYOUT_SELECT(b, 0));
P_INLINE_ARRAY(p, (const uint32_t*)xfb->attr_index[b], n);
}
}
}
}
if (cmd->state.gfx.shaders_dirty & vtgm_stages) {
struct nvk_shader *last_vtgm = NULL;
u_foreach_bit(stage, vtgm_stages) {
if (cmd->state.gfx.shaders[stage] != NULL)
last_vtgm = cmd->state.gfx.shaders[stage];
}
struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
P_IMMD(p, NV9097, SET_RT_LAYER, {
.v = 0,
.control = last_vtgm->info.vtg.writes_layer ?
CONTROL_GEOMETRY_SHADER_SELECTS_LAYER :
CONTROL_V_SELECTS_LAYER,
});
P_IMMD(p, NV9097, SET_ATTRIBUTE_POINT_SIZE, {
.enable = last_vtgm->info.vtg.writes_point_size,
.slot = 0,
});
const uint8_t clip_enable = last_vtgm->info.vtg.clip_enable;
const uint8_t cull_enable = last_vtgm->info.vtg.cull_enable;
P_IMMD(p, NV9097, SET_USER_CLIP_ENABLE, {
.plane0 = ((clip_enable | cull_enable) >> 0) & 1,
.plane1 = ((clip_enable | cull_enable) >> 1) & 1,
.plane2 = ((clip_enable | cull_enable) >> 2) & 1,
.plane3 = ((clip_enable | cull_enable) >> 3) & 1,
.plane4 = ((clip_enable | cull_enable) >> 4) & 1,
.plane5 = ((clip_enable | cull_enable) >> 5) & 1,
.plane6 = ((clip_enable | cull_enable) >> 6) & 1,
.plane7 = ((clip_enable | cull_enable) >> 7) & 1,
});
P_IMMD(p, NV9097, SET_USER_CLIP_OP, {
.plane0 = (cull_enable >> 0) & 1,
.plane1 = (cull_enable >> 1) & 1,
.plane2 = (cull_enable >> 2) & 1,
.plane3 = (cull_enable >> 3) & 1,
.plane4 = (cull_enable >> 4) & 1,
.plane5 = (cull_enable >> 5) & 1,
.plane6 = (cull_enable >> 6) & 1,
.plane7 = (cull_enable >> 7) & 1,
});
}
cmd->state.gfx.shaders_dirty = 0;
}
static void
@@ -1045,11 +1242,10 @@ nvk_flush_ts_state(struct nvk_cmd_buffer *cmd)
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) {
const struct nvk_graphics_pipeline *pipeline= cmd->state.gfx.pipeline;
const struct nvk_shader *shader =
pipeline->base.shaders[MESA_SHADER_TESS_EVAL];
cmd->state.gfx.shaders[MESA_SHADER_TESS_EVAL];
if (nvk_shader_is_enabled(shader)) {
if (shader != NULL) {
enum nak_ts_prims prims = shader->info.ts.prims;
/* When the origin is lower-left, we have to flip the winding order */
if (dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
@@ -1433,9 +1629,10 @@ nvk_flush_ms_state(struct nvk_cmd_buffer *cmd)
dyn->ms.rasterization_samples == render->samples);
}
const struct nvk_graphics_pipeline *pipeline = cmd->state.gfx.pipeline;
struct nvk_shader *fs = cmd->state.gfx.shaders[MESA_SHADER_FRAGMENT];
const float min_sample_shading = fs != NULL ? fs->min_sample_shading : 0;
uint32_t min_samples = ceilf(dyn->ms.rasterization_samples *
pipeline->min_sample_shading);
min_sample_shading);
min_samples = util_next_power_of_two(MAX2(1, min_samples));
P_IMMD(p, NV9097, SET_HYBRID_ANTI_ALIAS_CONTROL, {
@@ -1923,7 +2120,6 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
struct nvk_physical_device *pdev = nvk_device_physical(dev);
const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
const struct nvk_graphics_pipeline *pipeline = cmd->state.gfx.pipeline;
struct nvk_descriptor_state *desc = &cmd->state.gfx.descriptors;
VkResult result;
@@ -1952,8 +2148,8 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
/* Find cbuf maps for the 5 cbuf groups */
const struct nvk_shader *cbuf_shaders[5] = { NULL, };
for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
const struct nvk_shader *shader = pipeline->base.shaders[stage];
if (!shader || shader->code_size == 0)
const struct nvk_shader *shader = cmd->state.gfx.shaders[stage];
if (shader == NULL)
continue;
uint32_t group = nvk_cbuf_binding_for_stage(stage);
@@ -2053,6 +2249,7 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
static void
nvk_flush_gfx_state(struct nvk_cmd_buffer *cmd)
{
nvk_flush_shaders(cmd);
nvk_flush_dynamic_state(cmd);
nvk_flush_descriptors(cmd);
}

View file

@@ -60,7 +60,7 @@ struct nvk_meta_save {
struct vk_vertex_input_state _dynamic_vi;
struct vk_sample_locations_state _dynamic_sl;
struct vk_dynamic_graphics_state dynamic;
struct nvk_graphics_pipeline *pipeline;
struct nvk_shader *shaders[MESA_SHADER_MESH + 1];
struct nvk_addr_range vb0;
struct nvk_descriptor_set *desc0;
bool has_push_desc0;
@@ -76,7 +76,9 @@ nvk_meta_begin(struct nvk_cmd_buffer *cmd,
save->_dynamic_vi = cmd->state.gfx._dynamic_vi;
save->_dynamic_sl = cmd->state.gfx._dynamic_sl;
save->pipeline = cmd->state.gfx.pipeline;
STATIC_ASSERT(sizeof(cmd->state.gfx.shaders) == sizeof(save->shaders));
memcpy(save->shaders, cmd->state.gfx.shaders, sizeof(save->shaders));
save->vb0 = cmd->state.gfx.vb0;
save->desc0 = cmd->state.gfx.descriptors.sets[0];
@@ -148,8 +150,12 @@ nvk_meta_end(struct nvk_cmd_buffer *cmd,
cmd->vk.dynamic_graphics_state.set,
sizeof(cmd->vk.dynamic_graphics_state.set));
if (save->pipeline)
nvk_cmd_bind_graphics_pipeline(cmd, save->pipeline);
for (uint32_t stage = 0; stage < ARRAY_SIZE(save->shaders); stage++) {
if (stage == MESA_SHADER_COMPUTE)
continue;
nvk_cmd_bind_graphics_shader(cmd, stage, save->shaders[stage]);
}
nvk_cmd_bind_vertex_buffer(cmd, 0, save->vb0);

View file

@@ -8,6 +8,7 @@
#include "nvk_entrypoints.h"
#include "nvk_instance.h"
#include "nvk_physical_device.h"
#include "nvk_shader.h"
#include "vk_pipeline_cache.h"
#include "vulkan/wsi/wsi_common.h"
@@ -146,6 +147,8 @@ nvk_CreateDevice(VkPhysicalDevice physicalDevice,
if (result != VK_SUCCESS)
goto fail_alloc;
dev->vk.shader_ops = &nvk_device_shader_ops;
drmDevicePtr drm_device = NULL;
int ret = drmGetDeviceFromDevId(pdev->render_dev, 0, &drm_device);
if (ret != 0) {

View file

@@ -1,417 +0,0 @@
/*
* Copyright © 2022 Collabora Ltd. and Red Hat Inc.
* SPDX-License-Identifier: MIT
*/
#include "nvk_pipeline.h"
#include "nvk_device.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_shader.h"
#include "vk_nir.h"
#include "vk_pipeline.h"
#include "vk_pipeline_layout.h"
#include "nv_push.h"
#include "nouveau_context.h"
#include "compiler/spirv/nir_spirv.h"
#include "nvk_cl9097.h"
#include "nvk_clb197.h"
#include "nvk_clc397.h"
static void
emit_pipeline_xfb_state(struct nv_push *p, const struct nak_xfb_info *xfb)
{
for (uint8_t b = 0; b < ARRAY_SIZE(xfb->attr_count); b++) {
const uint8_t attr_count = xfb->attr_count[b];
P_MTHD(p, NV9097, SET_STREAM_OUT_CONTROL_STREAM(b));
P_NV9097_SET_STREAM_OUT_CONTROL_STREAM(p, b, xfb->stream[b]);
P_NV9097_SET_STREAM_OUT_CONTROL_COMPONENT_COUNT(p, b, attr_count);
P_NV9097_SET_STREAM_OUT_CONTROL_STRIDE(p, b, xfb->stride[b]);
/* upload packed varying indices in multiples of 4 bytes */
const uint32_t n = DIV_ROUND_UP(attr_count, 4);
if (n > 0) {
P_MTHD(p, NV9097, SET_STREAM_OUT_LAYOUT_SELECT(b, 0));
P_INLINE_ARRAY(p, (const uint32_t*)xfb->attr_index[b], n);
}
}
}
static const uint32_t mesa_to_nv9097_shader_type[] = {
[MESA_SHADER_VERTEX] = NV9097_SET_PIPELINE_SHADER_TYPE_VERTEX,
[MESA_SHADER_TESS_CTRL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION_INIT,
[MESA_SHADER_TESS_EVAL] = NV9097_SET_PIPELINE_SHADER_TYPE_TESSELLATION,
[MESA_SHADER_GEOMETRY] = NV9097_SET_PIPELINE_SHADER_TYPE_GEOMETRY,
[MESA_SHADER_FRAGMENT] = NV9097_SET_PIPELINE_SHADER_TYPE_PIXEL,
};
static void
merge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info)
{
/* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
*
* "PointMode. Controls generation of points rather than triangles
* or lines. This functionality defaults to disabled, and is
* enabled if either shader stage includes the execution mode.
*
* and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
* PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
* and OutputVertices, it says:
*
* "One mode must be set in at least one of the tessellation
* shader stages."
*
* So, the fields can be set in either the TCS or TES, but they must
* agree if set in both. Our backend looks at TES, so bitwise-or in
* the values from the TCS.
*/
assert(tcs_info->tess.tcs_vertices_out == 0 || tes_info->tess.tcs_vertices_out == 0 ||
tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
tcs_info->tess.spacing == tes_info->tess.spacing);
tes_info->tess.spacing |= tcs_info->tess.spacing;
assert(tcs_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
tes_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
tes_info->tess.ccw |= tcs_info->tess.ccw;
tes_info->tess.point_mode |= tcs_info->tess.point_mode;
/* Copy the merged info back to the TCS */
tcs_info->tess.tcs_vertices_out = tes_info->tess.tcs_vertices_out;
tcs_info->tess.spacing = tes_info->tess.spacing;
tcs_info->tess._primitive_mode = tes_info->tess._primitive_mode;
tcs_info->tess.ccw = tes_info->tess.ccw;
tcs_info->tess.point_mode = tes_info->tess.point_mode;
}
VkResult
nvk_graphics_pipeline_create(struct nvk_device *dev,
struct vk_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipeline)
{
VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
struct nvk_graphics_pipeline *pipeline;
VkResult result = VK_SUCCESS;
pipeline = (void *)nvk_pipeline_zalloc(dev, NVK_PIPELINE_GRAPHICS,
sizeof(*pipeline), pAllocator);
if (pipeline == NULL)
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
VkPipelineCreateFlags2KHR pipeline_flags =
vk_graphics_pipeline_create_flags(pCreateInfo);
if (pipeline_flags &
VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)
cache = NULL;
struct vk_graphics_pipeline_all_state all;
struct vk_graphics_pipeline_state state = {};
result = vk_graphics_pipeline_state_fill(&dev->vk, &state, pCreateInfo,
NULL, 0, &all, NULL, 0, NULL);
assert(result == VK_SUCCESS);
VkPipelineCreationFeedbackEXT pipeline_feedback = {
.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
};
VkPipelineCreationFeedbackEXT stage_feedbacks[MESA_SHADER_STAGES] = { 0 };
int64_t pipeline_start = os_time_get_nano();
const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
const VkPipelineShaderStageCreateInfo *infos[MESA_SHADER_STAGES] = {};
nir_shader *nir[MESA_SHADER_STAGES] = {};
struct vk_pipeline_robustness_state robustness[MESA_SHADER_STAGES];
struct vk_pipeline_cache_object *cache_objs[MESA_SHADER_STAGES] = {};
struct nak_fs_key fs_key_tmp, *fs_key = NULL;
nvk_populate_fs_key(&fs_key_tmp, &state);
fs_key = &fs_key_tmp;
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
infos[stage] = sinfo;
}
for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
const VkPipelineShaderStageCreateInfo *sinfo = infos[stage];
if (sinfo == NULL)
continue;
vk_pipeline_robustness_state_fill(&dev->vk, &robustness[stage],
pCreateInfo->pNext, sinfo->pNext);
}
for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
const VkPipelineShaderStageCreateInfo *sinfo = infos[stage];
if (sinfo == NULL)
continue;
unsigned char sha1[SHA1_DIGEST_LENGTH];
nvk_hash_shader(sha1, sinfo, &robustness[stage],
state.rp->view_mask != 0, pipeline_layout,
stage == MESA_SHADER_FRAGMENT ? fs_key : NULL);
if (cache) {
bool cache_hit = false;
cache_objs[stage] = vk_pipeline_cache_lookup_object(cache, &sha1, sizeof(sha1),
&nvk_shader_ops, &cache_hit);
pipeline->base.shaders[stage] =
container_of(cache_objs[stage], struct nvk_shader, base);
if (cache_hit && cache != dev->mem_cache)
pipeline_feedback.flags |=
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
}
if (!cache_objs[stage] &&
pCreateInfo->flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR) {
result = VK_PIPELINE_COMPILE_REQUIRED;
goto fail;
}
}
for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
const VkPipelineShaderStageCreateInfo *sinfo = infos[stage];
if (sinfo == NULL || cache_objs[stage])
continue;
result = nvk_shader_stage_to_nir(dev, sinfo, &robustness[stage],
cache, NULL, &nir[stage]);
if (result != VK_SUCCESS)
goto fail;
}
if (nir[MESA_SHADER_TESS_CTRL] && nir[MESA_SHADER_TESS_EVAL]) {
merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
}
for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
const VkPipelineShaderStageCreateInfo *sinfo = infos[stage];
if (sinfo == NULL)
continue;
if (!cache_objs[stage]) {
int64_t stage_start = os_time_get_nano();
unsigned char sha1[SHA1_DIGEST_LENGTH];
nvk_hash_shader(sha1, sinfo, &robustness[stage],
state.rp->view_mask != 0, pipeline_layout,
stage == MESA_SHADER_FRAGMENT ? fs_key : NULL);
struct nvk_shader *shader = nvk_shader_init(dev, sha1, SHA1_DIGEST_LENGTH);
if(shader == NULL) {
result = vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
nvk_lower_nir(dev, nir[stage], &robustness[stage],
state.rp->view_mask != 0,
pipeline_layout->set_count,
pipeline_layout->set_layouts,
&shader->cbuf_map);
result = nvk_compile_nir(dev, nir[stage],
pipeline_flags, &robustness[stage],
stage == MESA_SHADER_FRAGMENT ? fs_key : NULL,
cache, shader);
if (result == VK_SUCCESS) {
cache_objs[stage] = &shader->base;
if (cache)
cache_objs[stage] = vk_pipeline_cache_add_object(cache,
cache_objs[stage]);
stage_feedbacks[stage].flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
pipeline->base.shaders[stage] =
container_of(cache_objs[stage], struct nvk_shader, base);
}
stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
ralloc_free(nir[stage]);
}
if (result != VK_SUCCESS)
goto fail;
result = nvk_shader_upload(dev, pipeline->base.shaders[stage]);
if (result != VK_SUCCESS)
goto fail;
}
struct nv_push push;
nv_push_init(&push, pipeline->push_data, ARRAY_SIZE(pipeline->push_data));
struct nv_push *p = &push;
bool force_max_samples = false;
struct nvk_shader *last_geom = NULL;
for (gl_shader_stage stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
struct nvk_shader *shader = pipeline->base.shaders[stage];
uint32_t idx = mesa_to_nv9097_shader_type[stage];
P_IMMD(p, NV9097, SET_PIPELINE_SHADER(idx), {
.enable = nvk_shader_is_enabled(shader),
.type = mesa_to_nv9097_shader_type[stage],
});
if (!nvk_shader_is_enabled(shader))
continue;
if (stage != MESA_SHADER_FRAGMENT)
last_geom = shader;
uint64_t addr = shader->hdr_addr;
if (dev->pdev->info.cls_eng3d >= VOLTA_A) {
P_MTHD(p, NVC397, SET_PIPELINE_PROGRAM_ADDRESS_A(idx));
P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_A(p, idx, addr >> 32);
P_NVC397_SET_PIPELINE_PROGRAM_ADDRESS_B(p, idx, addr);
} else {
assert(addr < 0xffffffff);
P_IMMD(p, NV9097, SET_PIPELINE_PROGRAM(idx), addr);
}
P_MTHD(p, NVC397, SET_PIPELINE_REGISTER_COUNT(idx));
P_NVC397_SET_PIPELINE_REGISTER_COUNT(p, idx, shader->info.num_gprs);
P_NVC397_SET_PIPELINE_BINDING(p, idx, nvk_cbuf_binding_for_stage(stage));
switch (stage) {
case MESA_SHADER_VERTEX:
case MESA_SHADER_GEOMETRY:
case MESA_SHADER_TESS_CTRL:
case MESA_SHADER_TESS_EVAL:
break;
case MESA_SHADER_FRAGMENT:
P_IMMD(p, NV9097, SET_SUBTILING_PERF_KNOB_A, {
.fraction_of_spm_register_file_per_subtile = 0x10,
.fraction_of_spm_pixel_output_buffer_per_subtile = 0x40,
.fraction_of_spm_triangle_ram_per_subtile = 0x16,
.fraction_of_max_quads_per_subtile = 0x20,
});
P_NV9097_SET_SUBTILING_PERF_KNOB_B(p, 0x20);
P_IMMD(p, NV9097, SET_API_MANDATED_EARLY_Z,
shader->info.fs.early_fragment_tests);
if (dev->pdev->info.cls_eng3d >= MAXWELL_B) {
P_IMMD(p, NVB197, SET_POST_Z_PS_IMASK,
shader->info.fs.post_depth_coverage);
} else {
assert(!shader->info.fs.post_depth_coverage);
}
P_IMMD(p, NV9097, SET_ZCULL_BOUNDS, {
.z_min_unbounded_enable = shader->info.fs.writes_depth,
.z_max_unbounded_enable = shader->info.fs.writes_depth,
});
/* If we're using the incoming sample mask and doing sample shading,
* we have to do sample shading "to the max", otherwise there's no
* way to tell which sets of samples are covered by the current
* invocation.
*/
force_max_samples = shader->info.fs.reads_sample_mask ||
shader->info.fs.uses_sample_shading;
break;
default:
unreachable("Unsupported shader stage");
}
}
const uint8_t clip_cull = last_geom->info.vtg.clip_enable |
last_geom->info.vtg.cull_enable;
if (clip_cull) {
P_IMMD(p, NV9097, SET_USER_CLIP_ENABLE, {
.plane0 = (clip_cull >> 0) & 1,
.plane1 = (clip_cull >> 1) & 1,
.plane2 = (clip_cull >> 2) & 1,
.plane3 = (clip_cull >> 3) & 1,
.plane4 = (clip_cull >> 4) & 1,
.plane5 = (clip_cull >> 5) & 1,
.plane6 = (clip_cull >> 6) & 1,
.plane7 = (clip_cull >> 7) & 1,
});
P_IMMD(p, NV9097, SET_USER_CLIP_OP, {
.plane0 = (last_geom->info.vtg.cull_enable >> 0) & 1,
.plane1 = (last_geom->info.vtg.cull_enable >> 1) & 1,
.plane2 = (last_geom->info.vtg.cull_enable >> 2) & 1,
.plane3 = (last_geom->info.vtg.cull_enable >> 3) & 1,
.plane4 = (last_geom->info.vtg.cull_enable >> 4) & 1,
.plane5 = (last_geom->info.vtg.cull_enable >> 5) & 1,
.plane6 = (last_geom->info.vtg.cull_enable >> 6) & 1,
.plane7 = (last_geom->info.vtg.cull_enable >> 7) & 1,
});
}
/* TODO: prog_selects_layer */
P_IMMD(p, NV9097, SET_RT_LAYER, {
.v = 0,
.control = last_geom->info.vtg.writes_layer ?
CONTROL_GEOMETRY_SHADER_SELECTS_LAYER :
CONTROL_V_SELECTS_LAYER,
});
P_IMMD(p, NV9097, SET_ATTRIBUTE_POINT_SIZE, {
.enable = last_geom->info.vtg.writes_point_size,
.slot = 0,
});
emit_pipeline_xfb_state(&push, &last_geom->info.vtg.xfb);
pipeline->push_dw_count = nv_push_dw_count(&push);
if (force_max_samples)
pipeline->min_sample_shading = 1;
else if (state.ms != NULL && state.ms->sample_shading_enable)
pipeline->min_sample_shading = CLAMP(state.ms->min_sample_shading, 0, 1);
else
pipeline->min_sample_shading = 0;
pipeline->dynamic.vi = &pipeline->_dynamic_vi;
pipeline->dynamic.ms.sample_locations = &pipeline->_dynamic_sl;
vk_dynamic_graphics_state_fill(&pipeline->dynamic, &state);
pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
if (creation_feedback) {
*creation_feedback->pPipelineCreationFeedback = pipeline_feedback;
int fb_count = creation_feedback->pipelineStageCreationFeedbackCount;
if (pCreateInfo->stageCount == fb_count) {
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
const VkPipelineShaderStageCreateInfo *sinfo =
&pCreateInfo->pStages[i];
gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
creation_feedback->pPipelineStageCreationFeedbacks[i] =
stage_feedbacks[stage];
}
}
}
*pPipeline = nvk_pipeline_to_handle(&pipeline->base);
return VK_SUCCESS;
fail:
vk_object_free(&dev->vk, pAllocator, pipeline);
return result;
}

View file

@@ -1,74 +0,0 @@
/*
* Copyright © 2022 Collabora Ltd. and Red Hat Inc.
* SPDX-License-Identifier: MIT
*/
#ifndef NVK_PIPELINE_H
#define NVK_PIPELINE_H 1
#include "nvk_private.h"
#include "nvk_shader.h"
#include "vk_graphics_state.h"
#include "vk_object.h"
struct vk_pipeline_cache;
enum nvk_pipeline_type {
NVK_PIPELINE_GRAPHICS,
NVK_PIPELINE_COMPUTE,
};
struct nvk_pipeline {
struct vk_object_base base;
enum nvk_pipeline_type type;
struct nvk_shader *shaders[MESA_SHADER_STAGES];
};
VK_DEFINE_NONDISP_HANDLE_CASTS(nvk_pipeline, base, VkPipeline,
VK_OBJECT_TYPE_PIPELINE)
void
nvk_pipeline_free(struct nvk_device *dev,
struct nvk_pipeline *pipeline,
const VkAllocationCallbacks *pAllocator);
struct nvk_pipeline *
nvk_pipeline_zalloc(struct nvk_device *dev,
enum nvk_pipeline_type type, size_t size,
const VkAllocationCallbacks *pAllocator);
struct nvk_compute_pipeline {
struct nvk_pipeline base;
uint32_t qmd_template[64];
};
VkResult
nvk_compute_pipeline_create(struct nvk_device *dev,
struct vk_pipeline_cache *cache,
const VkComputePipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipeline);
struct nvk_graphics_pipeline {
struct nvk_pipeline base;
uint32_t push_data[192];
uint32_t push_dw_count;
float min_sample_shading;
struct vk_vertex_input_state _dynamic_vi;
struct vk_sample_locations_state _dynamic_sl;
struct vk_dynamic_graphics_state dynamic;
};
VkResult
nvk_graphics_pipeline_create(struct nvk_device *dev,
struct vk_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipeline);
#endif

View file

@@ -11,7 +11,6 @@
#include "nvk_event.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_pipeline.h"
#include "vk_meta.h"
#include "vk_pipeline.h"
@@ -973,12 +972,13 @@ nvk_meta_copy_query_pool_results(struct nvk_cmd_buffer *cmd,
}
/* Save pipeline and push constants */
struct nvk_compute_pipeline *pipeline_save = cmd->state.cs.pipeline;
struct nvk_shader *shader_save = cmd->state.cs.shader;
uint8_t push_save[NVK_MAX_PUSH_SIZE];
memcpy(push_save, desc->root.push, NVK_MAX_PUSH_SIZE);
nvk_CmdBindPipeline(nvk_cmd_buffer_to_handle(cmd),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
dev->vk.dispatch_table.CmdBindPipeline(nvk_cmd_buffer_to_handle(cmd),
VK_PIPELINE_BIND_POINT_COMPUTE,
pipeline);
nvk_CmdPushConstants(nvk_cmd_buffer_to_handle(cmd), layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push), &push);
@@ -986,11 +986,8 @@ nvk_meta_copy_query_pool_results(struct nvk_cmd_buffer *cmd,
nvk_CmdDispatchBase(nvk_cmd_buffer_to_handle(cmd), 0, 0, 0, 1, 1, 1);
/* Restore pipeline and push constants */
if (pipeline_save) {
nvk_CmdBindPipeline(nvk_cmd_buffer_to_handle(cmd),
VK_PIPELINE_BIND_POINT_COMPUTE,
nvk_pipeline_to_handle(&pipeline_save->base));
}
if (shader_save)
nvk_cmd_bind_compute_shader(cmd, shader_save);
memcpy(desc->root.push, push_save, NVK_MAX_PUSH_SIZE);
}

View file

@@ -8,12 +8,11 @@
#include "nvk_descriptor_set_layout.h"
#include "nvk_device.h"
#include "nvk_physical_device.h"
#include "nvk_pipeline.h"
#include "nvk_sampler.h"
#include "nvk_shader.h"
#include "vk_nir_convert_ycbcr.h"
#include "vk_pipeline.h"
#include "vk_pipeline_cache.h"
#include "vk_pipeline_layout.h"
#include "vk_shader_module.h"
#include "vk_ycbcr_conversion.h"
@@ -186,7 +185,7 @@ nvk_preprocess_nir(struct vk_physical_device *vk_pdev, nir_shader *nir)
nvk_cg_preprocess_nir(nir);
}
void
static void
nvk_populate_fs_key(struct nak_fs_key *key,
const struct vk_graphics_pipeline_state *state)
{
@@ -195,6 +194,9 @@ nvk_populate_fs_key(struct nak_fs_key *key,
key->sample_locations_cb = 0;
key->sample_locations_offset = nvk_root_descriptor_offset(draw.sample_locations);
if (state == NULL)
return;
if (state->pipeline_flags &
VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT)
key->zs_self_dep = true;
@@ -208,6 +210,25 @@ nvk_populate_fs_key(struct nak_fs_key *key,
key->force_sample_shading = true;
}
static void
nvk_hash_graphics_state(struct vk_physical_device *device,
const struct vk_graphics_pipeline_state *state,
VkShaderStageFlags stages,
blake3_hash blake3_out)
{
struct mesa_blake3 blake3_ctx;
_mesa_blake3_init(&blake3_ctx);
if (stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
struct nak_fs_key key;
nvk_populate_fs_key(&key, state);
_mesa_blake3_update(&blake3_ctx, &key, sizeof(key));
const bool is_multiview = state->rp->view_mask != 0;
_mesa_blake3_update(&blake3_ctx, &is_multiview, sizeof(is_multiview));
}
_mesa_blake3_final(&blake3_ctx, blake3_out);
}
static bool
lower_load_global_constant_offset_instr(nir_builder *b,
nir_intrinsic_instr *intrin,
@@ -290,52 +311,6 @@ lookup_ycbcr_conversion(const void *_state, uint32_t set,
&sampler->vk.ycbcr_conversion->state : NULL;
}
VkResult
nvk_shader_stage_to_nir(struct nvk_device *dev,
const VkPipelineShaderStageCreateInfo *sinfo,
const struct vk_pipeline_robustness_state *rstate,
struct vk_pipeline_cache *cache,
void *mem_ctx, struct nir_shader **nir_out)
{
struct nvk_physical_device *pdev = nvk_device_physical(dev);
const gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
const nir_shader_compiler_options *nir_options =
nvk_get_nir_options(&pdev->vk, stage, rstate);
unsigned char stage_sha1[SHA1_DIGEST_LENGTH];
vk_pipeline_hash_shader_stage(sinfo, rstate, stage_sha1);
if (cache == NULL)
cache = dev->mem_cache;
nir_shader *nir = vk_pipeline_cache_lookup_nir(cache, stage_sha1,
sizeof(stage_sha1),
nir_options, NULL,
mem_ctx);
if (nir != NULL) {
*nir_out = nir;
return VK_SUCCESS;
}
const struct spirv_to_nir_options spirv_options =
nvk_get_spirv_options(&pdev->vk, stage, rstate);
VkResult result = vk_pipeline_shader_stage_to_nir(&dev->vk, sinfo,
&spirv_options,
nir_options,
mem_ctx, &nir);
if (result != VK_SUCCESS)
return result;
nvk_preprocess_nir(&dev->pdev->vk, nir);
vk_pipeline_cache_add_nir(cache, stage_sha1, sizeof(stage_sha1), nir);
*nir_out = nir;
return VK_SUCCESS;
}
static inline bool
nir_has_image_var(nir_shader *nir)
{
@@ -493,13 +468,13 @@ nvk_shader_dump(struct nvk_shader *shader)
static VkResult
nvk_compile_nir_with_nak(struct nvk_physical_device *pdev,
nir_shader *nir,
VkPipelineCreateFlagBits2KHR pipeline_flags,
VkShaderCreateFlagsEXT shader_flags,
const struct vk_pipeline_robustness_state *rs,
const struct nak_fs_key *fs_key,
struct nvk_shader *shader)
{
const bool dump_asm =
pipeline_flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
shader_flags & VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA;
nir_variable_mode robust2_modes = 0;
if (rs->uniform_buffers == VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT)
@@ -515,38 +490,18 @@ nvk_compile_nir_with_nak(struct nvk_physical_device *pdev,
return VK_SUCCESS;
}
struct nvk_shader *
nvk_shader_init(struct nvk_device *dev, const void *key_data, size_t key_size)
{
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct nvk_shader, shader, 1);
VK_MULTIALLOC_DECL_SIZE(&ma, char, obj_key_data, key_size);
if (!vk_multialloc_zalloc(&ma, &dev->vk.alloc,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
return NULL;
memcpy(obj_key_data, key_data, key_size);
vk_pipeline_cache_object_init(&dev->vk, &shader->base,
&nvk_shader_ops, obj_key_data, key_size);
return shader;
}
VkResult
static VkResult
nvk_compile_nir(struct nvk_device *dev, nir_shader *nir,
VkPipelineCreateFlagBits2KHR pipeline_flags,
VkShaderCreateFlagsEXT shader_flags,
const struct vk_pipeline_robustness_state *rs,
const struct nak_fs_key *fs_key,
struct vk_pipeline_cache *cache,
struct nvk_shader *shader)
{
struct nvk_physical_device *pdev = nvk_device_physical(dev);
VkResult result;
if (use_nak(pdev, nir->info.stage)) {
result = nvk_compile_nir_with_nak(pdev, nir, pipeline_flags, rs,
result = nvk_compile_nir_with_nak(pdev, nir, shader_flags, rs,
fs_key, shader);
} else {
result = nvk_cg_compile_nir(pdev, nir, fs_key, shader);
@@ -555,7 +510,7 @@ nvk_compile_nir(struct nvk_device *dev, nir_shader *nir,
return result;
if (nir->constant_data_size > 0) {
uint32_t data_align = nvk_min_cbuf_alignment(&dev->pdev->info);
uint32_t data_align = nvk_min_cbuf_alignment(&pdev->info);
uint32_t data_size = align(nir->constant_data_size, data_align);
void *data = malloc(data_size);
@@ -650,11 +605,15 @@ nvk_shader_upload(struct nvk_device *dev, struct nvk_shader *shader)
return result;
}
void
nvk_shader_finish(struct nvk_device *dev, struct nvk_shader *shader)
static const struct vk_shader_ops nvk_shader_ops;
static void
nvk_shader_destroy(struct vk_device *vk_dev,
struct vk_shader *vk_shader,
const VkAllocationCallbacks* pAllocator)
{
if (shader == NULL)
return;
struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);
struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
if (shader->upload_size > 0) {
nvk_heap_free(dev, &dev->shader_heap,
@@ -671,127 +630,330 @@ nvk_shader_finish(struct nvk_device *dev, struct nvk_shader *shader)
free((void *)shader->data_ptr);
vk_free(&dev->vk.alloc, shader);
vk_shader_free(&dev->vk, pAllocator, &shader->vk);
}
void
nvk_hash_shader(unsigned char *hash,
const VkPipelineShaderStageCreateInfo *sinfo,
const struct vk_pipeline_robustness_state *rs,
bool is_multiview,
const struct vk_pipeline_layout *layout,
const struct nak_fs_key *fs_key)
static VkResult
nvk_compile_shader(struct nvk_device *dev,
struct vk_shader_compile_info *info,
const struct vk_graphics_pipeline_state *state,
const VkAllocationCallbacks* pAllocator,
struct vk_shader **shader_out)
{
struct mesa_sha1 ctx;
struct nvk_shader *shader;
VkResult result;
_mesa_sha1_init(&ctx);
/* We consume the NIR, regardless of success or failure */
nir_shader *nir = info->nir;
unsigned char stage_sha1[SHA1_DIGEST_LENGTH];
vk_pipeline_hash_shader_stage(sinfo, rs, stage_sha1);
shader = vk_shader_zalloc(&dev->vk, &nvk_shader_ops, info->stage,
pAllocator, sizeof(*shader));
if (shader == NULL) {
ralloc_free(nir);
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
}
_mesa_sha1_update(&ctx, stage_sha1, sizeof(stage_sha1));
/* TODO: Multiview with ESO */
const bool is_multiview = state && state->rp->view_mask != 0;
_mesa_sha1_update(&ctx, &is_multiview, sizeof(is_multiview));
nvk_lower_nir(dev, nir, info->robustness, is_multiview,
info->set_layout_count, info->set_layouts,
&shader->cbuf_map);
if (layout) {
_mesa_sha1_update(&ctx, &layout->create_flags,
sizeof(layout->create_flags));
_mesa_sha1_update(&ctx, &layout->set_count, sizeof(layout->set_count));
for (int i = 0; i < layout->set_count; i++) {
struct nvk_descriptor_set_layout *set =
vk_to_nvk_descriptor_set_layout(layout->set_layouts[i]);
_mesa_sha1_update(&ctx, &set->vk.blake3, sizeof(set->vk.blake3));
struct nak_fs_key fs_key_tmp, *fs_key = NULL;
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
nvk_populate_fs_key(&fs_key_tmp, state);
fs_key = &fs_key_tmp;
}
result = nvk_compile_nir(dev, nir, info->flags, info->robustness,
fs_key, shader);
ralloc_free(nir);
if (result != VK_SUCCESS) {
nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return result;
}
result = nvk_shader_upload(dev, shader);
if (result != VK_SUCCESS) {
nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return result;
}
if (info->stage == MESA_SHADER_FRAGMENT) {
if (shader->info.fs.reads_sample_mask ||
shader->info.fs.uses_sample_shading) {
shader->min_sample_shading = 1;
} else if (state != NULL && state->ms != NULL &&
state->ms->sample_shading_enable) {
shader->min_sample_shading =
CLAMP(state->ms->min_sample_shading, 0, 1);
} else {
shader->min_sample_shading = 0;
}
}
if(fs_key)
_mesa_sha1_update(&ctx, fs_key, sizeof(*fs_key));
*shader_out = &shader->vk;
_mesa_sha1_final(&ctx, hash);
return VK_SUCCESS;
}
static bool
nvk_shader_serialize(struct vk_pipeline_cache_object *object,
struct blob *blob);
static struct vk_pipeline_cache_object *
nvk_shader_deserialize(struct vk_pipeline_cache *cache,
const void *key_data,
size_t key_size,
struct blob_reader *blob);
void
nvk_shader_destroy(struct vk_device *_dev,
struct vk_pipeline_cache_object *object)
static VkResult
nvk_compile_shaders(struct vk_device *vk_dev,
uint32_t shader_count,
struct vk_shader_compile_info *infos,
const struct vk_graphics_pipeline_state *state,
const VkAllocationCallbacks* pAllocator,
struct vk_shader **shaders_out)
{
struct nvk_device *dev =
container_of(_dev, struct nvk_device, vk);
struct nvk_shader *shader =
container_of(object, struct nvk_shader, base);
struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);
nvk_shader_finish(dev, shader);
for (uint32_t i = 0; i < shader_count; i++) {
VkResult result = nvk_compile_shader(dev, &infos[i], state,
pAllocator, &shaders_out[i]);
if (result != VK_SUCCESS) {
/* Clean up all the shaders before this point */
for (uint32_t j = 0; j < i; j++)
nvk_shader_destroy(&dev->vk, shaders_out[j], pAllocator);
/* Clean up all the NIR after this point */
for (uint32_t j = i + 1; j < shader_count; j++)
ralloc_free(infos[j].nir);
/* Memset the output array */
memset(shaders_out, 0, shader_count * sizeof(*shaders_out));
return result;
}
}
return VK_SUCCESS;
}
const struct vk_pipeline_cache_object_ops nvk_shader_ops = {
.serialize = nvk_shader_serialize,
.deserialize = nvk_shader_deserialize,
.destroy = nvk_shader_destroy,
};
static VkResult
nvk_deserialize_shader(struct vk_device *vk_dev,
struct blob_reader *blob,
uint32_t binary_version,
const VkAllocationCallbacks* pAllocator,
struct vk_shader **shader_out)
{
struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);
struct nvk_shader *shader;
VkResult result;
struct nak_shader_info info;
blob_copy_bytes(blob, &info, sizeof(info));
struct nvk_cbuf_map cbuf_map;
blob_copy_bytes(blob, &cbuf_map, sizeof(cbuf_map));
float min_sample_shading;
blob_copy_bytes(blob, &min_sample_shading, sizeof(min_sample_shading));
const uint32_t code_size = blob_read_uint32(blob);
const uint32_t data_size = blob_read_uint32(blob);
if (blob->overrun)
return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
shader = vk_shader_zalloc(&dev->vk, &nvk_shader_ops, info.stage,
pAllocator, sizeof(*shader));
if (shader == NULL)
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
shader->info = info;
shader->cbuf_map = cbuf_map;
shader->min_sample_shading = min_sample_shading;
shader->code_size = code_size;
shader->data_size = data_size;
shader->code_ptr = malloc(code_size);
if (shader->code_ptr == NULL) {
nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
}
shader->data_ptr = malloc(data_size);
if (shader->data_ptr == NULL) {
nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
}
blob_copy_bytes(blob, (void *)shader->code_ptr, shader->code_size);
blob_copy_bytes(blob, (void *)shader->data_ptr, shader->data_size);
if (blob->overrun) {
nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
}
result = nvk_shader_upload(dev, shader);
if (result != VK_SUCCESS) {
nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return result;
}
*shader_out = &shader->vk;
return VK_SUCCESS;
}
static bool
nvk_shader_serialize(struct vk_pipeline_cache_object *object,
nvk_shader_serialize(struct vk_device *vk_dev,
const struct vk_shader *vk_shader,
struct blob *blob)
{
struct nvk_shader *shader =
container_of(object, struct nvk_shader, base);
struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
/* We can't currently cache assembly */
if (shader->nak != NULL && shader->nak->asm_str != NULL)
return false;
blob_write_bytes(blob, &shader->info, sizeof(shader->info));
blob_write_bytes(blob, &shader->cbuf_map, sizeof(shader->cbuf_map));
blob_write_bytes(blob, &shader->min_sample_shading,
sizeof(shader->min_sample_shading));
blob_write_uint32(blob, shader->code_size);
blob_write_uint32(blob, shader->data_size);
blob_write_bytes(blob, shader->code_ptr, shader->code_size);
blob_write_bytes(blob, shader->data_ptr, shader->data_size);
return !blob->out_of_memory;
}
#define WRITE_STR(field, ...) ({ \
memset(field, 0, sizeof(field)); \
UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
assert(i > 0 && i < sizeof(field)); \
})
static VkResult
nvk_shader_get_executable_properties(
UNUSED struct vk_device *device,
const struct vk_shader *vk_shader,
uint32_t *executable_count,
VkPipelineExecutablePropertiesKHR *properties)
{
struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
properties, executable_count);
vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
props->stages = mesa_to_vk_shader_stage(shader->info.stage);
props->subgroupSize = 32;
WRITE_STR(props->name, "%s",
_mesa_shader_stage_to_string(shader->info.stage));
WRITE_STR(props->description, "%s shader",
_mesa_shader_stage_to_string(shader->info.stage));
}
return vk_outarray_status(&out);
}
static VkResult
nvk_shader_get_executable_statistics(
UNUSED struct vk_device *device,
const struct vk_shader *vk_shader,
uint32_t executable_index,
uint32_t *statistic_count,
VkPipelineExecutableStatisticKHR *statistics)
{
struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
statistics, statistic_count);
assert(executable_index == 0);
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
WRITE_STR(stat->name, "Code Size");
WRITE_STR(stat->description,
"Size of the compiled shader binary, in bytes");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = shader->code_size;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
WRITE_STR(stat->name, "Number of GPRs");
WRITE_STR(stat->description, "Number of GPRs used by this pipeline");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = shader->info.num_gprs;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
WRITE_STR(stat->name, "SLM Size");
WRITE_STR(stat->description,
"Size of shader local (scratch) memory, in bytes");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = shader->info.slm_size;
}
return vk_outarray_status(&out);
}
static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
const char *data)
{
ir->isText = VK_TRUE;
size_t data_len = strlen(data) + 1;
if (ir->pData == NULL) {
ir->dataSize = data_len;
return true;
}
strncpy(ir->pData, data, ir->dataSize);
if (ir->dataSize < data_len)
return false;
ir->dataSize = data_len;
return true;
}
static struct vk_pipeline_cache_object *
nvk_shader_deserialize(struct vk_pipeline_cache *cache,
const void *key_data,
size_t key_size,
struct blob_reader *blob)
static VkResult
nvk_shader_get_executable_internal_representations(
UNUSED struct vk_device *device,
const struct vk_shader *vk_shader,
uint32_t executable_index,
uint32_t *internal_representation_count,
VkPipelineExecutableInternalRepresentationKHR *internal_representations)
{
struct nvk_device *dev =
container_of(cache->base.device, struct nvk_device, vk);
struct nvk_shader *shader =
nvk_shader_init(dev, key_data, key_size);
struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
internal_representations,
internal_representation_count);
bool incomplete_text = false;
if (!shader)
return NULL;
assert(executable_index == 0);
blob_copy_bytes(blob, &shader->info, sizeof(shader->info));
blob_copy_bytes(blob, &shader->cbuf_map, sizeof(shader->cbuf_map));
if (shader->nak != NULL && shader->nak->asm_str != NULL) {
vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
WRITE_STR(ir->name, "NAK assembly");
WRITE_STR(ir->description, "NAK assembly");
if (!write_ir_text(ir, shader->nak->asm_str))
incomplete_text = true;
}
}
shader->code_size = blob_read_uint32(blob);
void *code_ptr = malloc(shader->code_size);
if (!code_ptr)
goto fail;
blob_copy_bytes(blob, code_ptr, shader->code_size);
shader->code_ptr = code_ptr;
shader->data_size = blob_read_uint32(blob);
void *data_ptr = malloc(shader->data_size);
if (!data_ptr)
goto fail;
blob_copy_bytes(blob, data_ptr, shader->data_size);
shader->data_ptr = data_ptr;
return &shader->base;
fail:
/* nvk_shader_destroy frees both shader and shader->xfb */
nvk_shader_destroy(cache->base.device, &shader->base);
return NULL;
return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
}
static const struct vk_shader_ops nvk_shader_ops = {
.destroy = nvk_shader_destroy,
.serialize = nvk_shader_serialize,
.get_executable_properties = nvk_shader_get_executable_properties,
.get_executable_statistics = nvk_shader_get_executable_statistics,
.get_executable_internal_representations =
nvk_shader_get_executable_internal_representations,
};
const struct vk_device_shader_ops nvk_device_shader_ops = {
.get_nir_options = nvk_get_nir_options,
.get_spirv_options = nvk_get_spirv_options,
.preprocess_nir = nvk_preprocess_nir,
.hash_graphics_state = nvk_hash_graphics_state,
.compile = nvk_compile_shaders,
.deserialize = nvk_deserialize_shader,
.cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,
.cmd_bind_shaders = nvk_cmd_bind_shaders,
};

View file

@@ -14,6 +14,8 @@
#include "nir.h"
#include "nouveau_bo.h"
#include "vk_shader.h"
struct nak_shader_bin;
struct nvk_device;
struct nvk_physical_device;
@@ -57,11 +59,14 @@ struct nvk_cbuf_map {
};
struct nvk_shader {
struct vk_pipeline_cache_object base;
struct vk_shader vk;
struct nak_shader_info info;
struct nvk_cbuf_map cbuf_map;
/* Only relevant for fragment shaders */
float min_sample_shading;
struct nak_shader_bin *nak;
const void *code_ptr;
uint32_t code_size;
@@ -84,11 +89,7 @@ struct nvk_shader {
uint64_t data_addr;
};
static inline bool
nvk_shader_is_enabled(const struct nvk_shader *shader)
{
return shader && shader->upload_size > 0;
}
extern const struct vk_device_shader_ops nvk_device_shader_ops;
VkShaderStageFlags nvk_nak_stages(const struct nv_device_info *info);
@@ -115,18 +116,6 @@ nvk_nir_lower_descriptors(nir_shader *nir,
uint32_t set_layout_count,
struct vk_descriptor_set_layout * const *set_layouts,
struct nvk_cbuf_map *cbuf_map_out);
VkResult
nvk_shader_stage_to_nir(struct nvk_device *dev,
const VkPipelineShaderStageCreateInfo *sinfo,
const struct vk_pipeline_robustness_state *rstate,
struct vk_pipeline_cache *cache,
void *mem_ctx, struct nir_shader **nir_out);
void
nvk_populate_fs_key(struct nak_fs_key *key,
const struct vk_graphics_pipeline_state *state);
void
nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
const struct vk_pipeline_robustness_state *rs,
@@ -135,37 +124,9 @@ nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
struct vk_descriptor_set_layout * const *set_layouts,
struct nvk_cbuf_map *cbuf_map_out);
VkResult
nvk_compile_nir(struct nvk_device *dev, nir_shader *nir,
VkPipelineCreateFlagBits2KHR pipeline_flags,
const struct vk_pipeline_robustness_state *rstate,
const struct nak_fs_key *fs_key,
struct vk_pipeline_cache *cache,
struct nvk_shader *shader);
VkResult
nvk_shader_upload(struct nvk_device *dev, struct nvk_shader *shader);
struct nvk_shader *
nvk_shader_init(struct nvk_device *dev, const void *key_data, size_t key_size);
extern const struct vk_pipeline_cache_object_ops nvk_shader_ops;
void
nvk_shader_finish(struct nvk_device *dev, struct nvk_shader *shader);
void
nvk_hash_shader(unsigned char *hash,
const VkPipelineShaderStageCreateInfo *sinfo,
const struct vk_pipeline_robustness_state *rstate,
bool is_multiview,
const struct vk_pipeline_layout *layout,
const struct nak_fs_key *fs_key);
void
nvk_shader_destroy(struct vk_device *dev,
struct vk_pipeline_cache_object *object);
/* Codegen wrappers.
*
* TODO: Delete these once NAK supports everything.