mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
nvk: rework QMD handling to support pre Turing
Signed-off-by: Karol Herbst <kherbst@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24326>
This commit is contained in:
parent
80d0a074fc
commit
2a8673f694
3 changed files with 133 additions and 31 deletions
|
|
@ -13,6 +13,7 @@
|
|||
#include "nvk_cl9097.h"
|
||||
#include "nvk_cla0c0.h"
|
||||
#include "cla1c0.h"
|
||||
#include "clc0c0.h"
|
||||
#include "nvk_clc3c0.h"
|
||||
#include "nvk_clc597.h"
|
||||
|
||||
|
|
@ -34,12 +35,22 @@ nvk_cmd_buffer_begin_compute(struct nvk_cmd_buffer *cmd,
|
|||
{ }
|
||||
|
||||
static void
|
||||
qmd_set_dispatch_size(UNUSED struct nvk_device *dev, uint32_t *qmd,
|
||||
uint32_t x, uint32_t y, uint32_t z)
|
||||
nva0c0_qmd_set_dispatch_size(UNUSED struct nvk_device *dev, uint32_t *qmd,
|
||||
uint32_t x, uint32_t y, uint32_t z)
|
||||
{
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, x);
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, y);
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_DEPTH, z);
|
||||
NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_WIDTH, x);
|
||||
NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_HEIGHT, y);
|
||||
NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_DEPTH, z);
|
||||
}
|
||||
|
||||
/* Store the dispatch grid dimensions x/y/z in the CTA_RASTER_WIDTH,
 * CTA_RASTER_HEIGHT and CTA_RASTER_DEPTH fields of `qmd`, using the
 * NVC0C0 QMDV02_01 field layout.  `dev` is not used.
 */
static void
|
||||
nvc0c0_qmd_set_dispatch_size(UNUSED struct nvk_device *dev, uint32_t *qmd,
|
||||
uint32_t x, uint32_t y, uint32_t z)
|
||||
{
|
||||
NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_WIDTH, x);
|
||||
NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_HEIGHT, y);
|
||||
/* CTA_RASTER_DEPTH is the field that differs from older QMD versions */
|
||||
NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_DEPTH, z);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
|
|
@ -54,8 +65,18 @@ qmd_dispatch_size_offset(struct nvk_device *dev)
|
|||
}
|
||||
|
||||
static inline void
|
||||
gp100_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
|
||||
uint32_t size, uint64_t address)
|
||||
nva0c0_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
|
||||
uint32_t size, uint64_t address)
|
||||
{
|
||||
NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
|
||||
NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
|
||||
NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_SIZE, index, size);
|
||||
NVA0C0_QMDV00_06_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
|
||||
}
|
||||
|
||||
static inline void
|
||||
nvc0c0_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
|
||||
uint32_t size, uint64_t address)
|
||||
{
|
||||
NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
|
||||
NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
|
||||
|
|
@ -90,6 +111,7 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
|
|||
const struct nvk_compute_pipeline *pipeline = cmd->state.cs.pipeline;
|
||||
const struct nvk_shader *shader =
|
||||
&pipeline->base.shaders[MESA_SHADER_COMPUTE];
|
||||
const struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
|
||||
struct nvk_descriptor_state *desc = &cmd->state.cs.descriptors;
|
||||
VkResult result;
|
||||
|
||||
|
|
@ -112,13 +134,24 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
|
|||
memset(qmd, 0, sizeof(qmd));
|
||||
memcpy(qmd, pipeline->qmd_template, sizeof(pipeline->qmd_template));
|
||||
|
||||
qmd_set_dispatch_size(nvk_cmd_buffer_device(cmd), qmd,
|
||||
desc->root.cs.grid_size[0],
|
||||
desc->root.cs.grid_size[1],
|
||||
desc->root.cs.grid_size[2]);
|
||||
if (dev->ctx->compute.cls >= PASCAL_COMPUTE_A) {
|
||||
nvc0c0_qmd_set_dispatch_size(nvk_cmd_buffer_device(cmd), qmd,
|
||||
desc->root.cs.grid_size[0],
|
||||
desc->root.cs.grid_size[1],
|
||||
desc->root.cs.grid_size[2]);
|
||||
|
||||
gp100_cp_launch_desc_set_cb(qmd, 0, sizeof(desc->root), root_desc_addr);
|
||||
gp100_cp_launch_desc_set_cb(qmd, 1, sizeof(desc->root), root_desc_addr);
|
||||
nvc0c0_cp_launch_desc_set_cb(qmd, 0, sizeof(desc->root), root_desc_addr);
|
||||
nvc0c0_cp_launch_desc_set_cb(qmd, 1, sizeof(desc->root), root_desc_addr);
|
||||
} else {
|
||||
assert(dev->ctx->compute.cls >= KEPLER_COMPUTE_A);
|
||||
nva0c0_qmd_set_dispatch_size(nvk_cmd_buffer_device(cmd), qmd,
|
||||
desc->root.cs.grid_size[0],
|
||||
desc->root.cs.grid_size[1],
|
||||
desc->root.cs.grid_size[2]);
|
||||
|
||||
nva0c0_cp_launch_desc_set_cb(qmd, 0, sizeof(desc->root), root_desc_addr);
|
||||
nva0c0_cp_launch_desc_set_cb(qmd, 1, sizeof(desc->root), root_desc_addr);
|
||||
}
|
||||
|
||||
uint64_t qmd_addr;
|
||||
result = nvk_cmd_buffer_upload_data(cmd, qmd, sizeof(qmd), 256, &qmd_addr);
|
||||
|
|
|
|||
|
|
@ -1,17 +1,22 @@
|
|||
#include "nvk_private.h"
|
||||
#include "nvk_device.h"
|
||||
#include "nvk_physical_device.h"
|
||||
#include "nvk_pipeline.h"
|
||||
#include "nvk_shader.h"
|
||||
#include "vk_nir.h"
|
||||
#include "vk_pipeline.h"
|
||||
|
||||
#include "nouveau_bo.h"
|
||||
#include "nouveau_context.h"
|
||||
|
||||
#include "compiler/spirv/nir_spirv.h"
|
||||
|
||||
#include "drf.h"
|
||||
#include "cla0c0.h"
|
||||
#include "cla0c0qmd.h"
|
||||
#include "clc0c0.h"
|
||||
#include "clc0c0qmd.h"
|
||||
#include "clc3c0.h"
|
||||
#include "clc3c0qmd.h"
|
||||
/* Field setters for the NVA0C0 QMDV00_06 layout.  Both forward their
 * arguments to the NVVAL_MW_SET / NVDEF_MW_SET helpers with the class and
 * QMD version filled in.  In this file the VAL variant is passed numeric
 * expressions and the DEF variant named field values (e.g. TRUE).
 */
#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a)
|
||||
#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a)
|
||||
|
|
@ -20,6 +25,11 @@
|
|||
/* Field setters for the NVC3C0 QMDV02_02 layout; thin wrappers around
 * NVVAL_MW_SET / NVDEF_MW_SET with the class and QMD version filled in.
 */
#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a)
|
||||
#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a)
|
||||
|
||||
/* Generic variants of the per-class setters: the class id and the QMD
 * major/minor version are macro parameters and are token-pasted into
 * NV<class_id> and QMDV<version_major>_<version_minor>.  The version
 * arguments must therefore be spelled exactly as they appear in the layout
 * name (e.g. 02, 01).
 */
#define QMD_DEF_SET(qmd, class_id, version_major, version_minor, a...) \
|
||||
NVDEF_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
|
||||
#define QMD_VAL_SET(qmd, class_id, version_major, version_minor, a...) \
|
||||
NVVAL_MW_SET((qmd), NV##class_id, QMDV##version_major##_##version_minor, ##a)
|
||||
|
||||
static int
|
||||
gv100_sm_config_smem_size(uint32_t size)
|
||||
{
|
||||
|
|
@ -31,32 +41,84 @@ gv100_sm_config_smem_size(uint32_t size)
|
|||
return (size / 4096) + 1;
|
||||
}
|
||||
|
||||
/* Fill in the QMD fields that are set the same way for every QMD layout used
 * in this file.  The layout is selected by class_id / version_major /
 * version_minor, which are forwarded to QMD_DEF_SET / QMD_VAL_SET.
 *
 * Fields written:
 *  - API_VISIBLE_CALL_LIMIT = NO_CHECK
 *  - BARRIER_COUNT = shader->num_barriers
 *  - CTA_THREAD_DIMENSION0..2 = shader->cp.block_size[0..2]
 *  - QMD_MAJOR_VERSION / QMD_VERSION = version_major / version_minor
 *  - SAMPLER_INDEX = INDEPENDENTLY
 *  - SHADER_LOCAL_MEMORY_HIGH_SIZE = 0
 *  - SHADER_LOCAL_MEMORY_LOW_SIZE = align(shader->slm_size, 0x10)
 *  - SHARED_MEMORY_SIZE = align(shader->cp.smem_size, 0x100)
 *
 * NOTE(review): version_major / version_minor are used both as pasted name
 * tokens and as numeric values; callers pass them with a leading zero
 * (e.g. 02, 06), which C reads as octal literals.  That is harmless for the
 * digits 0-7 but would not compile for 08 or 09.
 * NOTE(review): `shader` is expanded unparenthesized and several times, so it
 * should be a plain pointer variable rather than an expression.
 */
#define base_compute_setup_launch_desc_template(qmd, shader, class_id, version_major, version_minor) \
|
||||
do { \
|
||||
QMD_DEF_SET(qmd, class_id, version_major, version_minor, API_VISIBLE_CALL_LIMIT, NO_CHECK); \
|
||||
QMD_VAL_SET(qmd, class_id, version_major, version_minor, BARRIER_COUNT, shader->num_barriers); \
|
||||
QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION0, \
|
||||
shader->cp.block_size[0]); \
|
||||
QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION1, \
|
||||
shader->cp.block_size[1]); \
|
||||
QMD_VAL_SET(qmd, class_id, version_major, version_minor, CTA_THREAD_DIMENSION2, \
|
||||
shader->cp.block_size[2]); \
|
||||
QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_MAJOR_VERSION, version_major); \
|
||||
QMD_VAL_SET(qmd, class_id, version_major, version_minor, QMD_VERSION, version_minor); \
|
||||
QMD_DEF_SET(qmd, class_id, version_major, version_minor, SAMPLER_INDEX, INDEPENDENTLY); \
|
||||
QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); \
|
||||
QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHADER_LOCAL_MEMORY_LOW_SIZE, \
|
||||
align(shader->slm_size, 0x10)); \
|
||||
QMD_VAL_SET(qmd, class_id, version_major, version_minor, SHARED_MEMORY_SIZE, \
|
||||
align(shader->cp.smem_size, 0x100)); \
|
||||
} while (0)
|
||||
|
||||
static void
|
||||
gv100_compute_setup_launch_desc_template(uint32_t *qmd,
|
||||
struct nvk_shader *shader)
|
||||
nva0c0_compute_setup_launch_desc_template(uint32_t *qmd,
|
||||
struct nvk_shader *shader)
|
||||
{
|
||||
base_compute_setup_launch_desc_template(qmd, shader, A0C0, 00, 06);
|
||||
|
||||
NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_DATA_CACHE, TRUE);
|
||||
NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, TRUE);
|
||||
NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, TRUE);
|
||||
NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, TRUE);
|
||||
NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_DATA_CACHE, TRUE);
|
||||
|
||||
if (shader->cp.smem_size <= (16 << 10))
|
||||
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB);
|
||||
else if (shader->cp.smem_size <= (32 << 10))
|
||||
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);
|
||||
else if (shader->cp.smem_size <= (48 << 10))
|
||||
NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
|
||||
else
|
||||
unreachable("Invalid shared memory size");
|
||||
|
||||
uint64_t addr = nvk_shader_address(shader);
|
||||
assert(addr < 0xffffffff);
|
||||
NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, addr);
|
||||
NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, shader->num_gprs);
|
||||
NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30);
|
||||
}
|
||||
|
||||
/* Build the QMD template for `shader` using the NVC0C0 QMDV02_01 layout.
 *
 * Applies the shared base template for class C0C0, version 02_01, then sets
 * SM_GLOBAL_CACHING_ENABLE to 1, PROGRAM_OFFSET to the address returned by
 * nvk_shader_address(), and REGISTER_COUNT to shader->num_gprs.
 */
static void
|
||||
nvc0c0_compute_setup_launch_desc_template(uint32_t *qmd,
|
||||
struct nvk_shader *shader)
|
||||
{
|
||||
base_compute_setup_launch_desc_template(qmd, shader, C0C0, 02, 01);
|
||||
|
||||
uint64_t addr = nvk_shader_address(shader);
|
||||
/* presumably PROGRAM_OFFSET is a 32-bit field, so the address must fit -- TODO confirm */
assert(addr < 0xffffffff);
|
||||
|
||||
NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
|
||||
NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, addr);
|
||||
NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, shader->num_gprs);
|
||||
}
|
||||
|
||||
static void
|
||||
nvc3c0_compute_setup_launch_desc_template(uint32_t *qmd,
|
||||
struct nvk_shader *shader)
|
||||
{
|
||||
base_compute_setup_launch_desc_template(qmd, shader, C3C0, 02, 02);
|
||||
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
|
||||
NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
|
||||
NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, INDEPENDENTLY);
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE,
|
||||
align(shader->cp.smem_size, 0x100));
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
|
||||
align(shader->slm_size, 0x10));
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
|
||||
/* the fields set below are all QMD 2.2+ */
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
|
||||
gv100_sm_config_smem_size(8 * 1024));
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
|
||||
gv100_sm_config_smem_size(96 * 1024));
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2);
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2);
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
|
||||
gv100_sm_config_smem_size(shader->cp.smem_size));
|
||||
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION0, shader->cp.block_size[0]);
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION1, shader->cp.block_size[1]);
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION2, shader->cp.block_size[2]);
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, shader->num_gprs);
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, BARRIER_COUNT, shader->num_barriers);
|
||||
|
||||
uint64_t addr = nvk_shader_address(shader);
|
||||
NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, addr & 0xffffffff);
|
||||
|
|
@ -114,7 +176,16 @@ nvk_compute_pipeline_create(struct nvk_device *device,
|
|||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
gv100_compute_setup_launch_desc_template(pipeline->qmd_template, &pipeline->base.shaders[MESA_SHADER_COMPUTE]);
|
||||
struct nvk_shader *shader = &pipeline->base.shaders[MESA_SHADER_COMPUTE];
|
||||
if (device->ctx->compute.cls >= VOLTA_COMPUTE_A)
|
||||
nvc3c0_compute_setup_launch_desc_template(pipeline->qmd_template, shader);
|
||||
else if (device->ctx->compute.cls >= PASCAL_COMPUTE_A)
|
||||
nvc0c0_compute_setup_launch_desc_template(pipeline->qmd_template, shader);
|
||||
else if (device->ctx->compute.cls >= KEPLER_COMPUTE_A)
|
||||
nva0c0_compute_setup_launch_desc_template(pipeline->qmd_template, shader);
|
||||
else
|
||||
unreachable("Fermi and older not supported!");
|
||||
|
||||
*pPipeline = nvk_pipeline_to_handle(&pipeline->base);
|
||||
return VK_SUCCESS;
|
||||
|
||||
|
|
|
|||
|
|
@ -210,8 +210,6 @@ nvk_queue_state_update(struct nvk_device *dev,
|
|||
|
||||
P_MTHD(p, NVA0C0, SET_SHADER_SHARED_MEMORY_WINDOW);
|
||||
P_NVA0C0_SET_SHADER_SHARED_MEMORY_WINDOW(p, 0xfe << 24);
|
||||
|
||||
// TODO CODE_ADDRESS_HIGH
|
||||
}
|
||||
|
||||
/* From nvc0_screen.c:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue