mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 04:58:05 +02:00
nvk: Wire up ROOT_TABLE
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/work_items/12576
Reviewed-by: Mary Guillemard <mary@mary.zone>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40639>
This commit is contained in:
parent
bff2d8dd9b
commit
30b3de6ec4
9 changed files with 188 additions and 31 deletions
|
|
@ -97,6 +97,7 @@ struct nak_constant_offset_info {
|
|||
uint32_t printf_buffer_offset;
|
||||
};
|
||||
const extern struct nak_constant_offset_info nak_const_offsets_base;
|
||||
const extern struct nak_constant_offset_info nak_const_offsets_turing_graphics;
|
||||
|
||||
#define NAK_PRINTF_BUFFER_SIZE 0x40000
|
||||
|
||||
|
|
|
|||
|
|
@ -346,7 +346,11 @@ enum nak_fs_out {
|
|||
static inline const struct nak_constant_offset_info*
|
||||
nak_const_offsets(const struct nak_compiler* nak, bool is_graphics)
|
||||
{
|
||||
return &nak_const_offsets_base;
|
||||
if (nak->sm >= 75 && is_graphics) {
|
||||
return &nak_const_offsets_turing_graphics;
|
||||
} else {
|
||||
return &nak_const_offsets_base;
|
||||
}
|
||||
}
|
||||
|
||||
bool nak_nir_rematerialize_load_const(nir_shader *nir);
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
#include "util/u_dynarray.h"
|
||||
|
||||
#include "vk_command_buffer.h"
|
||||
#include "clc597.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
|
@ -80,6 +81,17 @@ struct nvk_root_descriptor_table {
|
|||
static_assert(nvk_root_descriptor_offset(push) % 8 == 0,
|
||||
"Push constants should be aligned properly");
|
||||
|
||||
/* Index of the hardware root table that holds the given
 * nvk_root_descriptor_table member: its root descriptor offset divided by
 * NVK_HW_ROOT_TABLE_SIZE.
 */
#define nvk_hw_root_table_index(member) \
   (nvk_root_descriptor_offset(member) / NVK_HW_ROOT_TABLE_SIZE)

/* Offset of the given member measured from the start of the hardware root
 * table that holds it (root descriptor offset modulo the table size).
 */
#define nvk_hw_root_table_offset(member) \
   (nvk_root_descriptor_offset(member) % NVK_HW_ROOT_TABLE_SIZE)
|
||||
|
||||
static inline bool nvk_use_hw_root_table(const struct nv_device_info *info,
|
||||
bool is_gfx)
|
||||
{
|
||||
return is_gfx && info->cls_eng3d >= TURING_A;
|
||||
}
|
||||
|
||||
enum ENUM_PACKED nvk_descriptor_set_type {
|
||||
NVK_DESCRIPTOR_SET_TYPE_NONE,
|
||||
NVK_DESCRIPTOR_SET_TYPE_SET,
|
||||
|
|
|
|||
|
|
@ -521,6 +521,16 @@ nvk_push_draw_state_init(struct nvk_queue *queue, struct nv_push *p)
|
|||
.binding_group4_enable = 0x3,
|
||||
});
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; i++) {
|
||||
P_1INC(p, NVC597, SET_ROOT_TABLE_SELECTOR);
|
||||
P_NVC597_SET_ROOT_TABLE_SELECTOR(p, {
|
||||
.root_table = i,
|
||||
.offset = 0,
|
||||
});
|
||||
for (uint32_t dw = 0; dw < 64; dw++)
|
||||
P_INLINE_DATA(p, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (pdev->info.cls_eng3d >= TURING_A) {
|
||||
|
|
@ -702,16 +712,45 @@ nvk_cmd_flush_gfx_root_desc(struct nvk_cmd_buffer *cmd,
|
|||
struct nvk_descriptor_state *desc,
|
||||
size_t offset, size_t size)
|
||||
{
|
||||
const struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
|
||||
const struct nvk_physical_device *pdev = nvk_device_physical(dev);
|
||||
|
||||
const uint32_t start_dw = offset / 4;
|
||||
const uint32_t end_dw = DIV_ROUND_UP(offset + size, 4);
|
||||
const uint32_t len_dw = end_dw - start_dw;
|
||||
|
||||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2 + len_dw);
|
||||
P_1INC(p, NV9097, LOAD_CONSTANT_BUFFER_OFFSET);
|
||||
P_NV9097_LOAD_CONSTANT_BUFFER_OFFSET(p, start_dw * 4);
|
||||
|
||||
const uint32_t *root_dw = (uint32_t *)desc->root;
|
||||
P_INLINE_ARRAY(p, &root_dw[start_dw], len_dw);
|
||||
|
||||
if (nvk_use_hw_root_table(&pdev->info, true)) {
|
||||
const uint32_t TABLE_SIZE_DW = NVK_HW_ROOT_TABLE_SIZE / sizeof(uint32_t);
|
||||
const uint32_t start_table = start_dw / TABLE_SIZE_DW;
|
||||
const uint32_t end_table = DIV_ROUND_UP(end_dw, TABLE_SIZE_DW);
|
||||
for (uint32_t table = start_table; table < end_table; table++) {
|
||||
const uint32_t start_dw_table =
|
||||
(table == start_table)
|
||||
? (start_dw - table * TABLE_SIZE_DW)
|
||||
: 0;
|
||||
const uint32_t end_dw_table =
|
||||
(table == end_table - 1)
|
||||
? (end_dw - table * TABLE_SIZE_DW)
|
||||
: TABLE_SIZE_DW;
|
||||
const uint32_t len_dw_table = end_dw_table - start_dw_table;
|
||||
|
||||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2 + len_dw_table);
|
||||
P_1INC(p, NVC597, SET_ROOT_TABLE_SELECTOR);
|
||||
P_NVC597_SET_ROOT_TABLE_SELECTOR(p, {
|
||||
.root_table = table,
|
||||
.offset = start_dw_table * 4,
|
||||
});
|
||||
P_INLINE_ARRAY(p, &root_dw[start_dw_table + table * TABLE_SIZE_DW], len_dw_table);
|
||||
}
|
||||
} else {
|
||||
const uint32_t len_dw = end_dw - start_dw;
|
||||
|
||||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2 + len_dw);
|
||||
P_1INC(p, NV9097, LOAD_CONSTANT_BUFFER_OFFSET);
|
||||
P_NV9097_LOAD_CONSTANT_BUFFER_OFFSET(p, start_dw * 4);
|
||||
|
||||
P_INLINE_ARRAY(p, &root_dw[start_dw], len_dw);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -3254,6 +3293,14 @@ nvk_mme_anti_alias_samples(uint32_t samples)
|
|||
return nvk_mme_val_mask(samples_log2 << 4, 0x00f0);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_anti_alias_mask(struct mme_builder *b, struct mme_value mask)
|
||||
{
|
||||
if (nvk_use_hw_root_table(b->devinfo, true))
|
||||
mme_mthd(b, NVC597_LOAD_ROOT_TABLE);
|
||||
mme_emit(b, mask);
|
||||
}
|
||||
|
||||
void
|
||||
nvk_mme_set_anti_alias(struct mme_builder *b)
|
||||
{
|
||||
|
|
@ -3316,9 +3363,20 @@ nvk_mme_set_anti_alias(struct mme_builder *b)
|
|||
*/
|
||||
STATIC_ASSERT(sizeof(struct nak_sample_mask) == 2);
|
||||
|
||||
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
|
||||
mme_emit(b, mme_imm(nvk_root_descriptor_offset(draw.sample_masks)));
|
||||
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0));
|
||||
if (nvk_use_hw_root_table(b->devinfo, true)) {
|
||||
uint32_t root_table_selector;
|
||||
V_NVC597_SET_ROOT_TABLE_SELECTOR(root_table_selector, {
|
||||
.root_table = nvk_hw_root_table_index(draw.sample_masks),
|
||||
.offset = nvk_hw_root_table_offset(draw.sample_masks),
|
||||
});
|
||||
|
||||
mme_mthd(b, NVC597_SET_ROOT_TABLE_SELECTOR);
|
||||
mme_emit(b, mme_imm(root_table_selector));
|
||||
} else {
|
||||
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
|
||||
mme_emit(b, mme_imm(nvk_root_descriptor_offset(draw.sample_masks)));
|
||||
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0));
|
||||
}
|
||||
|
||||
/* Annoyingly, we have to pack these in pairs */
|
||||
|
||||
|
|
@ -3331,7 +3389,7 @@ nvk_mme_set_anti_alias(struct mme_builder *b)
|
|||
for (uint32_t i = 0; i < NVK_MAX_SAMPLES; i += 2) {
|
||||
uint32_t mask0 = 1 << i;
|
||||
uint32_t mask1 = 1 << (i + 1);
|
||||
mme_emit(b, mme_imm(mask0 | (mask1 << 16)));
|
||||
emit_anti_alias_mask(b, mme_imm(mask0 | (mask1 << 16)));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3339,14 +3397,14 @@ nvk_mme_set_anti_alias(struct mme_builder *b)
|
|||
mme_if(b, ieq, passes_log2, mme_zero()) {
|
||||
/* It's a single pass so we can use 0xffff */
|
||||
for (uint32_t i = 0; i < NVK_MAX_SAMPLES / 2; i++)
|
||||
mme_emit(b, mme_imm(~0));
|
||||
emit_anti_alias_mask(b, mme_imm(~0));
|
||||
}
|
||||
|
||||
mme_if(b, ieq, passes_log2, mme_imm(1)) {
|
||||
for (uint32_t i = 0; i < NVK_MAX_SAMPLES / 2; i++) {
|
||||
struct mme_value mask =
|
||||
nvk_mme_load_scratch_arr(b, SAMPLE_MASKS_2PASS_0, i);
|
||||
mme_emit(b, mask);
|
||||
emit_anti_alias_mask(b, mask);
|
||||
mme_free_reg(b, mask);
|
||||
}
|
||||
}
|
||||
|
|
@ -3355,7 +3413,7 @@ nvk_mme_set_anti_alias(struct mme_builder *b)
|
|||
for (uint32_t i = 0; i < NVK_MAX_SAMPLES / 2; i++) {
|
||||
struct mme_value mask =
|
||||
nvk_mme_load_scratch_arr(b, SAMPLE_MASKS_4PASS_0, i);
|
||||
mme_emit(b, mask);
|
||||
emit_anti_alias_mask(b, mask);
|
||||
mme_free_reg(b, mask);
|
||||
}
|
||||
}
|
||||
|
|
@ -3390,11 +3448,24 @@ nvk_mme_set_anti_alias_test_check(
|
|||
assert(results[1].mthd == NV9097_SET_HYBRID_ANTI_ALIAS_CONTROL);
|
||||
assert(results[1].data == expected[2]);
|
||||
|
||||
assert(results[2].mthd == NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
|
||||
assert(results[2].data == nvk_root_descriptor_offset(draw.sample_masks));
|
||||
if (nvk_use_hw_root_table(devinfo, true)) {
|
||||
uint32_t root_table_selector;
|
||||
V_NVC597_SET_ROOT_TABLE_SELECTOR(root_table_selector, {
|
||||
.root_table = nvk_hw_root_table_index(draw.sample_masks),
|
||||
.offset = nvk_hw_root_table_offset(draw.sample_masks),
|
||||
});
|
||||
assert(results[2].mthd == NVC597_SET_ROOT_TABLE_SELECTOR);
|
||||
assert(results[2].data == root_table_selector);
|
||||
} else {
|
||||
assert(results[2].mthd == NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
|
||||
assert(results[2].data == nvk_root_descriptor_offset(draw.sample_masks));
|
||||
}
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
assert(results[3 + i].mthd == NV9097_LOAD_CONSTANT_BUFFER(i));
|
||||
if (nvk_use_hw_root_table(devinfo, true))
|
||||
assert(results[3 + i].mthd == NVC597_LOAD_ROOT_TABLE);
|
||||
else
|
||||
assert(results[3 + i].mthd == NV9097_LOAD_CONSTANT_BUFFER(i));
|
||||
assert(results[3 + i].data == expected[3 + i]);
|
||||
}
|
||||
|
||||
|
|
@ -4434,10 +4505,23 @@ nvk_mme_set_cb0_mthd(struct mme_builder *b,
|
|||
mme_mthd(b, mthd);
|
||||
mme_emit(b, val);
|
||||
|
||||
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
|
||||
mme_emit(b, mme_imm(cb0_offset));
|
||||
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0));
|
||||
mme_emit(b, val);
|
||||
if (nvk_use_hw_root_table(b->devinfo, true)) {
|
||||
uint32_t root_table_selector;
|
||||
V_NVC597_SET_ROOT_TABLE_SELECTOR(root_table_selector,{
|
||||
.root_table = cb0_offset / NVK_HW_ROOT_TABLE_SIZE,
|
||||
.offset = cb0_offset % NVK_HW_ROOT_TABLE_SIZE,
|
||||
});
|
||||
|
||||
mme_mthd(b, NVC597_SET_ROOT_TABLE_SELECTOR);
|
||||
mme_emit(b, mme_imm(root_table_selector));
|
||||
mme_mthd(b, NVC597_LOAD_ROOT_TABLE);
|
||||
mme_emit(b, val);
|
||||
} else {
|
||||
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
|
||||
mme_emit(b, mme_imm(cb0_offset));
|
||||
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0));
|
||||
mme_emit(b, val);
|
||||
}
|
||||
}
|
||||
mme_free_reg(b, old);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -508,9 +508,24 @@ build_push_write_push_const(nir_builder *b, struct nvk_nir_push *p,
|
|||
assert(pc_range->size % 4 == 0);
|
||||
const uint32_t dw_count = pc_range->size / 4;
|
||||
|
||||
nvk_nir_P_1INC(b, p, NV9097, LOAD_CONSTANT_BUFFER_OFFSET, 1 + dw_count);
|
||||
nvk_nir_push_dw(b, p, nir_imm_int(b,
|
||||
nvk_root_descriptor_offset(push) + pc_range->offset));
|
||||
assert(!(pc_range->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT));
|
||||
if (nvk_use_hw_root_table(&pdev->info, true)) {
|
||||
const uint32_t table = nvk_hw_root_table_index(push);
|
||||
static_assert(nvk_hw_root_table_offset(push) == 0,
|
||||
"Push constants are aligned");
|
||||
|
||||
uint32_t root_table_selector;
|
||||
V_NVC597_SET_ROOT_TABLE_SELECTOR(root_table_selector,{
|
||||
.root_table = table,
|
||||
.offset = pc_range->offset,
|
||||
});
|
||||
nvk_nir_P_1INC(b, p, NVC597, SET_ROOT_TABLE_SELECTOR, 1 + dw_count);
|
||||
nvk_nir_push_dw(b, p, nir_imm_int(b, root_table_selector));
|
||||
} else {
|
||||
nvk_nir_P_1INC(b, p, NV9097, LOAD_CONSTANT_BUFFER_OFFSET, 1 + dw_count);
|
||||
nvk_nir_push_dw(b, p, nir_imm_int(b,
|
||||
nvk_root_descriptor_offset(push) + pc_range->offset));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -568,12 +568,22 @@ _load_root_table(nir_builder *b,
|
|||
const struct lower_descriptors_ctx *ctx)
|
||||
{
|
||||
unsigned align_mul = bit_size / 8;
|
||||
uint32_t base, cbuf;
|
||||
if (nvk_use_hw_root_table(ctx->dev_info,
|
||||
b->shader->info.stage != MESA_SHADER_COMPUTE)) {
|
||||
cbuf = NVK_HW_ROOT_TABLE_FIRST_CB +
|
||||
root_table_offset / NVK_HW_ROOT_TABLE_SIZE;
|
||||
base = root_table_offset % NVK_HW_ROOT_TABLE_SIZE;
|
||||
} else {
|
||||
cbuf = 0; /* Root table */
|
||||
base = root_table_offset;
|
||||
}
|
||||
return nir_ldc_nv(b, num_components, bit_size,
|
||||
nir_imm_int(b, 0), /* Root table */
|
||||
nir_imm_int(b, cbuf),
|
||||
nir_imm_int(b, 0),
|
||||
.align_mul = align_mul,
|
||||
.align_offset = 0,
|
||||
.base = root_table_offset);
|
||||
.base = base);
|
||||
}
|
||||
|
||||
#define load_root_table(b, nc, bs, member, ctx) \
|
||||
|
|
@ -583,18 +593,32 @@ static nir_def *
|
|||
_load_root_table_array(nir_builder *b,
                       unsigned num_components, unsigned bit_size,
                       uint32_t root_table_offset, uint32_t stride,
                       uint32_t array_size, nir_def *index,
                       const struct lower_descriptors_ctx *ctx)
{
   /* Load element `index` of an array that lives in the root descriptor
    * table.  root_table_offset is the array's offset in the root table,
    * stride the size of one element and array_size the size of the whole
    * array (the load_root_table_array() macro passes sizeof(member[0]) and
    * sizeof(member)).  The result is the nir_ldc_nv load.
    */
   uint32_t base, cbuf;
   if (nvk_use_hw_root_table(ctx->dev_info,
                             b->shader->info.stage != MESA_SHADER_COMPUTE)) {
      /* A single cbuf index is chosen below and the dynamic index only moves
       * the offset inside it, so the whole array has to fit in one hardware
       * root table.
       */
      assert(root_table_offset % NVK_HW_ROOT_TABLE_SIZE + array_size <=
             NVK_HW_ROOT_TABLE_SIZE);

      cbuf = NVK_HW_ROOT_TABLE_FIRST_CB +
             root_table_offset / NVK_HW_ROOT_TABLE_SIZE;
      base = root_table_offset % NVK_HW_ROOT_TABLE_SIZE;
   } else {
      cbuf = 0; /* Root table */
      base = root_table_offset;
   }

   return nir_ldc_nv(b, num_components, bit_size,
                     nir_imm_int(b, cbuf),
                     nir_imul_imm(b, index, stride),
                     .base = base);
}
|
||||
|
||||
#define load_root_table_array(b, nc, bs, member, index, ctx) \
|
||||
_load_root_table_array(b, nc, bs, nvk_root_descriptor_offset(member), \
|
||||
sizeof(((struct nvk_root_descriptor_table){}).member[0]), \
|
||||
sizeof(((struct nvk_root_descriptor_table){}).member), \
|
||||
index, ctx)
|
||||
|
||||
static bool
|
||||
|
|
|
|||
|
|
@ -42,6 +42,10 @@
|
|||
#define NVK_MAX_IMAGE_PLANES 3
|
||||
#define NVK_MAX_SAMPLER_PLANES 2
|
||||
|
||||
#define NVK_HW_ROOT_TABLE_SIZE 256
|
||||
#define NVK_HW_ROOT_TABLE_FIRST_CB 24
|
||||
#define NVK_HW_ROOT_TABLE_COUNT 8
|
||||
|
||||
/* Device Generated Commands */
|
||||
#define NVK_DGC_ALIGN 0x100
|
||||
|
||||
|
|
|
|||
|
|
@ -331,7 +331,7 @@ nvk_queue_init_context_state(struct nvk_queue *queue)
|
|||
const struct nvk_physical_device *pdev = nvk_device_physical(dev);
|
||||
VkResult result;
|
||||
|
||||
uint32_t push_data[4096];
|
||||
uint32_t push_data[4096 + 1024];
|
||||
struct nv_push push;
|
||||
nv_push_init(&push, push_data, ARRAY_SIZE(push_data),
|
||||
nvk_queue_subchannels_from_engines(queue->engines));
|
||||
|
|
|
|||
|
|
@ -42,6 +42,19 @@ const struct nak_constant_offset_info nak_const_offsets_base = {
|
|||
.printf_buffer_offset = nvk_root_descriptor_offset(printf_buffer_addr),
|
||||
};
|
||||
|
||||
const struct nak_constant_offset_info nak_const_offsets_turing_graphics = {
|
||||
.sample_info_cb = NVK_HW_ROOT_TABLE_FIRST_CB +
|
||||
nvk_hw_root_table_index(draw.sample_locations),
|
||||
.sample_locations_offset = nvk_hw_root_table_offset(draw.sample_locations),
|
||||
.sample_masks_offset = nvk_hw_root_table_offset(draw.sample_masks),
|
||||
.printf_cb = NVK_HW_ROOT_TABLE_FIRST_CB +
|
||||
nvk_hw_root_table_index(printf_buffer_addr),
|
||||
.printf_buffer_offset = nvk_hw_root_table_offset(printf_buffer_addr),
|
||||
};
|
||||
static_assert(nvk_hw_root_table_index(draw.sample_locations) ==
|
||||
nvk_hw_root_table_index(draw.sample_masks),
|
||||
"Sample info is in same root table");
|
||||
|
||||
static void
|
||||
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue